You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ya...@apache.org on 2017/07/25 22:17:46 UTC

[2/3] mesos git commit: Add fetcher cache space usage metrics.

Add fetcher cache space usage metrics.

Add fetcher metrics to track the (constant) total size of the
cache and the (varying) amount of cache space in use. The cache size
is published as `containerizer/fetcher/cache_size_total_bytes`
and the used space as `containerizer/fetcher/cache_size_used_bytes`.

Review: https://reviews.apache.org/r/60791/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/d26db215
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/d26db215
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/d26db215

Branch: refs/heads/master
Commit: d26db215f374e9b873cc45eb0514424ef9577a1d
Parents: 637ddc7
Author: James Peach <jp...@apache.org>
Authored: Tue Jul 25 15:12:52 2017 -0700
Committer: Jiang Yan Xu <xu...@apple.com>
Committed: Tue Jul 25 15:12:52 2017 -0700

----------------------------------------------------------------------
 docs/monitoring.md                          | 15 ++++++
 src/slave/containerizer/fetcher.cpp         | 34 +++++++++++-
 src/slave/containerizer/fetcher_process.hpp |  6 +++
 src/tests/fetcher_cache_tests.cpp           | 69 ++++++++++++++++++++++++
 4 files changed, 123 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/d26db215/docs/monitoring.md
----------------------------------------------------------------------
diff --git a/docs/monitoring.md b/docs/monitoring.md
index 38b8093..d1f64d4 100644
--- a/docs/monitoring.md
+++ b/docs/monitoring.md
@@ -1255,6 +1255,21 @@ the agent and their current usage.
 </thead>
 <tr>
   <td>
+  <code>containerizer/fetcher/cache_size_total_bytes</code>
+  </td>
+  <td>The configured maximum size of the fetcher cache in bytes. This value is
+  constant for the life of the Mesos agent.</td>
+  <td>Gauge</td>
+</tr>
+<tr>
+  <td>
+  <code>containerizer/fetcher/cache_size_used_bytes</code>
+  </td>
+  <td>The current amount of data stored in the fetcher cache in bytes.</td>
+  <td>Gauge</td>
+</tr>
+<tr>
+  <td>
   <code>slave/cpus_percent</code>
   </td>
   <td>Percentage of allocated CPUs</td>

http://git-wip-us.apache.org/repos/asf/mesos/blob/d26db215/src/slave/containerizer/fetcher.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/fetcher.cpp b/src/slave/containerizer/fetcher.cpp
index bc8a4b6..fdeb9de 100644
--- a/src/slave/containerizer/fetcher.cpp
+++ b/src/slave/containerizer/fetcher.cpp
@@ -261,10 +261,24 @@ void Fetcher::kill(const ContainerID& containerId)
 
 FetcherProcess::Metrics::Metrics(FetcherProcess *fetcher)
   : task_fetches_total("containerizer/fetcher/task_fetches_total"),
-    task_fetches_failed("containerizer/fetcher/task_fetches_failed")
+    task_fetches_failed("containerizer/fetcher/task_fetches_failed"),
+    cache_size_total_bytes(
+        "containerizer/fetcher/cache_size_total_bytes",
+        [=]() {
+          // This value is safe to read while it is concurrently updated.
+          return fetcher->cache.totalSpace().bytes();
+        }),
+    cache_size_used_bytes(
+        "containerizer/fetcher/cache_size_used_bytes",
+        [=]() {
+          // This value is safe to read while it is concurrently updated.
+          return fetcher->cache.usedSpace().bytes();
+        })
 {
   process::metrics::add(task_fetches_total);
   process::metrics::add(task_fetches_failed);
+  process::metrics::add(cache_size_total_bytes);
+  process::metrics::add(cache_size_used_bytes);
 }
 
 
@@ -272,6 +286,12 @@ FetcherProcess::Metrics::~Metrics()
 {
   process::metrics::remove(task_fetches_total);
   process::metrics::remove(task_fetches_failed);
+
+  // Wait for the metrics to be removed before we allow the destructor
+  // to complete.
+  await(
+      process::metrics::remove(cache_size_total_bytes),
+      process::metrics::remove(cache_size_used_bytes)).await();
 }
 
 
@@ -1204,6 +1224,18 @@ void FetcherProcess::Cache::releaseSpace(const Bytes& bytes)
 }
 
 
+Bytes FetcherProcess::Cache::totalSpace() const
+{
+  return space;
+}
+
+
+Bytes FetcherProcess::Cache::usedSpace() const
+{
+  return tally;
+}
+
+
 Bytes FetcherProcess::Cache::availableSpace() const
 {
   if (tally > space) {

http://git-wip-us.apache.org/repos/asf/mesos/blob/d26db215/src/slave/containerizer/fetcher_process.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/fetcher_process.hpp b/src/slave/containerizer/fetcher_process.hpp
index 25bbe73..13812b3 100644
--- a/src/slave/containerizer/fetcher_process.hpp
+++ b/src/slave/containerizer/fetcher_process.hpp
@@ -28,6 +28,7 @@
 #include <process/process.hpp>
 
 #include <process/metrics/counter.hpp>
+#include <process/metrics/gauge.hpp>
 
 #include <stout/hashmap.hpp>
 
@@ -143,6 +144,8 @@ public:
     void claimSpace(const Bytes& bytes);
     void releaseSpace(const Bytes& bytes);
 
+    Bytes totalSpace() const;
+    Bytes usedSpace() const;
     Bytes availableSpace() const;
 
     // Invents a new, distinct base name for a cache file, using the same
@@ -259,6 +262,9 @@ private:
     // fail to fetch, the failure count will only increase by one.
     process::metrics::Counter task_fetches_total;
     process::metrics::Counter task_fetches_failed;
+
+    process::metrics::Gauge cache_size_total_bytes;
+    process::metrics::Gauge cache_size_used_bytes;
   } metrics;
 
   const Flags flags;

http://git-wip-us.apache.org/repos/asf/mesos/blob/d26db215/src/tests/fetcher_cache_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/fetcher_cache_tests.cpp b/src/tests/fetcher_cache_tests.cpp
index 1c654e5..6d212cd 100644
--- a/src/tests/fetcher_cache_tests.cpp
+++ b/src/tests/fetcher_cache_tests.cpp
@@ -38,6 +38,7 @@
 #include <process/queue.hpp>
 #include <process/subprocess.hpp>
 
+#include <stout/json.hpp>
 #include <stout/option.hpp>
 #include <stout/os.hpp>
 #include <stout/path.hpp>
@@ -58,6 +59,7 @@
 #include "tests/flags.hpp"
 #include "tests/mesos.hpp"
 #include "tests/mock_fetcher.hpp"
+#include "tests/utils.hpp"
 
 using mesos::fetcher::FetcherInfo;
 
@@ -147,6 +149,8 @@ protected:
 
   Try<vector<Task>> launchTasks(const vector<CommandInfo>& commandInfos);
 
+  void verifyCacheMetrics();
+
   // Promises whose futures indicate that FetcherProcess::_fetch() has been
   // called for a task with a given index.
   vector<Owned<Promise<Nothing>>> fetchContentionWaypoints;
@@ -251,6 +255,43 @@ static void logSandbox(const Path& path)
 }
 
 
+void FetcherCacheTest::verifyCacheMetrics()
+{
+  JSON::Object metrics = Metrics();
+
+  ASSERT_EQ(
+      1u,
+      metrics.values.count("containerizer/fetcher/cache_size_total_bytes"));
+
+  // The total size is always given by the corresponding agent flag.
+  EXPECT_SOME_EQ(
+      flags.fetcher_cache_size.bytes(),
+      metrics.at<JSON::Number>("containerizer/fetcher/cache_size_total_bytes"));
+
+  Try<std::list<Path>> files = fetcherProcess->cacheFiles();
+  ASSERT_SOME(files);
+
+  Bytes used;
+
+  foreach (const auto& file, files.get()) {
+    Try<Bytes> size = os::stat::size(file);
+    ASSERT_SOME(size);
+
+    used += size.get();
+  }
+
+  ASSERT_EQ(
+      1u,
+      metrics.values.count("containerizer/fetcher/cache_size_used_bytes"));
+
+  // Verify that the used amount of cache is the total of the size of
+  // all the files in the cache.
+  EXPECT_SOME_EQ(
+      used.bytes(),
+      metrics.at<JSON::Number>("containerizer/fetcher/cache_size_used_bytes"));
+}
+
+
 void FetcherCacheTest::TearDown()
 {
   if (HasFatalFailure()) {
@@ -656,6 +697,8 @@ TEST_F(FetcherCacheTest, LocalCached)
     EXPECT_EQ(1u, fetcherProcess->cacheSize());
     ASSERT_SOME(fetcherProcess->cacheFiles());
     EXPECT_EQ(1u, fetcherProcess->cacheFiles()->size());
+
+    verifyCacheMetrics();
   }
 }
 
@@ -686,6 +729,8 @@ TEST_F(FetcherCacheTest, CachedCustomFilename)
   ASSERT_SOME(fetcherProcess->cacheFiles());
   EXPECT_EQ(1u, fetcherProcess->cacheFiles()->size());
 
+  verifyCacheMetrics();
+
   // Verify that the downloaded executable lives at our custom output path.
   const string executablePath = path::join(
     task->runDirectory.string(), customOutputFile);
@@ -728,6 +773,8 @@ TEST_F(FetcherCacheTest, CachedCustomOutputFileWithSubdirectory)
   ASSERT_SOME(fetcherProcess->cacheFiles());
   EXPECT_EQ(1u, fetcherProcess->cacheFiles()->size());
 
+  verifyCacheMetrics();
+
   // Verify that the downloaded executable lives at our custom output file
   // path.
   const string executablePath = path::join(
@@ -791,6 +838,8 @@ TEST_F(FetcherCacheTest, CachedFallback)
   EXPECT_EQ(0u, fetcherProcess->cacheSize());
   ASSERT_SOME(fetcherProcess->cacheFiles());
   EXPECT_EQ(0u, fetcherProcess->cacheFiles()->size());
+
+  verifyCacheMetrics();
 }
 
 
@@ -828,6 +877,8 @@ TEST_F(FetcherCacheTest, LocalUncachedExtract)
   EXPECT_EQ(0u, fetcherProcess->cacheSize());
   ASSERT_SOME(fetcherProcess->cacheFiles());
   EXPECT_EQ(0u, fetcherProcess->cacheFiles()->size());
+
+  verifyCacheMetrics();
 }
 
 
@@ -863,6 +914,8 @@ TEST_F(FetcherCacheTest, LocalCachedExtract)
     EXPECT_EQ(1u, fetcherProcess->cacheSize());
     ASSERT_SOME(fetcherProcess->cacheFiles());
     EXPECT_EQ(1u, fetcherProcess->cacheFiles()->size());
+
+    verifyCacheMetrics();
   }
 }
 
@@ -1013,6 +1066,8 @@ TEST_F(FetcherCacheHttpTest, HttpCachedSerialized)
     ASSERT_SOME(fetcherProcess->cacheFiles());
     EXPECT_EQ(1u, fetcherProcess->cacheFiles()->size());
 
+    verifyCacheMetrics();
+
     // 2 requests: 1 for content-length, 1 for download.
     EXPECT_EQ(2u, httpServer->countCommandRequests);
   }
@@ -1078,6 +1133,8 @@ TEST_F(FetcherCacheHttpTest, HttpCachedConcurrent)
   ASSERT_SOME(fetcherProcess->cacheFiles());
   EXPECT_EQ(1u, fetcherProcess->cacheFiles()->size());
 
+  verifyCacheMetrics();
+
   // HTTP requests regarding the archive asset as follows. Archive
   // "content-length" requests: 1, archive file downloads: 2.
   EXPECT_EQ(2u, httpServer->countCommandRequests);
@@ -1187,6 +1244,8 @@ TEST_F(FetcherCacheHttpTest, HttpMixed)
   ASSERT_SOME(fetcherProcess->cacheFiles());
   EXPECT_EQ(1u, fetcherProcess->cacheFiles()->size());
 
+  verifyCacheMetrics();
+
   // HTTP requests regarding the command asset as follows. Command
   // "content-length" requests: 0, command file downloads: 3.
   EXPECT_EQ(3u, httpServer->countCommandRequests);
@@ -1333,6 +1392,8 @@ TEST_F(FetcherCacheHttpTest, DISABLED_HttpCachedRecovery)
     ASSERT_SOME(fetcherProcess->cacheFiles());
     EXPECT_EQ(1u, fetcherProcess->cacheFiles()->size());
 
+    verifyCacheMetrics();
+
     // content-length requests: 1
     // downloads: 1
     EXPECT_EQ(2u, httpServer->countCommandRequests);
@@ -1391,6 +1452,8 @@ TEST_F(FetcherCacheTest, SimpleEviction)
                 fetcherProcess->cacheFiles()->size());
     }
   }
+
+  verifyCacheMetrics();
 }
 
 
@@ -1473,6 +1536,7 @@ TEST_F(FetcherCacheTest, FallbackFromEviction)
   ASSERT_SOME(fetcherProcess->cacheFiles());
   EXPECT_EQ(1u, fetcherProcess->cacheFiles()->size());
 
+  verifyCacheMetrics();
 
   // Task 1:
 
@@ -1520,6 +1584,7 @@ TEST_F(FetcherCacheTest, FallbackFromEviction)
   ASSERT_SOME(fetcherProcess->cacheFiles());
   EXPECT_EQ(1u, fetcherProcess->cacheFiles()->size());
 
+  verifyCacheMetrics();
 
   // Task 2:
 
@@ -1564,6 +1629,8 @@ TEST_F(FetcherCacheTest, FallbackFromEviction)
   EXPECT_EQ(1u, fetcherProcess->cacheSize());
   ASSERT_SOME(fetcherProcess->cacheFiles());
   EXPECT_EQ(1u, fetcherProcess->cacheFiles()->size());
+
+  verifyCacheMetrics();
 }
 
 
@@ -1620,6 +1687,8 @@ TEST_F(FetcherCacheTest, RemoveLRUCacheEntries)
 
   EXPECT_EQ(2u, fetcherProcess->cacheSize());
 
+  verifyCacheMetrics();
+
   // FetcherProcess::cacheFiles returns all cache files that are in the cache
   // directory. We expect cmd1 and cmd2 to be there, cmd0 should have been
   // evicted.