You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by al...@apache.org on 2018/08/30 20:40:28 UTC

[mesos] 01/02: Augmented `Statistics` to work with any collection.

This is an automated email from the ASF dual-hosted git repository.

alexr pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit 21e4b2bdc70c2726a193e2c27bf7c1ebb798ca57
Author: Alexander Rukletsov <al...@apache.org>
AuthorDate: Thu Aug 2 12:21:48 2018 +0200

    Augmented `Statistics` to work with any collection.
    
    Review: https://reviews.apache.org/r/68224
---
 3rdparty/libprocess/include/process/statistics.hpp | 86 ++++++++++++++++------
 3rdparty/libprocess/src/tests/statistics_tests.cpp | 27 ++++++-
 2 files changed, 89 insertions(+), 24 deletions(-)

diff --git a/3rdparty/libprocess/include/process/statistics.hpp b/3rdparty/libprocess/include/process/statistics.hpp
index e9f1fc2..7d787c4 100644
--- a/3rdparty/libprocess/include/process/statistics.hpp
+++ b/3rdparty/libprocess/include/process/statistics.hpp
@@ -16,6 +16,8 @@
 #include <glog/logging.h>
 
 #include <algorithm>
+#include <iterator>
+#include <type_traits>
 #include <vector>
 
 #include <process/timeseries.hpp>
@@ -25,23 +27,23 @@
 
 namespace process {
 
-// Represents statistics for a TimeSeries of data.
+// Represents statistics for a `TimeSeries` of data or a standard container.
 template <typename T>
 struct Statistics
 {
-  // Returns Statistics for the given TimeSeries, or None() if the
-  // TimeSeries is empty.
+  // Returns `Statistics` for the given `TimeSeries`, or `None` if the
+  // `TimeSeries` has less then 2 datapoints.
+  //
   // TODO(dhamon): Consider adding a histogram abstraction for better
   // performance.
+  //
+  // Remove this specification once we can construct directly from
+  // `TimeSeries<T>::Value`, e.g., by using an iterator adaptor, see
+  // https://www.boost.org/doc/libs/1_51_0/libs/range/doc/html/range/reference/adaptors/reference/map_values.html // NOLINT(whitespace/line_length)
   static Option<Statistics<T>> from(const TimeSeries<T>& timeseries)
   {
     std::vector<typename TimeSeries<T>::Value> values_ = timeseries.get();
 
-    // We need at least 2 values to compute aggregates.
-    if (values_.size() < 2) {
-      return None();
-    }
-
     std::vector<T> values;
     values.reserve(values_.size());
 
@@ -49,23 +51,33 @@ struct Statistics
       values.push_back(value.data);
     }
 
-    std::sort(values.begin(), values.end());
-
-    Statistics statistics;
-
-    statistics.count = values.size();
+    return from(std::move(values));
+  }
 
-    statistics.min = values.front();
-    statistics.max = values.back();
+  // Returns `Statistics` for the given container, or `None` if the container
+  // has less then 2 datapoints. The container is represented as a pair of
+  // [first, last) iterators.
+  //
+  // TODO(alexr): Consider relaxing the collection type requirement to
+  // `std::is_convertible<std::iterator_traits<It>::value_type, T>`.
+  template <
+      typename It,
+      typename = typename std::enable_if<
+          std::is_same<
+              typename std::iterator_traits<It>::value_type,
+              T>::value &&
+          std::is_convertible<
+              typename std::iterator_traits<It>::iterator_category,
+              std::forward_iterator_tag>::value>::type>
+  static Option<Statistics<T>> from(It first, It last)
+  {
+    // Copy values into a vector.
+    std::vector<T> values;
+    values.reserve(std::distance(first, last));
 
-    statistics.p50 = percentile(values, 0.5);
-    statistics.p90 = percentile(values, 0.90);
-    statistics.p95 = percentile(values, 0.95);
-    statistics.p99 = percentile(values, 0.99);
-    statistics.p999 = percentile(values, 0.999);
-    statistics.p9999 = percentile(values, 0.9999);
+    std::copy(first, last, std::back_inserter(values));
 
-    return statistics;
+    return from(std::move(values));
   }
 
   size_t count;
@@ -82,8 +94,36 @@ struct Statistics
   T p9999;
 
 private:
+  // Calculates `Statistics` from the provided vector; note pass by reference.
+  static Option<Statistics<T>> from(std::vector<T>&& values)
+  {
+    // We need at least 2 values to compute aggregates.
+    if (values.size() < 2) {
+      return None();
+    }
+
+    std::sort(values.begin(), values.end());
+
+    Statistics statistics;
+
+    statistics.count = values.size();
+
+    statistics.min = values.front();
+    statistics.max = values.back();
+
+    statistics.p50 = percentile(values, 0.5);
+    statistics.p90 = percentile(values, 0.90);
+    statistics.p95 = percentile(values, 0.95);
+    statistics.p99 = percentile(values, 0.99);
+    statistics.p999 = percentile(values, 0.999);
+    statistics.p9999 = percentile(values, 0.9999);
+
+    return statistics;
+  }
+
   // Returns the requested percentile from the sorted values.
   // Note that we need at least two values to compute percentiles!
+  //
   // TODO(dhamon): Use a 'Percentage' abstraction.
   static T percentile(const std::vector<T>& values, double percentile)
   {
@@ -105,7 +145,7 @@ private:
     CHECK_GE(index, 0u);
     CHECK_LT(index, values.size() - 1);
 
-    return values[index] + delta * (values[index + 1] - values[index]);
+    return values[index] + (values[index + 1] - values[index]) * delta;
   }
 };
 
diff --git a/3rdparty/libprocess/src/tests/statistics_tests.cpp b/3rdparty/libprocess/src/tests/statistics_tests.cpp
index a2a780b..96f5fd3 100644
--- a/3rdparty/libprocess/src/tests/statistics_tests.cpp
+++ b/3rdparty/libprocess/src/tests/statistics_tests.cpp
@@ -12,6 +12,8 @@
 
 #include <gtest/gtest.h>
 
+#include <list>
+
 #include <process/clock.hpp>
 #include <process/statistics.hpp>
 
@@ -23,6 +25,8 @@ using process::Statistics;
 using process::Time;
 using process::TimeSeries;
 
+using std::list;
+
 TEST(StatisticsTest, Empty)
 {
   TimeSeries<double> timeseries;
@@ -41,7 +45,7 @@ TEST(StatisticsTest, Single)
 }
 
 
-TEST(StatisticsTest, Statistics)
+TEST(StatisticsTest, StatisticsFromTimeSeries)
 {
   // Create a distribution of 10 values from -5 to 4.
   TimeSeries<double> timeseries;
@@ -69,3 +73,24 @@ TEST(StatisticsTest, Statistics)
   EXPECT_DOUBLE_EQ(4.99, statistics->p999);
   EXPECT_DOUBLE_EQ(4.999, statistics->p9999);
 }
+
+
+TEST(StatisticsTest, StatisticsFromDurationList)
+{
+  list<Duration> values{
+    Seconds(0), Seconds(10), Seconds(20), Seconds(30), Seconds(40), Seconds(50),
+    Seconds(60), Seconds(70), Seconds(80), Seconds(90), Seconds(100)};
+
+  Option<Statistics<Duration>> statistics = Statistics<Duration>::from(
+      values.cbegin(), values.cend());
+
+  EXPECT_SOME(statistics);
+
+  EXPECT_EQ(11u, statistics->count);
+
+  EXPECT_EQ(Seconds(0), statistics->min);
+  EXPECT_EQ(Seconds(100), statistics->max);
+
+  EXPECT_EQ(Seconds(50), statistics->p50);
+  EXPECT_EQ(Seconds(90), statistics->p90);
+}