You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by al...@apache.org on 2018/08/30 20:40:28 UTC
[mesos] 01/02: Augmented `Statistics` to work with any collection.
This is an automated email from the ASF dual-hosted git repository.
alexr pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git
commit 21e4b2bdc70c2726a193e2c27bf7c1ebb798ca57
Author: Alexander Rukletsov <al...@apache.org>
AuthorDate: Thu Aug 2 12:21:48 2018 +0200
Augmented `Statistics` to work with any collection.
Review: https://reviews.apache.org/r/68224
---
3rdparty/libprocess/include/process/statistics.hpp | 86 ++++++++++++++++------
3rdparty/libprocess/src/tests/statistics_tests.cpp | 27 ++++++-
2 files changed, 89 insertions(+), 24 deletions(-)
diff --git a/3rdparty/libprocess/include/process/statistics.hpp b/3rdparty/libprocess/include/process/statistics.hpp
index e9f1fc2..7d787c4 100644
--- a/3rdparty/libprocess/include/process/statistics.hpp
+++ b/3rdparty/libprocess/include/process/statistics.hpp
@@ -16,6 +16,8 @@
#include <glog/logging.h>
#include <algorithm>
+#include <iterator>
+#include <type_traits>
#include <vector>
#include <process/timeseries.hpp>
@@ -25,23 +27,23 @@
namespace process {
-// Represents statistics for a TimeSeries of data.
+// Represents statistics for a `TimeSeries` of data or a standard container.
template <typename T>
struct Statistics
{
- // Returns Statistics for the given TimeSeries, or None() if the
- // TimeSeries is empty.
+ // Returns `Statistics` for the given `TimeSeries`, or `None` if the
+ // `TimeSeries` has less then 2 datapoints.
+ //
// TODO(dhamon): Consider adding a histogram abstraction for better
// performance.
+ //
+ // Remove this specification once we can construct directly from
+ // `TimeSeries<T>::Value`, e.g., by using an iterator adaptor, see
+ // https://www.boost.org/doc/libs/1_51_0/libs/range/doc/html/range/reference/adaptors/reference/map_values.html // NOLINT(whitespace/line_length)
static Option<Statistics<T>> from(const TimeSeries<T>& timeseries)
{
std::vector<typename TimeSeries<T>::Value> values_ = timeseries.get();
- // We need at least 2 values to compute aggregates.
- if (values_.size() < 2) {
- return None();
- }
-
std::vector<T> values;
values.reserve(values_.size());
@@ -49,23 +51,33 @@ struct Statistics
values.push_back(value.data);
}
- std::sort(values.begin(), values.end());
-
- Statistics statistics;
-
- statistics.count = values.size();
+ return from(std::move(values));
+ }
- statistics.min = values.front();
- statistics.max = values.back();
+ // Returns `Statistics` for the given container, or `None` if the container
+ // has less then 2 datapoints. The container is represented as a pair of
+ // [first, last) iterators.
+ //
+ // TODO(alexr): Consider relaxing the collection type requirement to
+ // `std::is_convertible<std::iterator_traits<It>::value_type, T>`.
+ template <
+ typename It,
+ typename = typename std::enable_if<
+ std::is_same<
+ typename std::iterator_traits<It>::value_type,
+ T>::value &&
+ std::is_convertible<
+ typename std::iterator_traits<It>::iterator_category,
+ std::forward_iterator_tag>::value>::type>
+ static Option<Statistics<T>> from(It first, It last)
+ {
+ // Copy values into a vector.
+ std::vector<T> values;
+ values.reserve(std::distance(first, last));
- statistics.p50 = percentile(values, 0.5);
- statistics.p90 = percentile(values, 0.90);
- statistics.p95 = percentile(values, 0.95);
- statistics.p99 = percentile(values, 0.99);
- statistics.p999 = percentile(values, 0.999);
- statistics.p9999 = percentile(values, 0.9999);
+ std::copy(first, last, std::back_inserter(values));
- return statistics;
+ return from(std::move(values));
}
size_t count;
@@ -82,8 +94,36 @@ struct Statistics
T p9999;
private:
+ // Calculates `Statistics` from the provided vector; note pass by reference.
+ static Option<Statistics<T>> from(std::vector<T>&& values)
+ {
+ // We need at least 2 values to compute aggregates.
+ if (values.size() < 2) {
+ return None();
+ }
+
+ std::sort(values.begin(), values.end());
+
+ Statistics statistics;
+
+ statistics.count = values.size();
+
+ statistics.min = values.front();
+ statistics.max = values.back();
+
+ statistics.p50 = percentile(values, 0.5);
+ statistics.p90 = percentile(values, 0.90);
+ statistics.p95 = percentile(values, 0.95);
+ statistics.p99 = percentile(values, 0.99);
+ statistics.p999 = percentile(values, 0.999);
+ statistics.p9999 = percentile(values, 0.9999);
+
+ return statistics;
+ }
+
// Returns the requested percentile from the sorted values.
// Note that we need at least two values to compute percentiles!
+ //
// TODO(dhamon): Use a 'Percentage' abstraction.
static T percentile(const std::vector<T>& values, double percentile)
{
@@ -105,7 +145,7 @@ private:
CHECK_GE(index, 0u);
CHECK_LT(index, values.size() - 1);
- return values[index] + delta * (values[index + 1] - values[index]);
+ return values[index] + (values[index + 1] - values[index]) * delta;
}
};
diff --git a/3rdparty/libprocess/src/tests/statistics_tests.cpp b/3rdparty/libprocess/src/tests/statistics_tests.cpp
index a2a780b..96f5fd3 100644
--- a/3rdparty/libprocess/src/tests/statistics_tests.cpp
+++ b/3rdparty/libprocess/src/tests/statistics_tests.cpp
@@ -12,6 +12,8 @@
#include <gtest/gtest.h>
+#include <list>
+
#include <process/clock.hpp>
#include <process/statistics.hpp>
@@ -23,6 +25,8 @@ using process::Statistics;
using process::Time;
using process::TimeSeries;
+using std::list;
+
TEST(StatisticsTest, Empty)
{
TimeSeries<double> timeseries;
@@ -41,7 +45,7 @@ TEST(StatisticsTest, Single)
}
-TEST(StatisticsTest, Statistics)
+TEST(StatisticsTest, StatisticsFromTimeSeries)
{
// Create a distribution of 10 values from -5 to 4.
TimeSeries<double> timeseries;
@@ -69,3 +73,24 @@ TEST(StatisticsTest, Statistics)
EXPECT_DOUBLE_EQ(4.99, statistics->p999);
EXPECT_DOUBLE_EQ(4.999, statistics->p9999);
}
+
+
+TEST(StatisticsTest, StatisticsFromDurationList)
+{
+ list<Duration> values{
+ Seconds(0), Seconds(10), Seconds(20), Seconds(30), Seconds(40), Seconds(50),
+ Seconds(60), Seconds(70), Seconds(80), Seconds(90), Seconds(100)};
+
+ Option<Statistics<Duration>> statistics = Statistics<Duration>::from(
+ values.cbegin(), values.cend());
+
+ EXPECT_SOME(statistics);
+
+ EXPECT_EQ(11u, statistics->count);
+
+ EXPECT_EQ(Seconds(0), statistics->min);
+ EXPECT_EQ(Seconds(100), statistics->max);
+
+ EXPECT_EQ(Seconds(50), statistics->p50);
+ EXPECT_EQ(Seconds(90), statistics->p90);
+}