You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2020/11/04 03:12:45 UTC
[incubator-datasketches-cpp] branch req_sketch updated: min and max
This is an automated email from the ASF dual-hosted git repository.
alsay pushed a commit to branch req_sketch
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git
The following commit(s) were added to refs/heads/req_sketch by this push:
new 0df4a38 min and max
0df4a38 is described below
commit 0df4a388b829b37ebf674487277b2487947174c0
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Tue Nov 3 19:12:36 2020 -0800
min and max
---
req/include/req_sketch.hpp | 47 +++++++++++++++++++++++++++++++-
req/include/req_sketch_impl.hpp | 59 ++++++++++++++++++++++++++++++-----------
req/test/req_sketch_test.cpp | 12 ++++++---
3 files changed, 98 insertions(+), 20 deletions(-)
diff --git a/req/include/req_sketch.hpp b/req/include/req_sketch.hpp
index 30841c3..293d855 100755
--- a/req/include/req_sketch.hpp
+++ b/req/include/req_sketch.hpp
@@ -37,7 +37,9 @@ class req_sketch {
public:
using Compactor = req_compactor<T, IsHighRank, Comparator, Allocator>;
- req_sketch(uint32_t k, const Allocator& allocator = Allocator());
+ explicit req_sketch(uint32_t k, const Allocator& allocator = Allocator());
+ ~req_sketch();
+ // TODO: copy, move, assign
/**
* Returns true if this sketch is empty.
@@ -67,6 +69,22 @@ public:
void update(FwdT&& item);
/**
+ * Returns the min value of the stream.
+ * For floating point types: if the sketch is empty this returns NaN.
+ * For other types: if the sketch is empty this throws runtime_error.
+ * @return the min value of the stream
+ */
+ const T& get_min_value() const;
+
+ /**
+ * Returns the max value of the stream.
+ * For floating point types: if the sketch is empty this returns NaN.
+ * For other types: if the sketch is empty this throws runtime_error.
+ * @return the max value of the stream
+ */
+ const T& get_max_value() const;
+
+ /**
* Returns an approximation to the normalized (fractional) rank of the given item from 0 to 1 inclusive.
* With the template parameter inclusive=true the weight of the given item is included into the rank.
* Otherwise the rank equals the sum of the weights of items less than the given item according to the Comparator.
@@ -76,6 +94,7 @@ public:
* @param item to be ranked
* @return an approximate rank of the given item
*/
+
template<bool inclusive = false>
double get_rank(const T& item) const;
@@ -97,12 +116,38 @@ private:
uint64_t n_;
using AllocCompactor = typename std::allocator_traits<Allocator>::template rebind_alloc<Compactor>;
std::vector<Compactor, AllocCompactor> compactors_;
+ T* min_value_;
+ T* max_value_;
uint8_t get_num_levels() const;
void grow();
void update_max_nom_size();
void update_num_retained();
void compress();
+
+ // implementations for floating point types
+ template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
+ static const TT& get_invalid_value() {
+ static TT value = std::numeric_limits<TT>::quiet_NaN();
+ return value;
+ }
+
+ template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
+ static inline bool check_update_value(const TT& value) {
+ return !std::isnan(value);
+ }
+
+ // implementations for all other types
+ template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
+ static const TT& get_invalid_value() {
+ throw std::runtime_error("getting quantiles from empty sketch is not supported for this type of values");
+ }
+
+ template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
+ static inline bool check_update_value(const TT&) {
+ return true;
+ }
+
};
} /* namespace datasketches */
diff --git a/req/include/req_sketch_impl.hpp b/req/include/req_sketch_impl.hpp
index 86a6ae3..b37d434 100755
--- a/req/include/req_sketch_impl.hpp
+++ b/req/include/req_sketch_impl.hpp
@@ -27,15 +27,30 @@ namespace datasketches {
template<typename T, bool H, typename C, typename S, typename A>
req_sketch<T, H, C, S, A>::req_sketch(uint32_t k, const A& allocator):
allocator_(allocator),
-k_(k),
+k_(std::max(k & -2, req_constants::MIN_K)), //rounds down one if odd
max_nom_size_(0),
num_retained_(0),
-n_(0)
+n_(0),
+compactors_(allocator),
+min_value_(nullptr),
+max_value_(nullptr)
{
grow();
}
template<typename T, bool H, typename C, typename S, typename A>
+req_sketch<T, H, C, S, A>::~req_sketch() {
+ if (min_value_ != nullptr) {
+ min_value_->~T();
+ allocator_.deallocate(min_value_, 1);
+ }
+ if (max_value_ != nullptr) {
+ max_value_->~T();
+ allocator_.deallocate(max_value_, 1);
+ }
+}
+
+template<typename T, bool H, typename C, typename S, typename A>
bool req_sketch<T, H, C, S, A>::is_empty() const {
return n_ == 0;
}
@@ -58,14 +73,14 @@ bool req_sketch<T, H, C, S, A>::is_estimation_mode() const {
template<typename T, bool H, typename C, typename S, typename A>
template<typename FwdT>
void req_sketch<T, H, C, S, A>::update(FwdT&& item) {
-// if (Float.isNaN(item)) { return; }
-// if (isEmpty()) {
-// minValue = item;
-// maxValue = item;
-// } else {
-// if (item < minValue) { minValue = item; }
-// if (item > maxValue) { maxValue = item; }
-// }
+ if (!check_update_value(item)) { return; }
+ if (is_empty()) {
+ min_value_ = new (allocator_.allocate(1)) T(item);
+ max_value_ = new (allocator_.allocate(1)) T(item);
+ } else {
+ if (C()(item, *min_value_)) *min_value_ = item;
+ if (C()(*max_value_, item)) *max_value_ = item;
+ }
compactors_[0].append(item);
++num_retained_;
++n_;
@@ -73,9 +88,19 @@ void req_sketch<T, H, C, S, A>::update(FwdT&& item) {
compactors_[0].sort();
compress();
}
- // aux = null;
}
+template<typename T, bool H, typename C, typename S, typename A>
+const T& req_sketch<T, H, C, S, A>::get_min_value() const {
+ if (is_empty()) return get_invalid_value();
+ return *min_value_;
+}
+
+template<typename T, bool H, typename C, typename S, typename A>
+const T& req_sketch<T, H, C, S, A>::get_max_value() const {
+ if (is_empty()) return get_invalid_value();
+ return *max_value_;
+}
template<typename T, bool H, typename C, typename S, typename A>
template<bool inclusive>
@@ -90,7 +115,9 @@ double req_sketch<T, H, C, S, A>::get_rank(const T& item) const {
template<typename T, bool H, typename C, typename S, typename A>
template<bool inclusive>
const T& req_sketch<T, H, C, S, A>::get_quantile(double rank) const {
- if (is_empty()) throw new std::invalid_argument("sketch is empty");
+ if (is_empty()) return get_invalid_value();
+ if (rank == 0.0) return *min_value_;
+ if (rank == 1.0) return *max_value_;
if ((rank < 0.0) || (rank > 1.0)) {
throw std::invalid_argument("Rank cannot be less than zero or greater than 1.0");
}
@@ -161,10 +188,10 @@ string<A> req_sketch<T, H, C, S, A>::to_string(bool print_levels, bool print_ite
os << " Retained items : " << num_retained_ << std::endl;
os << " Capacity items : " << max_nom_size_ << std::endl;
// os << " Storage bytes : " << get_serialized_size_bytes() << std::endl;
-// if (!is_empty()) {
-// os << " Min value : " << *min_value_ << std::endl;
-// os << " Max value : " << *max_value_ << std::endl;
-// }
+ if (!is_empty()) {
+ os << " Min value : " << *min_value_ << std::endl;
+ os << " Max value : " << *max_value_ << std::endl;
+ }
os << "### End sketch summary" << std::endl;
if (print_levels) {
diff --git a/req/test/req_sketch_test.cpp b/req/test/req_sketch_test.cpp
index d55907f..3f607de 100755
--- a/req/test/req_sketch_test.cpp
+++ b/req/test/req_sketch_test.cpp
@@ -39,6 +39,11 @@ TEST_CASE("req sketch: empty", "[req_sketch]") {
REQUIRE(sketch.get_num_retained() == 0);
REQUIRE(std::isnan(sketch.get_rank(0)));
REQUIRE(std::isnan(sketch.get_rank(std::numeric_limits<float>::infinity())));
+ REQUIRE(std::isnan(sketch.get_min_value()));
+ REQUIRE(std::isnan(sketch.get_max_value()));
+ REQUIRE(std::isnan(sketch.get_quantile(0)));
+ REQUIRE(std::isnan(sketch.get_quantile(0.5)));
+ REQUIRE(std::isnan(sketch.get_quantile(1)));
}
TEST_CASE("req sketch: single value", "[req_sketch]") {
@@ -113,19 +118,20 @@ TEST_CASE("req sketch: exact mode", "[req_sketch]") {
}
TEST_CASE("req sketch: estimation mode", "[req_sketch]") {
- std::cout << "estimation mode test\n";
req_sketch<float, true> sketch(100);
- const size_t n = 1250;
+ const size_t n = 100000;
for (size_t i = 0; i < n; ++i) sketch.update(i);
REQUIRE_FALSE(sketch.is_empty());
REQUIRE(sketch.is_estimation_mode());
REQUIRE(sketch.get_n() == n);
- std::cout << sketch.to_string(true, true);
+// std::cout << sketch.to_string(true);
REQUIRE(sketch.get_num_retained() < n);
REQUIRE(sketch.get_rank(0) == 0);
REQUIRE(sketch.get_rank(n) == 1);
REQUIRE(sketch.get_rank(n / 2) == Approx(0.5).margin(0.01));
REQUIRE(sketch.get_rank(n - 1) == Approx(1).margin(0.01));
+ REQUIRE(sketch.get_min_value() == 0);
+ REQUIRE(sketch.get_max_value() == n - 1);
}
} /* namespace datasketches */
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org