You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2022/11/02 20:38:55 UTC
[datasketches-cpp] 01/01: throw upon undefined operation
This is an automated email from the ASF dual-hosted git repository.
alsay pushed a commit to branch quantiles_throw_if_empty
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git
commit 45e1a424b475f7146a8e282783851e826032edbc
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Wed Nov 2 13:38:47 2022 -0700
throw upon undefined operation
---
common/include/quantiles_sorted_view_impl.hpp | 3 +++
common/test/quantiles_sorted_view_test.cpp | 9 +++++++
kll/include/kll_sketch.hpp | 36 ++++++++-------------------
kll/include/kll_sketch_impl.hpp | 15 ++++++-----
kll/test/kll_sketch_test.cpp | 22 ++++++++--------
quantiles/include/quantiles_sketch.hpp | 33 +++++++-----------------
quantiles/include/quantiles_sketch_impl.hpp | 14 ++++++-----
quantiles/test/quantiles_sketch_test.cpp | 22 ++++++++--------
req/include/req_sketch.hpp | 33 +++++++++---------------
req/include/req_sketch_impl.hpp | 12 ++++++---
req/test/req_sketch_test.cpp | 29 ++++++++++-----------
11 files changed, 103 insertions(+), 125 deletions(-)
diff --git a/common/include/quantiles_sorted_view_impl.hpp b/common/include/quantiles_sorted_view_impl.hpp
index b6c9206..326301e 100755
--- a/common/include/quantiles_sorted_view_impl.hpp
+++ b/common/include/quantiles_sorted_view_impl.hpp
@@ -62,6 +62,7 @@ void quantiles_sorted_view<T, C, A>::convert_to_cummulative() {
template<typename T, typename C, typename A>
double quantiles_sorted_view<T, C, A>::get_rank(const T& item, bool inclusive) const {
+ if (entries_.empty()) throw std::runtime_error("operation is undefined for an empty sketch");
auto it = inclusive ?
std::upper_bound(entries_.begin(), entries_.end(), Entry(ref_helper(item), 0), compare_pairs_by_first(comparator_))
: std::lower_bound(entries_.begin(), entries_.end(), Entry(ref_helper(item), 0), compare_pairs_by_first(comparator_));
@@ -73,6 +74,7 @@ double quantiles_sorted_view<T, C, A>::get_rank(const T& item, bool inclusive) c
template<typename T, typename C, typename A>
auto quantiles_sorted_view<T, C, A>::get_quantile(double rank, bool inclusive) const -> quantile_return_type {
+ if (entries_.empty()) throw std::runtime_error("operation is undefined for an empty sketch");
uint64_t weight = inclusive ? std::ceil(rank * total_weight_) : rank * total_weight_;
auto it = inclusive ?
std::lower_bound(entries_.begin(), entries_.end(), make_dummy_entry<T>(weight), compare_pairs_by_second())
@@ -83,6 +85,7 @@ auto quantiles_sorted_view<T, C, A>::get_quantile(double rank, bool inclusive) c
template<typename T, typename C, typename A>
auto quantiles_sorted_view<T, C, A>::get_CDF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
+ if (entries_.empty()) throw std::runtime_error("operation is undefined for an empty sketch");
vector_double buckets(entries_.get_allocator());
if (entries_.size() == 0) return buckets;
check_split_points(split_points, size);
diff --git a/common/test/quantiles_sorted_view_test.cpp b/common/test/quantiles_sorted_view_test.cpp
index 3c7652c..a9ecfbd 100644
--- a/common/test/quantiles_sorted_view_test.cpp
+++ b/common/test/quantiles_sorted_view_test.cpp
@@ -28,6 +28,15 @@
namespace datasketches {
+TEST_CASE("empty", "sorted view") {
+ auto view = quantiles_sorted_view<float, std::less<float>, std::allocator<float>>(1, std::less<float>(), std::allocator<float>());
+ REQUIRE_THROWS_AS(view.get_rank(0), std::runtime_error);
+ REQUIRE_THROWS_AS(view.get_quantile(0), std::runtime_error);
+ const float split_points[1] {0};
+ REQUIRE_THROWS_AS(view.get_CDF(split_points, 1), std::runtime_error);
+ REQUIRE_THROWS_AS(view.get_PMF(split_points, 1), std::runtime_error);
+}
+
TEST_CASE("set 0", "sorted view") {
auto view = quantiles_sorted_view<float, std::less<float>, std::allocator<float>>(1, std::less<float>(), std::allocator<float>());
std::vector<float> l0 {10};
diff --git a/kll/include/kll_sketch.hpp b/kll/include/kll_sketch.hpp
index fe0bc21..d0cadc8 100644
--- a/kll/include/kll_sketch.hpp
+++ b/kll/include/kll_sketch.hpp
@@ -22,7 +22,6 @@
#include <memory>
#include <vector>
-#include <stdexcept>
#include "common_defs.hpp"
#include "serde.hpp"
@@ -228,16 +227,14 @@ class kll_sketch {
/**
* Returns the min item of the stream.
- * For floating point types: if the sketch is empty this returns NaN.
- * For other types: if the sketch is empty this throws runtime_error.
+ * If the sketch is empty this throws std::runtime_error.
* @return the min item of the stream
*/
T get_min_item() const;
/**
* Returns the max item of the stream.
- * For floating point types: if the sketch is empty this returns NaN.
- * For other types: if the sketch is empty this throws runtime_error.
+ * If the sketch is empty this throws std::runtime_error.
* @return the max item of the stream
*/
T get_max_item() const;
@@ -257,9 +254,8 @@ class kll_sketch {
/**
* Returns an item from the sketch that is the best approximation to an item
* from the original stream with the given rank.
- * <p>
- * For floating point types: if the sketch is empty this returns NaN.
- * For other types: if the sketch is empty this throws runtime_error.
+ *
+ * <p>If the sketch is empty this throws std::runtime_error.
*
* @param rank of an item in the hypothetical sorted stream.
* @param inclusive if true, the given rank is considered inclusive (includes weight of an item)
@@ -273,10 +269,11 @@ class kll_sketch {
* This returns an array that could have been generated by using get_quantile() for each
* rank separately.
*
- * <p>If the sketch is empty this returns an empty vector.
+ * <p>If the sketch is empty this throws std::runtime_error.
*
* @param ranks given array of ranks in the hypothetical sorted stream.
* These ranks must be in the interval [0.0, 1.0].
+ * @param size the number of ranks in the array
* @param inclusive if true, the given ranks are considered inclusive (include weights of items)
*
* @return array of approximate quantiles corresponding to the given ranks in the same order.
@@ -289,7 +286,7 @@ class kll_sketch {
* This is a multiple-query version of get_quantile() that allows the caller to
* specify the number of evenly-spaced ranks.
*
- * <p>If the sketch is empty this returns an empty vector.
+ * <p>If the sketch is empty this throws std::runtime_error.
*
* @param num an integer that specifies the number of evenly-spaced ranks.
* This must be an integer greater than 0. A value of 1 will return the quantile of rank 0.
@@ -309,7 +306,7 @@ class kll_sketch {
* <p>The resulting approximation has a probabilistic guarantee that can be obtained from the
* get_normalized_rank_error(false) function.
*
- * <p>If the sketch is empty the result is undefined (NaN).
+ * <p>If the sketch is empty this throws std::runtime_error.
*
* @param item to be ranked.
* @param inclusive if true the weight of the given item is included into the rank.
@@ -327,7 +324,7 @@ class kll_sketch {
* <p>The resulting approximations have a probabilistic guarantee that can be obtained from the
* get_normalized_rank_error(true) function.
*
- * <p>If the sketch is empty this returns an empty vector.
+ * <p>If the sketch is empty this throws std::runtime_error.
*
* @param split_points an array of <i>m</i> unique, monotonically increasing items
* that divide the input domain into <i>m+1</i> consecutive disjoint intervals (bins).
@@ -352,7 +349,7 @@ class kll_sketch {
* <p>The resulting approximations have a probabilistic guarantee that can be obtained from the
* get_normalized_rank_error(false) function.
*
- * <p>If the sketch is empty this returns an empty vector.
+ * <p>If the sketch is empty this throws std::runtime_error.
*
* @param split_points an array of <i>m</i> unique, monotonically increasing items
* that divide the input domain into <i>m+1</i> consecutive disjoint intervals.
@@ -571,24 +568,11 @@ class kll_sketch {
void check_sorting() const;
- // implementations for floating point types
- template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
- static const TT& get_invalid_item() {
- static TT item = std::numeric_limits<TT>::quiet_NaN();
- return item;
- }
-
template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
static inline bool check_update_item(TT item) {
return !std::isnan(item);
}
- // implementations for all other types
- template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
- static const TT& get_invalid_item() {
- throw std::runtime_error("getting quantiles from empty sketch is not supported for this type of item");
- }
-
template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
static inline bool check_update_item(TT) {
return true;
diff --git a/kll/include/kll_sketch_impl.hpp b/kll/include/kll_sketch_impl.hpp
index 4822098..894d7b2 100644
--- a/kll/include/kll_sketch_impl.hpp
+++ b/kll/include/kll_sketch_impl.hpp
@@ -23,6 +23,7 @@
#include <iostream>
#include <iomanip>
#include <sstream>
+#include <stdexcept>
#include "conditional_forward.hpp"
#include "count_zeros.hpp"
@@ -269,13 +270,13 @@ bool kll_sketch<T, C, A>::is_estimation_mode() const {
template<typename T, typename C, typename A>
T kll_sketch<T, C, A>::get_min_item() const {
- if (is_empty()) return get_invalid_item();
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
return *min_item_;
}
template<typename T, typename C, typename A>
T kll_sketch<T, C, A>::get_max_item() const {
- if (is_empty()) return get_invalid_item();
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
return *max_item_;
}
@@ -291,26 +292,28 @@ A kll_sketch<T, C, A>::get_allocator() const {
template<typename T, typename C, typename A>
double kll_sketch<T, C, A>::get_rank(const T& item, bool inclusive) const {
- if (is_empty()) return std::numeric_limits<double>::quiet_NaN();
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
setup_sorted_view();
return sorted_view_->get_rank(item, inclusive);
}
template<typename T, typename C, typename A>
auto kll_sketch<T, C, A>::get_PMF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
setup_sorted_view();
return sorted_view_->get_PMF(split_points, size, inclusive);
}
template<typename T, typename C, typename A>
auto kll_sketch<T, C, A>::get_CDF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
setup_sorted_view();
return sorted_view_->get_CDF(split_points, size, inclusive);
}
template<typename T, typename C, typename A>
auto kll_sketch<T, C, A>::get_quantile(double rank, bool inclusive) const -> quantile_return_type {
- if (is_empty()) return get_invalid_item();
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
if ((rank < 0.0) || (rank > 1.0)) {
throw std::invalid_argument("normalized rank cannot be less than zero or greater than 1.0");
}
@@ -321,8 +324,8 @@ auto kll_sketch<T, C, A>::get_quantile(double rank, bool inclusive) const -> qua
template<typename T, typename C, typename A>
std::vector<T, A> kll_sketch<T, C, A>::get_quantiles(const double* ranks, uint32_t size, bool inclusive) const {
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
std::vector<T, A> quantiles(allocator_);
- if (is_empty()) return quantiles;
quantiles.reserve(size);
// may have a side effect of sorting level zero if needed
@@ -340,7 +343,7 @@ std::vector<T, A> kll_sketch<T, C, A>::get_quantiles(const double* ranks, uint32
template<typename T, typename C, typename A>
std::vector<T, A> kll_sketch<T, C, A>::get_quantiles(uint32_t num, bool inclusive) const {
- if (is_empty()) return std::vector<T, A>(allocator_);
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
if (num == 0) {
throw std::invalid_argument("num must be > 0");
}
diff --git a/kll/test/kll_sketch_test.cpp b/kll/test/kll_sketch_test.cpp
index c317c1e..48b64e9 100644
--- a/kll/test/kll_sketch_test.cpp
+++ b/kll/test/kll_sketch_test.cpp
@@ -63,15 +63,15 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
REQUIRE_FALSE(sketch.is_estimation_mode());
REQUIRE(sketch.get_n() == 0);
REQUIRE(sketch.get_num_retained() == 0);
- REQUIRE(std::isnan(sketch.get_rank(0)));
- REQUIRE(std::isnan(sketch.get_min_item()));
- REQUIRE(std::isnan(sketch.get_max_item()));
- REQUIRE(std::isnan(sketch.get_quantile(0.5)));
+ REQUIRE_THROWS_AS(sketch.get_min_item(), std::runtime_error);
+ REQUIRE_THROWS_AS(sketch.get_max_item(), std::runtime_error);
+ REQUIRE_THROWS_AS(sketch.get_rank(0), std::runtime_error);
+ REQUIRE_THROWS_AS(sketch.get_quantile(0.5), std::runtime_error);
const double ranks[3] {0, 0.5, 1};
- REQUIRE(sketch.get_quantiles(ranks, 3).size() == 0);
+ REQUIRE_THROWS_AS(sketch.get_quantiles(ranks, 3), std::runtime_error);
const float split_points[1] {0};
- REQUIRE(sketch.get_PMF(split_points, 1).size() == 0);
- REQUIRE(sketch.get_CDF(split_points, 1).size() == 0);
+ REQUIRE_THROWS_AS(sketch.get_PMF(split_points, 1), std::runtime_error);
+ REQUIRE_THROWS_AS(sketch.get_CDF(split_points, 1), std::runtime_error);
for (auto it: sketch) {
(void) it; // to suppress "unused" warning
@@ -284,8 +284,8 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
REQUIRE(sketch2.get_n() == sketch.get_n());
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
- REQUIRE(std::isnan(sketch2.get_min_item()));
- REQUIRE(std::isnan(sketch2.get_max_item()));
+ REQUIRE_THROWS_AS(sketch2.get_min_item(), std::runtime_error);
+ REQUIRE_THROWS_AS(sketch2.get_max_item(), std::runtime_error);
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
}
@@ -299,8 +299,8 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
REQUIRE(sketch2.get_n() == sketch.get_n());
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
- REQUIRE(std::isnan(sketch2.get_min_item()));
- REQUIRE(std::isnan(sketch2.get_max_item()));
+ REQUIRE_THROWS_AS(sketch2.get_min_item(), std::runtime_error);
+ REQUIRE_THROWS_AS(sketch2.get_max_item(), std::runtime_error);
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
}
diff --git a/quantiles/include/quantiles_sketch.hpp b/quantiles/include/quantiles_sketch.hpp
index 19f3519..da80766 100644
--- a/quantiles/include/quantiles_sketch.hpp
+++ b/quantiles/include/quantiles_sketch.hpp
@@ -216,16 +216,14 @@ public:
/**
* Returns the min item of the stream.
- * For floating point types: if the sketch is empty this returns NaN.
- * For other types: if the sketch is empty this throws runtime_error.
+ * If the sketch is empty this throws std::runtime_error.
* @return the min item of the stream
*/
const T& get_min_item() const;
/**
* Returns the max item of the stream.
- * For floating point types: if the sketch is empty this returns NaN.
- * For other types: if the sketch is empty this throws runtime_error.
+ * If the sketch is empty this throws std::runtime_error.
* @return the max item of the stream
*/
const T& get_max_item() const;
@@ -246,8 +244,7 @@ public:
* Returns an approximation to the data item associated with the given rank
* of a hypothetical sorted version of the input stream so far.
* <p>
- * For floating point types: if the sketch is empty this returns NaN.
- * For other types: if the sketch is empty this throws runtime_error.
+ * If the sketch is empty this throws std::runtime_error.
*
* @param rank the specified normalized rank in the hypothetical sorted stream.
*
@@ -262,10 +259,11 @@ public:
* This returns an array that could have been generated by using get_quantile() for each
* normalized rank separately.
*
- * <p>If the sketch is empty this returns an empty vector.
+ * <p>If the sketch is empty this throws std::runtime_error.
*
* @param ranks given array of normalized ranks in the hypothetical sorted stream.
* These ranks must be in the interval [0.0, 1.0], inclusive.
+ * @param size the number of ranks in the array
*
* @return array of approximations to items associated with given ranks in the same order as given ranks
* in the input array.
@@ -278,7 +276,7 @@ public:
* This is a multiple-query version of get_quantile() that allows the caller to
* specify the number of evenly-spaced normalized ranks.
*
- * <p>If the sketch is empty this returns an empty vector.
+ * <p>If the sketch is empty this throws std::runtime_error.
*
* @param num an integer that specifies the number of evenly-spaced ranks.
* This must be an integer greater than 0. A value of 1 is equivalent to get_quantiles([0]).
@@ -297,7 +295,7 @@ public:
* <p>The resulting approximation has a probabilistic guarantee that can be obtained from the
* get_normalized_rank_error(false) function.
*
- * <p>If the sketch is empty this returns NaN.
+ * <p>If the sketch is empty this throws std::runtime_error.
*
* @param item to be ranked
* @param inclusive if true the weight of the given item is included into the rank.
@@ -314,7 +312,7 @@ public:
* <p>The resulting approximations have a probabilistic guarantee that can be obtained from the
* get_normalized_rank_error(true) function.
*
- * <p>If the sketch is empty this returns an empty vector.
+ * <p>If the sketch is empty this throws std::runtime_error.
*
* @param split_points an array of <i>m</i> unique, monotonically increasing items
* that divide the input domain into <i>m+1</i> consecutive disjoint intervals (bins).
@@ -339,7 +337,7 @@ public:
* <p>The resulting approximations have a probabilistic guarantee that can be obtained from the
* get_normalized_rank_error(false) function.
*
- * <p>If the sketch is empty this returns an empty vector.
+ * <p>If the sketch is empty this throws std::runtime_error.
*
* @param split_points an array of <i>m</i> unique, monotonically increasing items
* that divide the input domain into <i>m+1</i> consecutive disjoint intervals.
@@ -569,24 +567,11 @@ private:
*/
static uint8_t lowest_zero_bit_starting_at(uint64_t bits, uint8_t starting_bit);
- // implementations for floating point types
- template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
- static const TT& get_invalid_item() {
- static TT item = std::numeric_limits<TT>::quiet_NaN();
- return item;
- }
-
template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
static inline bool check_update_item(TT item) {
return !std::isnan(item);
}
- // implementations for all other types
- template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
- static const TT& get_invalid_item() {
- throw std::runtime_error("getting quantiles from empty sketch is not supported for this type of items");
- }
-
template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
static inline bool check_update_item(TT) {
return true;
diff --git a/quantiles/include/quantiles_sketch_impl.hpp b/quantiles/include/quantiles_sketch_impl.hpp
index c0a4d7e..d0d972f 100644
--- a/quantiles/include/quantiles_sketch_impl.hpp
+++ b/quantiles/include/quantiles_sketch_impl.hpp
@@ -686,13 +686,13 @@ uint32_t quantiles_sketch<T, C, A>::get_num_retained() const {
template<typename T, typename C, typename A>
const T& quantiles_sketch<T, C, A>::get_min_item() const {
- if (is_empty()) return get_invalid_item();
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
return *min_item_;
}
template<typename T, typename C, typename A>
const T& quantiles_sketch<T, C, A>::get_max_item() const {
- if (is_empty()) return get_invalid_item();
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
return *max_item_;
}
@@ -761,7 +761,7 @@ quantiles_sorted_view<T, C, A> quantiles_sketch<T, C, A>::get_sorted_view() cons
template<typename T, typename C, typename A>
auto quantiles_sketch<T, C, A>::get_quantile(double rank, bool inclusive) const -> quantile_return_type {
- if (is_empty()) return get_invalid_item();
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
if ((rank < 0.0) || (rank > 1.0)) {
throw std::invalid_argument("Normalized rank cannot be less than 0 or greater than 1");
}
@@ -772,8 +772,8 @@ auto quantiles_sketch<T, C, A>::get_quantile(double rank, bool inclusive) const
template<typename T, typename C, typename A>
std::vector<T, A> quantiles_sketch<T, C, A>::get_quantiles(const double* ranks, uint32_t size, bool inclusive) const {
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
std::vector<T, A> quantiles(allocator_);
- if (is_empty()) return quantiles;
quantiles.reserve(size);
// possible side-effect: sorting base buffer
@@ -791,7 +791,7 @@ std::vector<T, A> quantiles_sketch<T, C, A>::get_quantiles(const double* ranks,
template<typename T, typename C, typename A>
std::vector<T, A> quantiles_sketch<T, C, A>::get_quantiles(uint32_t num, bool inclusive) const {
- if (is_empty()) return std::vector<T, A>(allocator_);
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
if (num == 0) {
throw std::invalid_argument("num must be > 0");
}
@@ -808,19 +808,21 @@ std::vector<T, A> quantiles_sketch<T, C, A>::get_quantiles(uint32_t num, bool in
template<typename T, typename C, typename A>
double quantiles_sketch<T, C, A>::get_rank(const T& item, bool inclusive) const {
- if (is_empty()) return std::numeric_limits<double>::quiet_NaN();
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
setup_sorted_view();
return sorted_view_->get_rank(item, inclusive);
}
template<typename T, typename C, typename A>
auto quantiles_sketch<T, C, A>::get_PMF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
setup_sorted_view();
return sorted_view_->get_PMF(split_points, size, inclusive);
}
template<typename T, typename C, typename A>
auto quantiles_sketch<T, C, A>::get_CDF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
setup_sorted_view();
return sorted_view_->get_CDF(split_points, size, inclusive);
}
diff --git a/quantiles/test/quantiles_sketch_test.cpp b/quantiles/test/quantiles_sketch_test.cpp
index 8e0eddd..3e3a884 100644
--- a/quantiles/test/quantiles_sketch_test.cpp
+++ b/quantiles/test/quantiles_sketch_test.cpp
@@ -61,15 +61,15 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
REQUIRE_FALSE(sketch.is_estimation_mode());
REQUIRE(sketch.get_n() == 0);
REQUIRE(sketch.get_num_retained() == 0);
- REQUIRE(std::isnan(sketch.get_rank(0)));
- REQUIRE(std::isnan(sketch.get_min_item()));
- REQUIRE(std::isnan(sketch.get_max_item()));
- REQUIRE(std::isnan(sketch.get_quantile(0.5)));
+ REQUIRE_THROWS_AS(sketch.get_min_item(), std::runtime_error);
+ REQUIRE_THROWS_AS(sketch.get_max_item(), std::runtime_error);
+ REQUIRE_THROWS_AS(sketch.get_rank(0), std::runtime_error);
+ REQUIRE_THROWS_AS(sketch.get_quantile(0.5), std::runtime_error);
const double fractions[3] {0, 0.5, 1};
- REQUIRE(sketch.get_quantiles(fractions, 3).empty());
+ REQUIRE_THROWS_AS(sketch.get_quantiles(fractions, 3).empty(), std::runtime_error);
const float split_points[1] {0};
- REQUIRE(sketch.get_PMF(split_points, 1).empty());
- REQUIRE(sketch.get_CDF(split_points, 1).empty());
+ REQUIRE_THROWS_AS(sketch.get_PMF(split_points, 1), std::runtime_error);
+ REQUIRE_THROWS_AS(sketch.get_CDF(split_points, 1), std::runtime_error);
for (auto it: sketch) {
unused(it);
@@ -304,8 +304,8 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
REQUIRE(sketch2.get_n() == sketch.get_n());
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
- REQUIRE(std::isnan(sketch2.get_min_item()));
- REQUIRE(std::isnan(sketch2.get_max_item()));
+ REQUIRE_THROWS_AS(sketch2.get_min_item(), std::runtime_error);
+ REQUIRE_THROWS_AS(sketch2.get_max_item(), std::runtime_error);
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
}
@@ -320,8 +320,8 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
REQUIRE(sketch2.get_n() == sketch.get_n());
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
- REQUIRE(std::isnan(sketch2.get_min_item()));
- REQUIRE(std::isnan(sketch2.get_max_item()));
+ REQUIRE_THROWS_AS(sketch2.get_min_item(), std::runtime_error);
+ REQUIRE_THROWS_AS(sketch2.get_max_item(), std::runtime_error);
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
}
diff --git a/req/include/req_sketch.hpp b/req/include/req_sketch.hpp
index 0ec8286..f033b6a 100755
--- a/req/include/req_sketch.hpp
+++ b/req/include/req_sketch.hpp
@@ -20,7 +20,7 @@
#ifndef REQ_SKETCH_HPP_
#define REQ_SKETCH_HPP_
-#include <stdexcept>
+#include <iterator>
#include "req_common.hpp"
#include "req_compactor.hpp"
@@ -120,16 +120,14 @@ public:
/**
* Returns the min item of the stream.
- * For floating point types: if the sketch is empty this returns NaN.
- * For other types: if the sketch is empty this throws runtime_error.
+ * If the sketch is empty this throws std::runtime_error.
* @return the min item of the stream
*/
const T& get_min_item() const;
/**
* Returns the max item of the stream.
- * For floating point types: if the sketch is empty this returns NaN.
- * For other types: if the sketch is empty this throws runtime_error.
+ * If the sketch is empty this throws std::runtime_error.
* @return the max item of the stream
*/
const T& get_max_item() const;
@@ -149,7 +147,7 @@ public:
/**
* Returns an approximation to the normalized rank of the given item from 0 to 1 inclusive.
*
- * <p>If the sketch is empty the result is undefined (NaN).
+ * <p>If the sketch is empty this throws std::runtime_error.
*
* @param item to be ranked.
* @param inclusive if true the weight of the given item is included into the rank.
@@ -164,7 +162,7 @@ public:
* Returns an approximation to the Probability Mass Function (PMF) of the input stream
* given a set of split points (items).
*
- * <p>If the sketch is empty this returns an empty vector.
+ * <p>If the sketch is empty this throws std::runtime_error.
*
* @param split_points an array of <i>m</i> unique, monotonically increasing items
* that divide the input domain into <i>m+1</i> consecutive disjoint intervals (bins).
@@ -186,7 +184,7 @@ public:
* Returns an approximation to the Cumulative Distribution Function (CDF), which is the
* cumulative analog of the PMF, of the input stream given a set of split points (items).
*
- * <p>If the sketch is empty this returns an empty vector.
+ * <p>If the sketch is empty this throws std::runtime_error.
*
* @param split_points an array of <i>m</i> unique, monotonically increasing items
* that divide the input domain into <i>m+1</i> consecutive disjoint intervals.
@@ -209,6 +207,8 @@ public:
/**
* Returns an approximate quantile of the given normalized rank.
* The normalized rank must be in the range [0.0, 1.0] (both inclusive).
+ * <p>If the sketch is empty this throws std::runtime_error.
+ *
* @param rank of an item in the hypothetical sorted stream.
* @param inclusive if true, the given rank is considered inclusive (includes weight of an item)
*
@@ -219,7 +219,11 @@ public:
/**
* Returns an array of quantiles that correspond to the given array of normalized ranks.
+ * <p>If the sketch is empty this throws std::runtime_error.
+ *
* @param ranks given array of normalized ranks.
+ * @param size the number of ranks in the array.
+ *
* @return array of quantiles that correspond to the given array of normalized ranks
*
* Deprecated. Will be removed in the next major version. Use get_quantile() instead.
@@ -380,24 +384,11 @@ private:
static void check_serial_version(uint8_t serial_version);
static void check_family_id(uint8_t family_id);
- // implementations for floating point types
- template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
- static const TT& get_invalid_item() {
- static TT item = std::numeric_limits<TT>::quiet_NaN();
- return item;
- }
-
template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
static inline bool check_update_item(const TT& item) {
return !std::isnan(item);
}
- // implementations for all other types
- template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
- static const TT& get_invalid_item() {
- throw std::runtime_error("getting quantiles from empty sketch is not supported for this type of items");
- }
-
template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
static inline bool check_update_item(const TT&) {
return true;
diff --git a/req/include/req_sketch_impl.hpp b/req/include/req_sketch_impl.hpp
index e9aa9ed..62aed71 100755
--- a/req/include/req_sketch_impl.hpp
+++ b/req/include/req_sketch_impl.hpp
@@ -21,6 +21,7 @@
#define REQ_SKETCH_IMPL_HPP_
#include <sstream>
+#include <stdexcept>
namespace datasketches {
@@ -227,13 +228,13 @@ void req_sketch<T, C, A>::merge(FwdSk&& other) {
template<typename T, typename C, typename A>
const T& req_sketch<T, C, A>::get_min_item() const {
- if (is_empty()) return get_invalid_item();
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
return *min_item_;
}
template<typename T, typename C, typename A>
const T& req_sketch<T, C, A>::get_max_item() const {
- if (is_empty()) return get_invalid_item();
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
return *max_item_;
}
@@ -249,6 +250,7 @@ A req_sketch<T, C, A>::get_allocator() const {
template<typename T, typename C, typename A>
double req_sketch<T, C, A>::get_rank(const T& item, bool inclusive) const {
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
uint64_t weight = 0;
for (const auto& compactor: compactors_) {
weight += compactor.compute_weight(item, inclusive);
@@ -258,19 +260,21 @@ double req_sketch<T, C, A>::get_rank(const T& item, bool inclusive) const {
template<typename T, typename C, typename A>
auto req_sketch<T, C, A>::get_PMF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
setup_sorted_view();
return sorted_view_->get_PMF(split_points, size, inclusive);
}
template<typename T, typename C, typename A>
auto req_sketch<T, C, A>::get_CDF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
setup_sorted_view();
return sorted_view_->get_CDF(split_points, size, inclusive);
}
template<typename T, typename C, typename A>
auto req_sketch<T, C, A>::get_quantile(double rank, bool inclusive) const -> quantile_return_type {
- if (is_empty()) return get_invalid_item();
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
if ((rank < 0.0) || (rank > 1.0)) {
throw std::invalid_argument("Normalized rank cannot be less than 0 or greater than 1");
}
@@ -281,8 +285,8 @@ auto req_sketch<T, C, A>::get_quantile(double rank, bool inclusive) const -> qua
template<typename T, typename C, typename A>
std::vector<T, A> req_sketch<T, C, A>::get_quantiles(const double* ranks, uint32_t size, bool inclusive) const {
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
std::vector<T, A> quantiles(allocator_);
- if (is_empty()) return quantiles;
quantiles.reserve(size);
// possible side-effect of sorting level zero
diff --git a/req/test/req_sketch_test.cpp b/req/test/req_sketch_test.cpp
index ab98863..9515962 100755
--- a/req/test/req_sketch_test.cpp
+++ b/req/test/req_sketch_test.cpp
@@ -43,19 +43,16 @@ TEST_CASE("req sketch: empty", "[req_sketch]") {
REQUIRE_FALSE(sketch.is_estimation_mode());
REQUIRE(sketch.get_n() == 0);
REQUIRE(sketch.get_num_retained() == 0);
- REQUIRE(std::isnan(sketch.get_rank(0)));
- REQUIRE(std::isnan(sketch.get_rank(std::numeric_limits<float>::infinity())));
- REQUIRE(std::isnan(sketch.get_min_item()));
- REQUIRE(std::isnan(sketch.get_max_item()));
- REQUIRE(std::isnan(sketch.get_quantile(0)));
- REQUIRE(std::isnan(sketch.get_quantile(0.5)));
- REQUIRE(std::isnan(sketch.get_quantile(1)));
+ REQUIRE_THROWS_AS(sketch.get_min_item(), std::runtime_error);
+ REQUIRE_THROWS_AS(sketch.get_max_item(), std::runtime_error);
+ REQUIRE_THROWS_AS(sketch.get_rank(0), std::runtime_error);
+ REQUIRE_THROWS_AS(sketch.get_quantile(0), std::runtime_error);
const double ranks[3] {0, 0.5, 1};
- REQUIRE(sketch.get_quantiles(ranks, 3).size() == 0);
+ REQUIRE_THROWS_AS(sketch.get_quantiles(ranks, 3), std::runtime_error);
const float split_points[1] {0};
- REQUIRE(sketch.get_CDF(split_points, 1).empty());
- REQUIRE(sketch.get_PMF(split_points, 1).empty());
+ REQUIRE_THROWS_AS(sketch.get_CDF(split_points, 1), std::runtime_error);
+ REQUIRE_THROWS_AS(sketch.get_PMF(split_points, 1), std::runtime_error);
}
TEST_CASE("req sketch: single value, lra", "[req_sketch]") {
@@ -203,8 +200,8 @@ TEST_CASE("req sketch: stream serialize-deserialize empty", "[req_sketch]") {
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
REQUIRE(sketch2.get_n() == sketch.get_n());
- REQUIRE(std::isnan(sketch2.get_min_item()));
- REQUIRE(std::isnan(sketch2.get_max_item()));
+ REQUIRE_THROWS_AS(sketch2.get_min_item(), std::runtime_error);
+ REQUIRE_THROWS_AS(sketch2.get_max_item(), std::runtime_error);
}
TEST_CASE("req sketch: byte serialize-deserialize empty", "[req_sketch]") {
@@ -218,8 +215,8 @@ TEST_CASE("req sketch: byte serialize-deserialize empty", "[req_sketch]") {
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
REQUIRE(sketch2.get_n() == sketch.get_n());
- REQUIRE(std::isnan(sketch2.get_min_item()));
- REQUIRE(std::isnan(sketch2.get_max_item()));
+ REQUIRE_THROWS_AS(sketch2.get_min_item(), std::runtime_error);
+ REQUIRE_THROWS_AS(sketch2.get_max_item(), std::runtime_error);
}
TEST_CASE("req sketch: stream serialize-deserialize single item", "[req_sketch]") {
@@ -363,8 +360,8 @@ TEST_CASE("req sketch: stream deserialize from Java - empty", "[req_sketch]") {
REQUIRE_FALSE(sketch.is_estimation_mode());
REQUIRE(sketch.get_n() == 0);
REQUIRE(sketch.get_num_retained() == 0);
- REQUIRE(std::isnan(sketch.get_min_item()));
- REQUIRE(std::isnan(sketch.get_max_item()));
+ REQUIRE_THROWS_AS(sketch.get_min_item(), std::runtime_error);
+ REQUIRE_THROWS_AS(sketch.get_max_item(), std::runtime_error);
}
TEST_CASE("req sketch: stream deserialize from Java - single item", "[req_sketch]") {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org