You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2020/10/30 01:50:43 UTC
[incubator-datasketches-cpp] branch req_sketch updated: get_rank
This is an automated email from the ASF dual-hosted git repository.
alsay pushed a commit to branch req_sketch
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git
The following commit(s) were added to refs/heads/req_sketch by this push:
new f4ca4cd get_rank
f4ca4cd is described below
commit f4ca4cd3e1252a8d205f8b41f30c05f09e7232f8
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Thu Oct 29 18:50:16 2020 -0700
get_rank
---
req/include/req_sketch.hpp | 17 +++++++++++++++++
req/include/req_sketch_impl.hpp | 27 +++++++++++++++++++++++++++
req/test/req_sketch_test.cpp | 32 ++++++++++++++++++++++++++++++++
3 files changed, 76 insertions(+)
diff --git a/req/include/req_sketch.hpp b/req/include/req_sketch.hpp
index f97c0c4..37707cf 100755
--- a/req/include/req_sketch.hpp
+++ b/req/include/req_sketch.hpp
@@ -46,6 +46,10 @@ public:
bool is_sorted() const;
uint32_t get_num_items() const;
uint32_t get_nom_capacity() const;
+ //uint8_t get_lg_weight() const;
+
+ template<bool inclusive>
+ uint64_t compute_weight(const T& item) const;
template<typename FwdT>
void append(FwdT&& item);
@@ -116,6 +120,19 @@ public:
void update(FwdT&& item);
/**
+ * Returns an approximation to the normalized (fractional) rank of the given item from 0 to 1 inclusive.
+ * With the template parameter inclusive=true the weight of the given item is included into the rank.
+ * Otherwise the rank equals the sum of the weights of items less than the given item according to the Comparator.
+ *
+ * <p>If the sketch is empty this returns NaN.
+ *
+ * @param item to be ranked
+ * @return an approximate rank of the given item
+ */
+ template<bool inclusive = false>
+ double get_rank(const T& item) const;
+
+ /**
* Prints a summary of the sketch.
* @param print_levels if true include information about levels
* @param print_items if true include sketch data
diff --git a/req/include/req_sketch_impl.hpp b/req/include/req_sketch_impl.hpp
index 132eeed..0de6da2 100755
--- a/req/include/req_sketch_impl.hpp
+++ b/req/include/req_sketch_impl.hpp
@@ -23,6 +23,7 @@
#include <stdexcept>
#include <cmath>
#include <sstream>
+#include <algorithm>
#include "count_zeros.hpp"
@@ -56,6 +57,21 @@ uint32_t req_compactor<T, H, C, A>::get_nom_capacity() const {
return 2 * num_sections_ * section_size_;
}
+//template<typename T, bool H, typename C, typename A>
+//uint8_t req_compactor<T, H, C, A>::get_lg_weight() const {
+// return lg_weight_;
+//}
+
+template<typename T, bool H, typename C, typename A>
+template<bool inclusive>
+uint64_t req_compactor<T, H, C, A>::compute_weight(const T& item) const {
+ if (!sorted_) const_cast<req_compactor*>(this)->sort(); // allow sorting as a side effect
+ auto it = inclusive ?
+ std::upper_bound(items_.begin(), items_.end(), item, C()) :
+ std::lower_bound(items_.begin(), items_.end(), item, C());
+ return std::distance(items_.begin(), it) << lg_weight_;
+}
+
template<typename T, bool H, typename C, typename A>
template<typename FwdT>
void req_compactor<T, H, C, A>::append(FwdT&& item) {
@@ -199,6 +215,17 @@ void req_sketch<T, H, C, S, A>::update(FwdT&& item) {
// aux = null;
}
+
+template<typename T, bool H, typename C, typename S, typename A>
+template<bool inclusive>
+double req_sketch<T, H, C, S, A>::get_rank(const T& item) const {
+ uint64_t weight = 0;
+ for (const auto& compactor: compactors_) {
+ weight += compactor.template compute_weight<inclusive>(item);
+ }
+ return static_cast<double>(weight) / n_;
+}
+
template<typename T, bool H, typename C, typename S, typename A>
void req_sketch<T, H, C, S, A>::grow() {
const uint8_t lg_weight = get_num_levels();
diff --git a/req/test/req_sketch_test.cpp b/req/test/req_sketch_test.cpp
index 56098b0..5451fcc 100755
--- a/req/test/req_sketch_test.cpp
+++ b/req/test/req_sketch_test.cpp
@@ -21,6 +21,8 @@
#include <req_sketch.hpp>
+#include <limits>
+
namespace datasketches {
#ifdef TEST_BINARY_INPUT_PATH
@@ -35,6 +37,8 @@ TEST_CASE("req sketch: empty", "[req_sketch]") {
REQUIRE_FALSE(sketch.is_estimation_mode());
REQUIRE(sketch.get_n() == 0);
REQUIRE(sketch.get_num_retained() == 0);
+ REQUIRE(std::isnan(sketch.get_rank(0)));
+ REQUIRE(std::isnan(sketch.get_rank(std::numeric_limits<float>::infinity())));
}
TEST_CASE("req sketch: single value", "[req_sketch]") {
@@ -44,6 +48,28 @@ TEST_CASE("req sketch: single value", "[req_sketch]") {
REQUIRE_FALSE(sketch.is_estimation_mode());
REQUIRE(sketch.get_n() == 1);
REQUIRE(sketch.get_num_retained() == 1);
+ REQUIRE(sketch.get_rank(1) == 0);
+ REQUIRE(sketch.get_rank<true>(1) == 1);
+ REQUIRE(sketch.get_rank(1.1) == 1);
+ REQUIRE(sketch.get_rank(std::numeric_limits<float>::infinity()) == 1);
+}
+
+TEST_CASE("req sketch: repeated values", "[req_sketch]") {
+ req_sketch<float, true> sketch(100);
+ sketch.update(1);
+ sketch.update(1);
+ sketch.update(1);
+ sketch.update(2);
+ sketch.update(2);
+ sketch.update(2);
+ REQUIRE_FALSE(sketch.is_empty());
+ REQUIRE_FALSE(sketch.is_estimation_mode());
+ REQUIRE(sketch.get_n() == 6);
+ REQUIRE(sketch.get_num_retained() == 6);
+ REQUIRE(sketch.get_rank(1) == 0);
+ REQUIRE(sketch.get_rank<true>(1) == 0.5);
+ REQUIRE(sketch.get_rank(2) == 0.5);
+ REQUIRE(sketch.get_rank<true>(2) == 1);
}
TEST_CASE("req sketch: exact mode", "[req_sketch]") {
@@ -53,6 +79,8 @@ TEST_CASE("req sketch: exact mode", "[req_sketch]") {
REQUIRE_FALSE(sketch.is_estimation_mode());
REQUIRE(sketch.get_n() == 100);
REQUIRE(sketch.get_num_retained() == 100);
+ REQUIRE(sketch.get_rank(50) == 0.5);
+ REQUIRE(sketch.get_rank<true>(49) == 0.5);
}
TEST_CASE("req sketch: estimation mode", "[req_sketch]") {
@@ -65,6 +93,10 @@ TEST_CASE("req sketch: estimation mode", "[req_sketch]") {
REQUIRE(sketch.get_n() == n);
std::cout << sketch.to_string(true, true);
REQUIRE(sketch.get_num_retained() < n);
+ REQUIRE(sketch.get_rank(0) == 0);
+ REQUIRE(sketch.get_rank(n) == 1);
+ REQUIRE(sketch.get_rank(n / 2) == Approx(0.5).margin(0.01));
+ REQUIRE(sketch.get_rank(n - 1) == Approx(1).margin(0.01));
}
} /* namespace datasketches */
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org