You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2020/10/30 01:50:43 UTC

[incubator-datasketches-cpp] branch req_sketch updated: get_rank

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch req_sketch
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git


The following commit(s) were added to refs/heads/req_sketch by this push:
     new f4ca4cd  get_rank
f4ca4cd is described below

commit f4ca4cd3e1252a8d205f8b41f30c05f09e7232f8
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Thu Oct 29 18:50:16 2020 -0700

    get_rank
---
 req/include/req_sketch.hpp      | 17 +++++++++++++++++
 req/include/req_sketch_impl.hpp | 27 +++++++++++++++++++++++++++
 req/test/req_sketch_test.cpp    | 32 ++++++++++++++++++++++++++++++++
 3 files changed, 76 insertions(+)

diff --git a/req/include/req_sketch.hpp b/req/include/req_sketch.hpp
index f97c0c4..37707cf 100755
--- a/req/include/req_sketch.hpp
+++ b/req/include/req_sketch.hpp
@@ -46,6 +46,10 @@ public:
   bool is_sorted() const;
   uint32_t get_num_items() const;
   uint32_t get_nom_capacity() const;
+  //uint8_t get_lg_weight() const;
+
+  template<bool inclusive>
+  uint64_t compute_weight(const T& item) const;
 
   template<typename FwdT>
   void append(FwdT&& item);
@@ -116,6 +120,19 @@ public:
   void update(FwdT&& item);
 
   /**
+   * Returns an approximation to the normalized (fractional) rank of the given item from 0 to 1 inclusive.
+   * With the template parameter inclusive=true the weight of the given item is included into the rank.
+   * Otherwise the rank equals the sum of the weights of items less than the given item according to the Comparator.
+   *
+   * <p>If the sketch is empty this returns NaN.
+   *
+   * @param item to be ranked
+   * @return an approximate rank of the given item
+   */
+  template<bool inclusive = false>
+  double get_rank(const T& item) const;
+
+  /**
    * Prints a summary of the sketch.
    * @param print_levels if true include information about levels
    * @param print_items if true include sketch data
diff --git a/req/include/req_sketch_impl.hpp b/req/include/req_sketch_impl.hpp
index 132eeed..0de6da2 100755
--- a/req/include/req_sketch_impl.hpp
+++ b/req/include/req_sketch_impl.hpp
@@ -23,6 +23,7 @@
 #include <stdexcept>
 #include <cmath>
 #include <sstream>
+#include <algorithm>
 
 #include "count_zeros.hpp"
 
@@ -56,6 +57,21 @@ uint32_t req_compactor<T, H, C, A>::get_nom_capacity() const {
   return 2 * num_sections_ * section_size_;
 }
 
+//template<typename T, bool H, typename C, typename A>
+//uint8_t req_compactor<T, H, C, A>::get_lg_weight() const {
+//  return lg_weight_;
+//}
+
+template<typename T, bool H, typename C, typename A>
+template<bool inclusive>
+uint64_t req_compactor<T, H, C, A>::compute_weight(const T& item) const {
+  if (!sorted_) const_cast<req_compactor*>(this)->sort(); // allow sorting as a side effect
+  auto it = inclusive ?
+      std::upper_bound(items_.begin(), items_.end(), item, C()) :
+      std::lower_bound(items_.begin(), items_.end(), item, C());
+  return std::distance(items_.begin(), it) << lg_weight_;
+}
+
 template<typename T, bool H, typename C, typename A>
 template<typename FwdT>
 void req_compactor<T, H, C, A>::append(FwdT&& item) {
@@ -199,6 +215,17 @@ void req_sketch<T, H, C, S, A>::update(FwdT&& item) {
   //  aux = null;
 }
 
+
+template<typename T, bool H, typename C, typename S, typename A>
+template<bool inclusive>
+double req_sketch<T, H, C, S, A>::get_rank(const T& item) const {
+  uint64_t weight = 0;
+  for (const auto& compactor: compactors_) {
+    weight += compactor.template compute_weight<inclusive>(item);
+  }
+  return static_cast<double>(weight) / n_;
+}
+
 template<typename T, bool H, typename C, typename S, typename A>
 void req_sketch<T, H, C, S, A>::grow() {
   const uint8_t lg_weight = get_num_levels();
diff --git a/req/test/req_sketch_test.cpp b/req/test/req_sketch_test.cpp
index 56098b0..5451fcc 100755
--- a/req/test/req_sketch_test.cpp
+++ b/req/test/req_sketch_test.cpp
@@ -21,6 +21,8 @@
 
 #include <req_sketch.hpp>
 
+#include <limits>
+
 namespace datasketches {
 
 #ifdef TEST_BINARY_INPUT_PATH
@@ -35,6 +37,8 @@ TEST_CASE("req sketch: empty", "[req_sketch]") {
   REQUIRE_FALSE(sketch.is_estimation_mode());
   REQUIRE(sketch.get_n() == 0);
   REQUIRE(sketch.get_num_retained() == 0);
+  REQUIRE(std::isnan(sketch.get_rank(0)));
+  REQUIRE(std::isnan(sketch.get_rank(std::numeric_limits<float>::infinity())));
 }
 
 TEST_CASE("req sketch: single value", "[req_sketch]") {
@@ -44,6 +48,28 @@ TEST_CASE("req sketch: single value", "[req_sketch]") {
   REQUIRE_FALSE(sketch.is_estimation_mode());
   REQUIRE(sketch.get_n() == 1);
   REQUIRE(sketch.get_num_retained() == 1);
+  REQUIRE(sketch.get_rank(1) == 0);
+  REQUIRE(sketch.get_rank<true>(1) == 1);
+  REQUIRE(sketch.get_rank(1.1) == 1);
+  REQUIRE(sketch.get_rank(std::numeric_limits<float>::infinity()) == 1);
+}
+
+TEST_CASE("req sketch: repeated values", "[req_sketch]") {
+  req_sketch<float, true> sketch(100);
+  sketch.update(1);
+  sketch.update(1);
+  sketch.update(1);
+  sketch.update(2);
+  sketch.update(2);
+  sketch.update(2);
+  REQUIRE_FALSE(sketch.is_empty());
+  REQUIRE_FALSE(sketch.is_estimation_mode());
+  REQUIRE(sketch.get_n() == 6);
+  REQUIRE(sketch.get_num_retained() == 6);
+  REQUIRE(sketch.get_rank(1) == 0);
+  REQUIRE(sketch.get_rank<true>(1) == 0.5);
+  REQUIRE(sketch.get_rank(2) == 0.5);
+  REQUIRE(sketch.get_rank<true>(2) == 1);
 }
 
 TEST_CASE("req sketch: exact mode", "[req_sketch]") {
@@ -53,6 +79,8 @@ TEST_CASE("req sketch: exact mode", "[req_sketch]") {
   REQUIRE_FALSE(sketch.is_estimation_mode());
   REQUIRE(sketch.get_n() == 100);
   REQUIRE(sketch.get_num_retained() == 100);
+  REQUIRE(sketch.get_rank(50) == 0.5);
+  REQUIRE(sketch.get_rank<true>(49) == 0.5);
 }
 
 TEST_CASE("req sketch: estimation mode", "[req_sketch]") {
@@ -65,6 +93,10 @@ TEST_CASE("req sketch: estimation mode", "[req_sketch]") {
   REQUIRE(sketch.get_n() == n);
   std::cout << sketch.to_string(true, true);
   REQUIRE(sketch.get_num_retained() < n);
+  REQUIRE(sketch.get_rank(0) == 0);
+  REQUIRE(sketch.get_rank(n) == 1);
+  REQUIRE(sketch.get_rank(n / 2) == Approx(0.5).margin(0.01));
+  REQUIRE(sketch.get_rank(n - 1) == Approx(1).margin(0.01));
 }
 
 } /* namespace datasketches */


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org