You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2020/11/19 00:05:54 UTC
[incubator-datasketches-cpp] 02/02: fixed quantile calculator

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch req_sketch
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git

commit 255c2d0abba97282057012ffe3a469e2e0840b4a
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Wed Nov 18 16:05:39 2020 -0800

    fixed quantile calculator
---
 req/include/req_quantile_calculator.hpp      |  8 ++++++++
 req/include/req_quantile_calculator_impl.hpp | 18 +++++++++---------
 req/include/req_sketch_impl.hpp              |  2 +-
 req/test/req_sketch_test.cpp                 |  5 +++--
 4 files changed, 21 insertions(+), 12 deletions(-)

diff --git a/req/include/req_quantile_calculator.hpp b/req/include/req_quantile_calculator.hpp
index ccf901c..f6cc13e 100755
--- a/req/include/req_quantile_calculator.hpp
+++ b/req/include/req_quantile_calculator.hpp
@@ -26,6 +26,7 @@ namespace datasketches {
 
 template<
   typename T,
+  typename Comparator,
   typename Allocator
 >
 class req_quantile_calculator {
@@ -44,6 +45,13 @@ private:
   using AllocEntry = typename std::allocator_traits<Allocator>::template rebind_alloc<Entry>;
   using Container = std::vector<Entry, AllocEntry>;
 
+  template<typename C>
+  struct compare_pairs_by_first_ptr {
+    bool operator()(const Entry& a, const Entry& b) {
+      return C()(*a.first, *b.first);
+    }
+  };
+
   struct compare_pairs_by_second {
     bool operator()(const Entry& a, const Entry& b) {
       return a.second < b.second;
diff --git a/req/include/req_quantile_calculator_impl.hpp b/req/include/req_quantile_calculator_impl.hpp
index fe6fcc9..8441c02 100755
--- a/req/include/req_quantile_calculator_impl.hpp
+++ b/req/include/req_quantile_calculator_impl.hpp
@@ -22,23 +22,23 @@
 
 namespace datasketches {
 
-template<typename T, typename A>
-req_quantile_calculator<T, A>::req_quantile_calculator(uint64_t n, const A& allocator):
+template<typename T, typename C, typename A>
+req_quantile_calculator<T, C, A>::req_quantile_calculator(uint64_t n, const A& allocator):
 n_(n),
 entries_(allocator)
 {}
 
-template<typename T, typename A>
-void req_quantile_calculator<T, A>::add(const std::vector<T, A>& items, uint8_t lg_weight) {
+template<typename T, typename C, typename A>
+void req_quantile_calculator<T, C, A>::add(const std::vector<T, A>& items, uint8_t lg_weight) {
   if (entries_.capacity() < entries_.size() + items.size()) entries_.reserve(entries_.size() + items.size());
   auto middle = entries_.end();
   for (const auto& item: items) entries_.push_back(Entry(&item, 1 << lg_weight));
-  std::inplace_merge(entries_.begin(), middle, entries_.end(), compare_pairs_by_second());
+  std::inplace_merge(entries_.begin(), middle, entries_.end(), compare_pairs_by_first_ptr<C>());
 }
 
-template<typename T, typename A>
+template<typename T, typename C, typename A>
 template<bool inclusive>
-void req_quantile_calculator<T, A>::convert_to_cummulative() {
+void req_quantile_calculator<T, C, A>::convert_to_cummulative() {
   uint64_t subtotal = 0;
   for (auto& entry: entries_) {
     const uint64_t new_subtotal = subtotal + entry.second;
@@ -47,8 +47,8 @@ void req_quantile_calculator<T, A>::convert_to_cummulative() {
   }
 }
 
-template<typename T, typename A>
-const T& req_quantile_calculator<T, A>::get_quantile(double rank) const {
+template<typename T, typename C, typename A>
+const T& req_quantile_calculator<T, C, A>::get_quantile(double rank) const {
   uint64_t weight = static_cast<uint64_t>(rank * n_);
   auto it = std::lower_bound(entries_.begin(), entries_.end(), Entry(nullptr, weight), compare_pairs_by_second());
   if (it == entries_.end()) return *(entries_[entries_.size() - 1].first);
diff --git a/req/include/req_sketch_impl.hpp b/req/include/req_sketch_impl.hpp
index 96e10c2..fd4b445 100755
--- a/req/include/req_sketch_impl.hpp
+++ b/req/include/req_sketch_impl.hpp
@@ -204,7 +204,7 @@ const T& req_sketch<T, H, C, S, A>::get_quantile(double rank) const {
   if (!compactors_[0].is_sorted()) {
     const_cast<Compactor&>(compactors_[0]).sort(); // allow this side effect
   }
-  req_quantile_calculator<T, A> quantile_calculator(n_, allocator_);
+  req_quantile_calculator<T, C, A> quantile_calculator(n_, allocator_);
   for (auto& compactor: compactors_) {
     quantile_calculator.add(compactor.get_items(), compactor.get_lg_weight());
   }
diff --git a/req/test/req_sketch_test.cpp b/req/test/req_sketch_test.cpp
index e0e6c9a..49cd776 100755
--- a/req/test/req_sketch_test.cpp
+++ b/req/test/req_sketch_test.cpp
@@ -374,10 +374,11 @@ TEST_CASE("req sketch: merge", "[req_sketch]") {
   for (size_t i = 1000; i < 2000; ++i) sketch2.update(i);
 
   sketch1.merge(sketch2);
-  //std::cout << sketch1.to_string(true, true);
   REQUIRE(sketch1.get_min_value() == 0);
   REQUIRE(sketch1.get_max_value() == 1999);
-  //REQUIRE(sketch1.get_quantile(0.5) == 1000);
+  REQUIRE(sketch1.get_quantile(0.25) == Approx(500).margin(3));
+  REQUIRE(sketch1.get_quantile(0.5) == Approx(1000).margin(1));
+  REQUIRE(sketch1.get_quantile(0.75) == Approx(1500).margin(1));
   REQUIRE(sketch1.get_rank(1000) == Approx(0.5).margin(0.01));
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org