You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2020/03/26 17:45:29 UTC

[incubator-datasketches-characterization] branch bounds created (now 7978e65)

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a change to branch bounds
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-characterization.git.


      at 7978e65  measure out-of-bounds rate

This branch includes the following new commits:

     new 7978e65  measure out-of-bounds rate

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org


[incubator-datasketches-characterization] 01/01: measure out-of-bounds rate

Posted by al...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch bounds
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-characterization.git

commit 7978e65663138dd6fbdf5079b87c084750d746b4
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Thu Mar 26 10:45:05 2020 -0700

    measure out-of-bounds rate
---
 cpp/src/distinct_count_accuracy_profile.cpp | 47 +++++++++++++++++++++++++++++
 cpp/src/distinct_count_accuracy_profile.hpp | 14 +++++++++
 cpp/src/hll_union_accuracy_profile.cpp      | 12 ++++++--
 3 files changed, 71 insertions(+), 2 deletions(-)

diff --git a/cpp/src/distinct_count_accuracy_profile.cpp b/cpp/src/distinct_count_accuracy_profile.cpp
index 5b822ef..2cfc335 100644
--- a/cpp/src/distinct_count_accuracy_profile.cpp
+++ b/cpp/src/distinct_count_accuracy_profile.cpp
@@ -33,6 +33,12 @@ sum_est(0),
 sum_rel_err(0),
 sum_sq_rel_err(0),
 count(0),
+below_lb1_cnt(0),
+below_lb2_cnt(0),
+below_lb3_cnt(0),
+above_ub1_cnt(0),
+above_ub2_cnt(0),
+above_ub3_cnt(0),
 rel_err_distribution(k)
 {}
 
@@ -45,6 +51,17 @@ void accuracy_stats::update(double estimate) {
   count++;
 }
 
+void accuracy_stats::update(double estimate, double lb1, double lb2, double lb3,
+    double ub1, double ub2, double ub3) { // lb1..lb3 / ub1..ub3: lower / upper bounds to test true_value against
+  update(estimate); // delegate to the single-argument overload first (it also advances count)
+  below_lb1_cnt += (true_value < lb1) ? 1 : 0; // true value lies under lower bound 1
+  below_lb2_cnt += (true_value < lb2) ? 1 : 0;
+  below_lb3_cnt += (true_value < lb3) ? 1 : 0;
+  above_ub1_cnt += (true_value > ub1) ? 1 : 0; // true value lies over upper bound 1
+  above_ub2_cnt += (true_value > ub2) ? 1 : 0;
+  above_ub3_cnt += (true_value > ub3) ? 1 : 0;
+}
+
 size_t accuracy_stats::get_true_value() const {
   return true_value;
 }
@@ -65,6 +82,30 @@ size_t accuracy_stats::get_count() const {
   return count;
 }
 
+double accuracy_stats::get_below_lb1_ratio() const { // below_lb1_cnt (true value < lb1) over the total update count
+  return below_lb1_cnt / static_cast<double>(count); // floating-point division; count == 0 is not guarded here
+}
+
+double accuracy_stats::get_below_lb2_ratio() const { // below_lb2_cnt (true value < lb2) over the total update count
+  return below_lb2_cnt / static_cast<double>(count); // floating-point division; count == 0 is not guarded here
+}
+
+double accuracy_stats::get_below_lb3_ratio() const { // below_lb3_cnt (true value < lb3) over the total update count
+  return below_lb3_cnt / static_cast<double>(count); // floating-point division; count == 0 is not guarded here
+}
+
+double accuracy_stats::get_above_ub1_ratio() const { // above_ub1_cnt (true value > ub1) over the total update count
+  return above_ub1_cnt / static_cast<double>(count); // floating-point division; count == 0 is not guarded here
+}
+
+double accuracy_stats::get_above_ub2_ratio() const { // above_ub2_cnt (true value > ub2) over the total update count
+  return above_ub2_cnt / static_cast<double>(count); // floating-point division; count == 0 is not guarded here
+}
+
+double accuracy_stats::get_above_ub3_ratio() const { // above_ub3_cnt (true value > ub3) over the total update count
+  return above_ub3_cnt / static_cast<double>(count); // floating-point division; count == 0 is not guarded here
+}
+
 std::vector<double> accuracy_stats::get_quantiles(
     const double* fractions, size_t size) const {
   return rel_err_distribution.get_quantiles(fractions, size);
@@ -159,6 +200,12 @@ void distinct_count_accuracy_profile::print_stats() const {
       std::cout << quantile;
       if (i != FRACT_LEN - 1) std::cout << "\t";
     }
+    std::cout << "\t" << stat.get_below_lb1_ratio();
+    std::cout << "\t" << stat.get_below_lb2_ratio();
+    std::cout << "\t" << stat.get_below_lb3_ratio();
+    std::cout << "\t" << stat.get_above_ub1_ratio();
+    std::cout << "\t" << stat.get_above_ub2_ratio();
+    std::cout << "\t" << stat.get_above_ub3_ratio();
     std::cout << std::endl;
   }
 }
diff --git a/cpp/src/distinct_count_accuracy_profile.hpp b/cpp/src/distinct_count_accuracy_profile.hpp
index 2f39a7e..0ddb940 100644
--- a/cpp/src/distinct_count_accuracy_profile.hpp
+++ b/cpp/src/distinct_count_accuracy_profile.hpp
@@ -40,11 +40,19 @@ class accuracy_stats {
 public:
   accuracy_stats(size_t k, size_t true_value);
   void update(double estimate);
+  void update(double estimate, double lb1, double lb2, double lb3,
+      double ub1, double ub2, double ub3);
   size_t get_true_value() const;
   double get_mean_est() const;
   double get_mean_rel_err() const;
   double get_rms_rel_err() const;
   size_t get_count() const;
+  double get_below_lb1_ratio() const;
+  double get_below_lb2_ratio() const;
+  double get_below_lb3_ratio() const;
+  double get_above_ub1_ratio() const;
+  double get_above_ub2_ratio() const;
+  double get_above_ub3_ratio() const;
   std::vector<double> get_quantiles(const double* fractions, size_t size) const;
 
 private:
@@ -53,6 +61,12 @@ private:
   double sum_rel_err;
   double sum_sq_rel_err;
   size_t count;
+  size_t below_lb1_cnt;
+  size_t below_lb2_cnt;
+  size_t below_lb3_cnt;
+  size_t above_ub1_cnt;
+  size_t above_ub2_cnt;
+  size_t above_ub3_cnt;
   kll_sketch<double> rel_err_distribution;
 };
 
diff --git a/cpp/src/hll_union_accuracy_profile.cpp b/cpp/src/hll_union_accuracy_profile.cpp
index 96f886f..c1f8bae 100644
--- a/cpp/src/hll_union_accuracy_profile.cpp
+++ b/cpp/src/hll_union_accuracy_profile.cpp
@@ -43,9 +43,17 @@ void hll_union_accuracy_profile::run_trial() {
     }
     count += delta;
     for (auto& sketch: sketches) {
-      u.update(*sketch);
+      u.update(std::move(*sketch));
     }
-    stat.update(u.get_result().get_estimate());
+    stat.update(
+      u.get_estimate(),
+      u.get_lower_bound(1),
+      u.get_lower_bound(2),
+      u.get_lower_bound(3),
+      u.get_upper_bound(1),
+      u.get_upper_bound(2),
+      u.get_upper_bound(3)
+    );
   }
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org