You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2020/03/26 17:45:30 UTC

[incubator-datasketches-characterization] 01/01: measure out-of-bounds rate

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch bounds
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-characterization.git

commit 7978e65663138dd6fbdf5079b87c084750d746b4
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Thu Mar 26 10:45:05 2020 -0700

    measure out-of-bounds rate
---
 cpp/src/distinct_count_accuracy_profile.cpp | 47 +++++++++++++++++++++++++++++
 cpp/src/distinct_count_accuracy_profile.hpp | 14 +++++++++
 cpp/src/hll_union_accuracy_profile.cpp      | 12 ++++++--
 3 files changed, 71 insertions(+), 2 deletions(-)

diff --git a/cpp/src/distinct_count_accuracy_profile.cpp b/cpp/src/distinct_count_accuracy_profile.cpp
index 5b822ef..2cfc335 100644
--- a/cpp/src/distinct_count_accuracy_profile.cpp
+++ b/cpp/src/distinct_count_accuracy_profile.cpp
@@ -33,6 +33,12 @@ sum_est(0),
 sum_rel_err(0),
 sum_sq_rel_err(0),
 count(0),
+below_lb1_cnt(0),
+below_lb2_cnt(0),
+below_lb3_cnt(0),
+above_ub1_cnt(0),
+above_ub2_cnt(0),
+above_ub3_cnt(0),
 rel_err_distribution(k)
 {}
 
@@ -45,6 +51,17 @@ void accuracy_stats::update(double estimate) {
   count++;
 }
 
+void accuracy_stats::update(double estimate, double lb1, double lb2, double lb3,
+    double ub1, double ub2, double ub3) { // records one estimate and tallies how often true_value lies outside each supplied bound
+  update(estimate); // delegate to the single-argument overload for the estimate itself
+  if (true_value < lb1) below_lb1_cnt++; // strict comparison: true_value equal to a bound is not counted
+  if (true_value < lb2) below_lb2_cnt++; // same check against lb2
+  if (true_value < lb3) below_lb3_cnt++; // same check against lb3
+  if (true_value > ub1) above_ub1_cnt++; // true_value exceeds upper bound ub1
+  if (true_value > ub2) above_ub2_cnt++; // same check against ub2
+  if (true_value > ub3) above_ub3_cnt++; // same check against ub3
+}
+
 size_t accuracy_stats::get_true_value() const {
   return true_value;
 }
@@ -65,6 +82,30 @@ size_t accuracy_stats::get_count() const {
   return count;
 }
 
+double accuracy_stats::get_below_lb1_ratio() const { // below_lb1_cnt expressed as a fraction of count
+  return static_cast<double>(below_lb1_cnt) / count; // cast forces floating-point division; no guard for count == 0
+}
+
+double accuracy_stats::get_below_lb2_ratio() const { // below_lb2_cnt expressed as a fraction of count
+  return static_cast<double>(below_lb2_cnt) / count; // cast forces floating-point division; no guard for count == 0
+}
+
+double accuracy_stats::get_below_lb3_ratio() const { // below_lb3_cnt expressed as a fraction of count
+  return static_cast<double>(below_lb3_cnt) / count; // cast forces floating-point division; no guard for count == 0
+}
+
+double accuracy_stats::get_above_ub1_ratio() const { // above_ub1_cnt expressed as a fraction of count
+  return static_cast<double>(above_ub1_cnt) / count; // cast forces floating-point division; no guard for count == 0
+}
+
+double accuracy_stats::get_above_ub2_ratio() const { // above_ub2_cnt expressed as a fraction of count
+  return static_cast<double>(above_ub2_cnt) / count; // cast forces floating-point division; no guard for count == 0
+}
+
+double accuracy_stats::get_above_ub3_ratio() const { // above_ub3_cnt expressed as a fraction of count
+  return static_cast<double>(above_ub3_cnt) / count; // cast forces floating-point division; no guard for count == 0
+}
+
 std::vector<double> accuracy_stats::get_quantiles(
     const double* fractions, size_t size) const {
   return rel_err_distribution.get_quantiles(fractions, size);
@@ -159,6 +200,12 @@ void distinct_count_accuracy_profile::print_stats() const {
       std::cout << quantile;
       if (i != FRACT_LEN - 1) std::cout << "\t";
     }
+    std::cout << "\t" << stat.get_below_lb1_ratio();
+    std::cout << "\t" << stat.get_below_lb2_ratio();
+    std::cout << "\t" << stat.get_below_lb3_ratio();
+    std::cout << "\t" << stat.get_above_ub1_ratio();
+    std::cout << "\t" << stat.get_above_ub2_ratio();
+    std::cout << "\t" << stat.get_above_ub3_ratio();
     std::cout << std::endl;
   }
 }
diff --git a/cpp/src/distinct_count_accuracy_profile.hpp b/cpp/src/distinct_count_accuracy_profile.hpp
index 2f39a7e..0ddb940 100644
--- a/cpp/src/distinct_count_accuracy_profile.hpp
+++ b/cpp/src/distinct_count_accuracy_profile.hpp
@@ -40,11 +40,19 @@ class accuracy_stats {
 public:
   accuracy_stats(size_t k, size_t true_value);
   void update(double estimate);
+  void update(double estimate, double lb1, double lb2, double lb3,
+      double ub1, double ub2, double ub3);
   size_t get_true_value() const;
   double get_mean_est() const;
   double get_mean_rel_err() const;
   double get_rms_rel_err() const;
   size_t get_count() const;
+  double get_below_lb1_ratio() const;
+  double get_below_lb2_ratio() const;
+  double get_below_lb3_ratio() const;
+  double get_above_ub1_ratio() const;
+  double get_above_ub2_ratio() const;
+  double get_above_ub3_ratio() const;
   std::vector<double> get_quantiles(const double* fractions, size_t size) const;
 
 private:
@@ -53,6 +61,12 @@ private:
   double sum_rel_err;
   double sum_sq_rel_err;
   size_t count;
+  size_t below_lb1_cnt;
+  size_t below_lb2_cnt;
+  size_t below_lb3_cnt;
+  size_t above_ub1_cnt;
+  size_t above_ub2_cnt;
+  size_t above_ub3_cnt;
   kll_sketch<double> rel_err_distribution;
 };
 
diff --git a/cpp/src/hll_union_accuracy_profile.cpp b/cpp/src/hll_union_accuracy_profile.cpp
index 96f886f..c1f8bae 100644
--- a/cpp/src/hll_union_accuracy_profile.cpp
+++ b/cpp/src/hll_union_accuracy_profile.cpp
@@ -43,9 +43,17 @@ void hll_union_accuracy_profile::run_trial() {
     }
     count += delta;
     for (auto& sketch: sketches) {
-      u.update(*sketch);
+      u.update(std::move(*sketch));
     }
-    stat.update(u.get_result().get_estimate());
+    stat.update(
+      u.get_estimate(),
+      u.get_lower_bound(1),
+      u.get_lower_bound(2),
+      u.get_lower_bound(3),
+      u.get_upper_bound(1),
+      u.get_upper_bound(2),
+      u.get_upper_bound(3)
+    );
   }
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org