You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2020/03/26 17:45:30 UTC
[incubator-datasketches-characterization] 01/01: measure out-of-bounds rate
This is an automated email from the ASF dual-hosted git repository.
alsay pushed a commit to branch bounds
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-characterization.git
commit 7978e65663138dd6fbdf5079b87c084750d746b4
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Thu Mar 26 10:45:05 2020 -0700
measure out-of-bounds rate
---
cpp/src/distinct_count_accuracy_profile.cpp | 47 +++++++++++++++++++++++++++++
cpp/src/distinct_count_accuracy_profile.hpp | 14 +++++++++
cpp/src/hll_union_accuracy_profile.cpp | 12 ++++++--
3 files changed, 71 insertions(+), 2 deletions(-)
diff --git a/cpp/src/distinct_count_accuracy_profile.cpp b/cpp/src/distinct_count_accuracy_profile.cpp
index 5b822ef..2cfc335 100644
--- a/cpp/src/distinct_count_accuracy_profile.cpp
+++ b/cpp/src/distinct_count_accuracy_profile.cpp
@@ -33,6 +33,12 @@ sum_est(0),
sum_rel_err(0),
sum_sq_rel_err(0),
count(0),
+below_lb1_cnt(0),
+below_lb2_cnt(0),
+below_lb3_cnt(0),
+above_ub1_cnt(0),
+above_ub2_cnt(0),
+above_ub3_cnt(0),
rel_err_distribution(k)
{}
@@ -45,6 +51,17 @@ void accuracy_stats::update(double estimate) {
count++;
}
+void accuracy_stats::update(double estimate, double lb1, double lb2, double lb3,
+ double ub1, double ub2, double ub3) {
+ update(estimate);
+ if (true_value < lb1) below_lb1_cnt++;
+ if (true_value < lb2) below_lb2_cnt++;
+ if (true_value < lb3) below_lb3_cnt++;
+ if (true_value > ub1) above_ub1_cnt++;
+ if (true_value > ub2) above_ub2_cnt++;
+ if (true_value > ub3) above_ub3_cnt++;
+}
+
size_t accuracy_stats::get_true_value() const {
return true_value;
}
@@ -65,6 +82,30 @@ size_t accuracy_stats::get_count() const {
return count;
}
+double accuracy_stats::get_below_lb1_ratio() const {
+ return static_cast<double>(below_lb1_cnt) / count;
+}
+
+double accuracy_stats::get_below_lb2_ratio() const {
+ return static_cast<double>(below_lb2_cnt) / count;
+}
+
+double accuracy_stats::get_below_lb3_ratio() const {
+ return static_cast<double>(below_lb3_cnt) / count;
+}
+
+double accuracy_stats::get_above_ub1_ratio() const {
+ return static_cast<double>(above_ub1_cnt) / count;
+}
+
+double accuracy_stats::get_above_ub2_ratio() const {
+ return static_cast<double>(above_ub2_cnt) / count;
+}
+
+double accuracy_stats::get_above_ub3_ratio() const {
+ return static_cast<double>(above_ub3_cnt) / count;
+}
+
std::vector<double> accuracy_stats::get_quantiles(
const double* fractions, size_t size) const {
return rel_err_distribution.get_quantiles(fractions, size);
@@ -159,6 +200,12 @@ void distinct_count_accuracy_profile::print_stats() const {
std::cout << quantile;
if (i != FRACT_LEN - 1) std::cout << "\t";
}
+ std::cout << "\t" << stat.get_below_lb1_ratio();
+ std::cout << "\t" << stat.get_below_lb2_ratio();
+ std::cout << "\t" << stat.get_below_lb3_ratio();
+ std::cout << "\t" << stat.get_above_ub1_ratio();
+ std::cout << "\t" << stat.get_above_ub2_ratio();
+ std::cout << "\t" << stat.get_above_ub3_ratio();
std::cout << std::endl;
}
}
diff --git a/cpp/src/distinct_count_accuracy_profile.hpp b/cpp/src/distinct_count_accuracy_profile.hpp
index 2f39a7e..0ddb940 100644
--- a/cpp/src/distinct_count_accuracy_profile.hpp
+++ b/cpp/src/distinct_count_accuracy_profile.hpp
@@ -40,11 +40,19 @@ class accuracy_stats {
public:
accuracy_stats(size_t k, size_t true_value);
void update(double estimate);
+ void update(double estimate, double lb1, double lb2, double lb3,
+ double ub1, double ub2, double ub3);
size_t get_true_value() const;
double get_mean_est() const;
double get_mean_rel_err() const;
double get_rms_rel_err() const;
size_t get_count() const;
+ double get_below_lb1_ratio() const;
+ double get_below_lb2_ratio() const;
+ double get_below_lb3_ratio() const;
+ double get_above_ub1_ratio() const;
+ double get_above_ub2_ratio() const;
+ double get_above_ub3_ratio() const;
std::vector<double> get_quantiles(const double* fractions, size_t size) const;
private:
@@ -53,6 +61,12 @@ private:
double sum_rel_err;
double sum_sq_rel_err;
size_t count;
+ size_t below_lb1_cnt;
+ size_t below_lb2_cnt;
+ size_t below_lb3_cnt;
+ size_t above_ub1_cnt;
+ size_t above_ub2_cnt;
+ size_t above_ub3_cnt;
kll_sketch<double> rel_err_distribution;
};
diff --git a/cpp/src/hll_union_accuracy_profile.cpp b/cpp/src/hll_union_accuracy_profile.cpp
index 96f886f..c1f8bae 100644
--- a/cpp/src/hll_union_accuracy_profile.cpp
+++ b/cpp/src/hll_union_accuracy_profile.cpp
@@ -43,9 +43,17 @@ void hll_union_accuracy_profile::run_trial() {
}
count += delta;
for (auto& sketch: sketches) {
- u.update(*sketch);
+ u.update(std::move(*sketch));
}
- stat.update(u.get_result().get_estimate());
+ stat.update(
+ u.get_estimate(),
+ u.get_lower_bound(1),
+ u.get_lower_bound(2),
+ u.get_lower_bound(3),
+ u.get_upper_bound(1),
+ u.get_upper_bound(2),
+ u.get_upper_bound(3)
+ );
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org