You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by jm...@apache.org on 2020/03/25 16:20:19 UTC
[incubator-datasketches-cpp] 01/01: replace std::function with
template for subset sum estimates, inline internal methods where appropriate
This is an automated email from the ASF dual-hosted git repository.
jmalkin pushed a commit to branch vo_performance
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git
commit f5a98884ab9776dcc606152afaa678f24264039f
Author: Jon Malkin <jm...@users.noreply.github.com>
AuthorDate: Wed Mar 25 09:19:57 2020 -0700
replace std::function with template for subset sum estimates, inline internal methods where appropriate
---
common/include/bounds_binomial_proportions.hpp | 26 +++++------
sampling/include/var_opt_sketch.hpp | 63 +++++++++++++-------------
sampling/include/var_opt_sketch_impl.hpp | 5 +-
3 files changed, 48 insertions(+), 46 deletions(-)
diff --git a/common/include/bounds_binomial_proportions.hpp b/common/include/bounds_binomial_proportions.hpp
index 8b00cbd..06ab484 100644
--- a/common/include/bounds_binomial_proportions.hpp
+++ b/common/include/bounds_binomial_proportions.hpp
@@ -110,7 +110,7 @@ public:
* @return the lower bound of the approximate Clopper-Pearson confidence interval for the
* unknown success probability.
*/
- static double approximate_lower_bound_on_p(long n, long k, double num_std_devs) {
+ static inline double approximate_lower_bound_on_p(long n, long k, double num_std_devs) {
check_inputs(n, k);
if (n == 0) { return 0.0; } // the coin was never flipped, so we know nothing
else if (k == 0) { return 0.0; }
@@ -145,7 +145,7 @@ public:
* @return the upper bound of the approximate Clopper-Pearson confidence interval for the
* unknown success probability.
*/
- static double approximate_upper_bound_on_p(long n, long k, double num_std_devs) {
+ static inline double approximate_upper_bound_on_p(long n, long k, double num_std_devs) {
check_inputs(n, k);
if (n == 0) { return 1.0; } // the coin was never flipped, so we know nothing
else if (k == n) { return 1.0; }
@@ -167,7 +167,7 @@ public:
* @param k is the number of successes. Must be non-negative, and cannot exceed n.
* @return the estimate of the unknown binomial proportion.
*/
- static double estimate_unknown_p(long n, long k) {
+ static inline double estimate_unknown_p(long n, long k) {
check_inputs(n, k);
if (n == 0) { return 0.5; } // the coin was never flipped, so we know nothing
else { return ((double) k / (double) n); }
@@ -178,7 +178,7 @@ public:
* @param x is the input to the erf function
* @return returns erf(x), accurate to roughly 7 decimal digits.
*/
- static double erf(double x) {
+ static inline double erf(double x) {
if (x < 0.0) { return (-1.0 * (erf_of_nonneg(-1.0 * x))); }
else { return (erf_of_nonneg(x)); }
}
@@ -188,12 +188,12 @@ public:
* @param x is the input to the normal_cdf function
* @return returns the approximation to normalCDF(x).
*/
- static double normal_cdf(double x) {
+ static inline double normal_cdf(double x) {
return (0.5 * (1.0 + (erf(x / (sqrt(2.0))))));
}
private:
- static void check_inputs(long n, long k) {
+ static inline void check_inputs(long n, long k) {
if (n < 0) { throw std::invalid_argument("N must be non-negative"); }
if (k < 0) { throw std::invalid_argument("K must be non-negative"); }
if (k > n) { throw std::invalid_argument("K cannot exceed N"); }
@@ -201,7 +201,7 @@ private:
//@formatter:off
// Abramowitz and Stegun formula 7.1.28, p. 88; Claims accuracy of about 7 decimal digits */
- static double erf_of_nonneg(double x) {
+ static inline double erf_of_nonneg(double x) {
// The constants that appear below, formatted for easy checking against the book.
// a1 = 0.07052 30784
// a3 = 0.00927 05272
@@ -234,7 +234,7 @@ private:
return (1.0 - (1.0 / sum16));
}
- static double delta_of_num_stdevs(double kappa) {
+ static inline double delta_of_num_stdevs(double kappa) {
return (normal_cdf(-1.0 * kappa));
}
@@ -251,7 +251,7 @@ private:
// and it is worth keeping it that way so that it will always be easy to verify
// that the formula was typed in correctly.
- static double abramowitz_stegun_formula_26p5p22(double a, double b,
+ static inline double abramowitz_stegun_formula_26p5p22(double a, double b,
double yp) {
const double b2m1 = (2.0 * b) - 1.0;
const double a2m1 = (2.0 * a) - 1.0;
@@ -268,19 +268,19 @@ private:
// Formulas for some special cases.
- static double exact_upper_bound_on_p_k_eq_zero(double n, double delta) {
+ static inline double exact_upper_bound_on_p_k_eq_zero(double n, double delta) {
return (1.0 - pow(delta, (1.0 / n)));
}
- static double exact_lower_bound_on_p_k_eq_n(double n, double delta) {
+ static inline double exact_lower_bound_on_p_k_eq_n(double n, double delta) {
return (pow(delta, (1.0 / n)));
}
- static double exact_lower_bound_on_p_k_eq_1(double n, double delta) {
+ static inline double exact_lower_bound_on_p_k_eq_1(double n, double delta) {
return (1.0 - pow((1.0 - delta), (1.0 / n)));
}
- static double exactU_upper_bound_on_p_k_eq_minusone(double n, double delta) {
+ static inline double exactU_upper_bound_on_p_k_eq_minusone(double n, double delta) {
return (pow((1.0 - delta), (1.0 / n)));
}
diff --git a/sampling/include/var_opt_sketch.hpp b/sampling/include/var_opt_sketch.hpp
index a577200..d45b0aa 100644
--- a/sampling/include/var_opt_sketch.hpp
+++ b/sampling/include/var_opt_sketch.hpp
@@ -69,20 +69,20 @@ class var_opt_sketch {
void update(const T& item, double weight=1.0);
//void update(T&& item, double weight=1.0);
- uint32_t get_k() const;
- uint64_t get_n() const;
- uint32_t get_num_samples() const;
+ inline uint32_t get_k() const;
+ inline uint64_t get_n() const;
+ inline uint32_t get_num_samples() const;
- bool is_empty() const;
+ inline bool is_empty() const;
void reset();
// version for fixed-size arithmetic types (integer, floating point)
template<typename TT = T, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
- size_t get_serialized_size_bytes() const;
+ inline size_t get_serialized_size_bytes() const;
// version for all other types
template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
- size_t get_serialized_size_bytes() const;
+ inline size_t get_serialized_size_bytes() const;
std::vector<uint8_t, AllocU8<A>> serialize(unsigned header_size_bytes = 0) const;
void serialize(std::ostream& os) const;
@@ -95,7 +95,8 @@ class var_opt_sketch {
std::ostream& items_to_stream(std::ostream& os) const;
std::string items_to_string() const;
- subset_summary estimate_subset_sum(std::function<bool(T)> predicate) const;
+ template<typename P>
+ subset_summary estimate_subset_sum(P predicate) const;
class const_iterator;
const_iterator begin() const;
@@ -163,32 +164,32 @@ class var_opt_sketch {
var_opt_sketch(T* data, double* weights, size_t len, uint32_t k, uint64_t n, uint32_t h_count, uint32_t r_count, double total_wt_r);
// internal-use-only updates
- void update(const T& item, double weight, bool mark);
- void update_warmup_phase(const T& item, double weight, bool mark);
- void update_light(const T& item, double weight, bool mark);
- void update_heavy_r_eq1(const T& item, double weight, bool mark);
- void update_heavy_general(const T& item, double weight, bool mark);
-
- double get_tau() const;
- double peek_min() const;
- bool is_marked(int idx) const;
+ inline void update(const T& item, double weight, bool mark);
+ inline void update_warmup_phase(const T& item, double weight, bool mark);
+ inline void update_light(const T& item, double weight, bool mark);
+ inline void update_heavy_r_eq1(const T& item, double weight, bool mark);
+ inline void update_heavy_general(const T& item, double weight, bool mark);
+
+ inline double get_tau() const;
+ inline double peek_min() const;
+ inline bool is_marked(int idx) const;
- uint32_t pick_random_slot_in_r() const;
- uint32_t choose_delete_slot(double wt_cand, int num_cand) const;
- uint32_t choose_weighted_delete_slot(double wt_cand, int num_cand) const;
-
- void transition_from_warmup();
- void convert_to_heap();
- void restore_towards_leaves(int slot_in);
- void restore_towards_root(int slot_in);
- void push(const T& item, double wt, bool mark);
- void pop_min_to_m_region();
+ inline uint32_t pick_random_slot_in_r() const;
+ inline uint32_t choose_delete_slot(double wt_cand, int num_cand) const;
+ inline uint32_t choose_weighted_delete_slot(double wt_cand, int num_cand) const;
+
+ inline void transition_from_warmup();
+ inline void convert_to_heap();
+ inline void restore_towards_leaves(int slot_in);
+ inline void restore_towards_root(int slot_in);
+ inline void push(const T& item, double wt, bool mark);
+ inline void pop_min_to_m_region();
void grow_candidate_set(double wt_cands, int num_cands);
void decrease_k_by_1();
void strip_marks();
void force_set_k(int k); // used to resolve union gadget into sketch
void downsample_candidate_set(double wt_cands, int num_cands);
- void swap_values(int src, int dst);
+ inline void swap_values(int src, int dst);
void grow_data_arrays();
void allocate_data_arrays(uint32_t tgt_size, bool use_marks);
@@ -201,14 +202,14 @@ class var_opt_sketch {
// things to move to common utils and share among sketches
static uint32_t get_adjusted_size(int max_size, int resize_target);
static uint32_t starting_sub_multiple(int lg_target, int lg_rf, int lg_min);
- static double pseudo_hypergeometric_ub_on_p(uint64_t n, uint32_t k, double sampling_rate);
- static double pseudo_hypergeometric_lb_on_p(uint64_t n, uint32_t k, double sampling_rate);
+ static inline double pseudo_hypergeometric_ub_on_p(uint64_t n, uint32_t k, double sampling_rate);
+ static inline double pseudo_hypergeometric_lb_on_p(uint64_t n, uint32_t k, double sampling_rate);
static bool is_power_of_2(uint32_t v);
static uint32_t to_log_2(uint32_t v);
static uint32_t count_trailing_zeros(uint32_t v);
static uint32_t ceiling_power_of_2(uint32_t n);
- static uint32_t next_int(uint32_t max_value);
- static double next_double_exclude_zero();
+ static inline uint32_t next_int(uint32_t max_value);
+ static inline double next_double_exclude_zero();
};
template<typename T, typename S, typename A>
diff --git a/sampling/include/var_opt_sketch_impl.hpp b/sampling/include/var_opt_sketch_impl.hpp
index 93f10b7..f65dbba 100644
--- a/sampling/include/var_opt_sketch_impl.hpp
+++ b/sampling/include/var_opt_sketch_impl.hpp
@@ -1325,7 +1325,8 @@ void var_opt_sketch<T, S, A>::validate_and_set_current_size(int preamble_longs)
template<typename T, typename S, typename A>
-subset_summary var_opt_sketch<T, S, A>::estimate_subset_sum(std::function<bool(T)> predicate) const {
+template<typename P>
+subset_summary var_opt_sketch<T, S, A>::estimate_subset_sum(P predicate) const {
if (n_ == 0) {
return {0.0, 0.0, 0.0, 0.0};
}
@@ -1363,7 +1364,7 @@ subset_summary var_opt_sketch<T, S, A>::estimate_subset_sum(std::function<bool(T
double lb_true_fraction = pseudo_hypergeometric_lb_on_p(r_, r_true_count, effective_sampling_rate);
double estimated_true_fraction = (1.0 * r_true_count) / r_;
double ub_true_fraction = pseudo_hypergeometric_ub_on_p(r_, r_true_count, effective_sampling_rate);
-
+
return { h_true_wt + (total_wt_r_ * lb_true_fraction),
h_true_wt + (total_wt_r_ * estimated_true_fraction),
h_true_wt + (total_wt_r_ * ub_true_fraction),
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org