You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by jm...@apache.org on 2020/03/25 16:20:19 UTC
[incubator-datasketches-cpp] 01/01: replace std::function with template for subset sum estimates, inline internal methods where appropriate

This is an automated email from the ASF dual-hosted git repository.

jmalkin pushed a commit to branch vo_performance
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git

commit f5a98884ab9776dcc606152afaa678f24264039f
Author: Jon Malkin <jm...@users.noreply.github.com>
AuthorDate: Wed Mar 25 09:19:57 2020 -0700

    replace std::function with template for subset sum estimates, inline internal methods where appropriate
---
 common/include/bounds_binomial_proportions.hpp | 26 +++++------
 sampling/include/var_opt_sketch.hpp            | 63 +++++++++++++-------------
 sampling/include/var_opt_sketch_impl.hpp       |  5 +-
 3 files changed, 48 insertions(+), 46 deletions(-)

diff --git a/common/include/bounds_binomial_proportions.hpp b/common/include/bounds_binomial_proportions.hpp
index 8b00cbd..06ab484 100644
--- a/common/include/bounds_binomial_proportions.hpp
+++ b/common/include/bounds_binomial_proportions.hpp
@@ -110,7 +110,7 @@ public:
    * @return the lower bound of the approximate Clopper-Pearson confidence interval for the
    * unknown success probability.
    */
-  static double approximate_lower_bound_on_p(long n, long k, double num_std_devs) {
+  static inline double approximate_lower_bound_on_p(long n, long k, double num_std_devs) {
     check_inputs(n, k);
     if (n == 0) { return 0.0; } // the coin was never flipped, so we know nothing
     else if (k == 0) { return 0.0; }
@@ -145,7 +145,7 @@ public:
    * @return the upper bound of the approximate Clopper-Pearson confidence interval for the
    * unknown success probability.
    */
-  static double approximate_upper_bound_on_p(long n, long k, double num_std_devs) {
+  static inline double approximate_upper_bound_on_p(long n, long k, double num_std_devs) {
     check_inputs(n, k);
     if (n == 0) { return 1.0; } // the coin was never flipped, so we know nothing
     else if (k == n) { return 1.0; }
@@ -167,7 +167,7 @@ public:
    * @param k is the number of successes. Must be non-negative, and cannot exceed n.
    * @return the estimate of the unknown binomial proportion.
    */
-  static double estimate_unknown_p(long n, long k) {
+  static inline double estimate_unknown_p(long n, long k) {
     check_inputs(n, k);
     if (n == 0) { return 0.5; } // the coin was never flipped, so we know nothing
     else { return ((double) k / (double) n); }
@@ -178,7 +178,7 @@ public:
    * @param x is the input to the erf function
    * @return returns erf(x), accurate to roughly 7 decimal digits.
    */
-  static double erf(double x) {
+  static inline double erf(double x) {
     if (x < 0.0) { return (-1.0 * (erf_of_nonneg(-1.0 * x))); }
     else { return (erf_of_nonneg(x)); }
   }
@@ -188,12 +188,12 @@ public:
    * @param x is the input to the normal_cdf function
    * @return returns the approximation to normalCDF(x).
    */
-   static double normal_cdf(double x) {
+   static inline double normal_cdf(double x) {
     return (0.5 * (1.0 + (erf(x / (sqrt(2.0))))));
   }
 
 private:
-  static void check_inputs(long n, long k) {
+  static inline void check_inputs(long n, long k) {
     if (n < 0) { throw std::invalid_argument("N must be non-negative"); }
     if (k < 0) { throw std::invalid_argument("K must be non-negative"); }
     if (k > n) { throw std::invalid_argument("K cannot exceed N"); }
@@ -201,7 +201,7 @@ private:
 
   //@formatter:off
   // Abramowitz and Stegun formula 7.1.28, p. 88; Claims accuracy of about 7 decimal digits */
-  static double erf_of_nonneg(double x) {
+  static inline double erf_of_nonneg(double x) {
     // The constants that appear below, formatted for easy checking against the book.
     //    a1 = 0.07052 30784
     //    a3 = 0.00927 05272
@@ -234,7 +234,7 @@ private:
     return (1.0 - (1.0 / sum16));
   }
 
-  static double delta_of_num_stdevs(double kappa) {
+  static inline double delta_of_num_stdevs(double kappa) {
     return (normal_cdf(-1.0 * kappa));
   }
 
@@ -251,7 +251,7 @@ private:
   // and it is worth keeping it that way so that it will always be easy to verify
   // that the formula was typed in correctly.
 
-  static double abramowitz_stegun_formula_26p5p22(double a, double b,
+  static inline double abramowitz_stegun_formula_26p5p22(double a, double b,
       double yp) {
     const double b2m1 = (2.0 * b) - 1.0;
     const double a2m1 = (2.0 * a) - 1.0;
@@ -268,19 +268,19 @@ private:
 
   // Formulas for some special cases.
 
-  static double exact_upper_bound_on_p_k_eq_zero(double n, double delta) {
+  static inline double exact_upper_bound_on_p_k_eq_zero(double n, double delta) {
     return (1.0 - pow(delta, (1.0 / n)));
   }
 
-  static double exact_lower_bound_on_p_k_eq_n(double n, double delta) {
+  static inline double exact_lower_bound_on_p_k_eq_n(double n, double delta) {
     return (pow(delta, (1.0 / n)));
   }
 
-  static double exact_lower_bound_on_p_k_eq_1(double n, double delta) {
+  static inline double exact_lower_bound_on_p_k_eq_1(double n, double delta) {
     return (1.0 - pow((1.0 - delta), (1.0 / n)));
   }
 
-  static double exactU_upper_bound_on_p_k_eq_minusone(double n, double delta) {
+  static inline double exactU_upper_bound_on_p_k_eq_minusone(double n, double delta) {
     return (pow((1.0 - delta), (1.0 / n)));
   }
 
diff --git a/sampling/include/var_opt_sketch.hpp b/sampling/include/var_opt_sketch.hpp
index a577200..d45b0aa 100644
--- a/sampling/include/var_opt_sketch.hpp
+++ b/sampling/include/var_opt_sketch.hpp
@@ -69,20 +69,20 @@ class var_opt_sketch {
     void update(const T& item, double weight=1.0);
     //void update(T&& item, double weight=1.0);
 
-    uint32_t get_k() const;
-    uint64_t get_n() const;
-    uint32_t get_num_samples() const;
+    inline uint32_t get_k() const;
+    inline uint64_t get_n() const;
+    inline uint32_t get_num_samples() const;
     
-    bool is_empty() const;
+    inline bool is_empty() const;
     void reset();
 
     // version for fixed-size arithmetic types (integer, floating point)
     template<typename TT = T, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
-    size_t get_serialized_size_bytes() const;
+    inline size_t get_serialized_size_bytes() const;
 
     // version for all other types
     template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
-    size_t get_serialized_size_bytes() const;
+    inline size_t get_serialized_size_bytes() const;
 
     std::vector<uint8_t, AllocU8<A>> serialize(unsigned header_size_bytes = 0) const;
     void serialize(std::ostream& os) const;
@@ -95,7 +95,8 @@ class var_opt_sketch {
     std::ostream& items_to_stream(std::ostream& os) const;
     std::string items_to_string() const;
 
-    subset_summary estimate_subset_sum(std::function<bool(T)> predicate) const;
+    template<typename P>
+    subset_summary estimate_subset_sum(P predicate) const;
 
     class const_iterator;
     const_iterator begin() const;
@@ -163,32 +164,32 @@ class var_opt_sketch {
     var_opt_sketch(T* data, double* weights, size_t len, uint32_t k, uint64_t n, uint32_t h_count, uint32_t r_count, double total_wt_r);
 
     // internal-use-only updates
-    void update(const T& item, double weight, bool mark);
-    void update_warmup_phase(const T& item, double weight, bool mark);
-    void update_light(const T& item, double weight, bool mark);
-    void update_heavy_r_eq1(const T& item, double weight, bool mark);
-    void update_heavy_general(const T& item, double weight, bool mark);
-
-    double get_tau() const;
-    double peek_min() const;
-    bool is_marked(int idx) const;
+    inline void update(const T& item, double weight, bool mark);
+    inline void update_warmup_phase(const T& item, double weight, bool mark);
+    inline void update_light(const T& item, double weight, bool mark);
+    inline void update_heavy_r_eq1(const T& item, double weight, bool mark);
+    inline void update_heavy_general(const T& item, double weight, bool mark);
+
+    inline double get_tau() const;
+    inline double peek_min() const;
+    inline bool is_marked(int idx) const;
     
-    uint32_t pick_random_slot_in_r() const;
-    uint32_t choose_delete_slot(double wt_cand, int num_cand) const;
-    uint32_t choose_weighted_delete_slot(double wt_cand, int num_cand) const;
-
-    void transition_from_warmup();
-    void convert_to_heap();
-    void restore_towards_leaves(int slot_in);
-    void restore_towards_root(int slot_in);
-    void push(const T& item, double wt, bool mark);
-    void pop_min_to_m_region();
+    inline uint32_t pick_random_slot_in_r() const;
+    inline uint32_t choose_delete_slot(double wt_cand, int num_cand) const;
+    inline uint32_t choose_weighted_delete_slot(double wt_cand, int num_cand) const;
+
+    inline void transition_from_warmup();
+    inline void convert_to_heap();
+    inline void restore_towards_leaves(int slot_in);
+    inline void restore_towards_root(int slot_in);
+    inline void push(const T& item, double wt, bool mark);
+    inline void pop_min_to_m_region();
     void grow_candidate_set(double wt_cands, int num_cands);    
     void decrease_k_by_1();
     void strip_marks();
     void force_set_k(int k); // used to resolve union gadget into sketch
     void downsample_candidate_set(double wt_cands, int num_cands);
-    void swap_values(int src, int dst);
+    inline void swap_values(int src, int dst);
     void grow_data_arrays();
     void allocate_data_arrays(uint32_t tgt_size, bool use_marks);
 
@@ -201,14 +202,14 @@ class var_opt_sketch {
     // things to move to common utils and share among sketches
     static uint32_t get_adjusted_size(int max_size, int resize_target);
     static uint32_t starting_sub_multiple(int lg_target, int lg_rf, int lg_min);
-    static double pseudo_hypergeometric_ub_on_p(uint64_t n, uint32_t k, double sampling_rate);
-    static double pseudo_hypergeometric_lb_on_p(uint64_t n, uint32_t k, double sampling_rate);
+    static inline double pseudo_hypergeometric_ub_on_p(uint64_t n, uint32_t k, double sampling_rate);
+    static inline double pseudo_hypergeometric_lb_on_p(uint64_t n, uint32_t k, double sampling_rate);
     static bool is_power_of_2(uint32_t v);
     static uint32_t to_log_2(uint32_t v);
     static uint32_t count_trailing_zeros(uint32_t v);
     static uint32_t ceiling_power_of_2(uint32_t n);
-    static uint32_t next_int(uint32_t max_value);
-    static double next_double_exclude_zero();
+    static inline uint32_t next_int(uint32_t max_value);
+    static inline double next_double_exclude_zero();
 };
 
 template<typename T, typename S, typename A>
diff --git a/sampling/include/var_opt_sketch_impl.hpp b/sampling/include/var_opt_sketch_impl.hpp
index 93f10b7..f65dbba 100644
--- a/sampling/include/var_opt_sketch_impl.hpp
+++ b/sampling/include/var_opt_sketch_impl.hpp
@@ -1325,7 +1325,8 @@ void var_opt_sketch<T, S, A>::validate_and_set_current_size(int preamble_longs)
 
 
 template<typename T, typename S, typename A>
-subset_summary var_opt_sketch<T, S, A>::estimate_subset_sum(std::function<bool(T)> predicate) const {
+template<typename P>
+subset_summary var_opt_sketch<T, S, A>::estimate_subset_sum(P predicate) const {
   if (n_ == 0) {
     return {0.0, 0.0, 0.0, 0.0};
   }
@@ -1363,7 +1364,7 @@ subset_summary var_opt_sketch<T, S, A>::estimate_subset_sum(std::function<bool(T
   double lb_true_fraction = pseudo_hypergeometric_lb_on_p(r_, r_true_count, effective_sampling_rate);
   double estimated_true_fraction = (1.0 * r_true_count) / r_;
   double ub_true_fraction = pseudo_hypergeometric_ub_on_p(r_, r_true_count, effective_sampling_rate);
-  
+
   return {  h_true_wt + (total_wt_r_ * lb_true_fraction),
             h_true_wt + (total_wt_r_ * estimated_true_fraction),
             h_true_wt + (total_wt_r_ * ub_true_fraction),


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org