You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@datasketches.apache.org by jm...@apache.org on 2020/03/25 16:20:18 UTC

[incubator-datasketches-cpp] branch vo_performance created (now f5a9888)

This is an automated email from the ASF dual-hosted git repository.

jmalkin pushed a change to branch vo_performance
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git.


      at f5a9888  replace std::function with template for subset sum estimates, inline internal methods where appropriate

This branch includes the following new commits:

     new f5a9888  replace std::function with template for subset sum estimates, inline internal methods where appropriate

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org

[incubator-datasketches-cpp] 01/01: replace std::function with template for subset sum estimates, inline internal methods where appropriate

Posted by jm...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

jmalkin pushed a commit to branch vo_performance
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git

commit f5a98884ab9776dcc606152afaa678f24264039f
Author: Jon Malkin <jm...@users.noreply.github.com>
AuthorDate: Wed Mar 25 09:19:57 2020 -0700

    replace std::function with template for subset sum estimates, inline internal methods where appropriate
---
 common/include/bounds_binomial_proportions.hpp | 26 +++++------
 sampling/include/var_opt_sketch.hpp            | 63 +++++++++++++-------------
 sampling/include/var_opt_sketch_impl.hpp       |  5 +-
 3 files changed, 48 insertions(+), 46 deletions(-)

diff --git a/common/include/bounds_binomial_proportions.hpp b/common/include/bounds_binomial_proportions.hpp
index 8b00cbd..06ab484 100644
--- a/common/include/bounds_binomial_proportions.hpp
+++ b/common/include/bounds_binomial_proportions.hpp
@@ -110,7 +110,7 @@ public:
    * @return the lower bound of the approximate Clopper-Pearson confidence interval for the
    * unknown success probability.
    */
-  static double approximate_lower_bound_on_p(long n, long k, double num_std_devs) {
+  static inline double approximate_lower_bound_on_p(long n, long k, double num_std_devs) {
     check_inputs(n, k);
     if (n == 0) { return 0.0; } // the coin was never flipped, so we know nothing
     else if (k == 0) { return 0.0; }
@@ -145,7 +145,7 @@ public:
    * @return the upper bound of the approximate Clopper-Pearson confidence interval for the
    * unknown success probability.
    */
-  static double approximate_upper_bound_on_p(long n, long k, double num_std_devs) {
+  static inline double approximate_upper_bound_on_p(long n, long k, double num_std_devs) {
     check_inputs(n, k);
     if (n == 0) { return 1.0; } // the coin was never flipped, so we know nothing
     else if (k == n) { return 1.0; }
@@ -167,7 +167,7 @@ public:
    * @param k is the number of successes. Must be non-negative, and cannot exceed n.
    * @return the estimate of the unknown binomial proportion.
    */
-  static double estimate_unknown_p(long n, long k) {
+  static inline double estimate_unknown_p(long n, long k) {
     check_inputs(n, k);
     if (n == 0) { return 0.5; } // the coin was never flipped, so we know nothing
     else { return ((double) k / (double) n); }
@@ -178,7 +178,7 @@ public:
    * @param x is the input to the erf function
    * @return returns erf(x), accurate to roughly 7 decimal digits.
    */
-  static double erf(double x) {
+  static inline double erf(double x) {
     if (x < 0.0) { return (-1.0 * (erf_of_nonneg(-1.0 * x))); }
     else { return (erf_of_nonneg(x)); }
   }
@@ -188,12 +188,12 @@ public:
    * @param x is the input to the normal_cdf function
    * @return returns the approximation to normalCDF(x).
    */
-   static double normal_cdf(double x) {
+   static inline double normal_cdf(double x) {
     return (0.5 * (1.0 + (erf(x / (sqrt(2.0))))));
   }
 
 private:
-  static void check_inputs(long n, long k) {
+  static inline void check_inputs(long n, long k) {
     if (n < 0) { throw std::invalid_argument("N must be non-negative"); }
     if (k < 0) { throw std::invalid_argument("K must be non-negative"); }
     if (k > n) { throw std::invalid_argument("K cannot exceed N"); }
@@ -201,7 +201,7 @@ private:
 
   //@formatter:off
   // Abramowitz and Stegun formula 7.1.28, p. 88; Claims accuracy of about 7 decimal digits */
-  static double erf_of_nonneg(double x) {
+  static inline double erf_of_nonneg(double x) {
     // The constants that appear below, formatted for easy checking against the book.
     //    a1 = 0.07052 30784
     //    a3 = 0.00927 05272
@@ -234,7 +234,7 @@ private:
     return (1.0 - (1.0 / sum16));
   }
 
-  static double delta_of_num_stdevs(double kappa) {
+  static inline double delta_of_num_stdevs(double kappa) {
     return (normal_cdf(-1.0 * kappa));
   }
 
@@ -251,7 +251,7 @@ private:
   // and it is worth keeping it that way so that it will always be easy to verify
   // that the formula was typed in correctly.
 
-  static double abramowitz_stegun_formula_26p5p22(double a, double b,
+  static inline double abramowitz_stegun_formula_26p5p22(double a, double b,
       double yp) {
     const double b2m1 = (2.0 * b) - 1.0;
     const double a2m1 = (2.0 * a) - 1.0;
@@ -268,19 +268,19 @@ private:
 
   // Formulas for some special cases.
 
-  static double exact_upper_bound_on_p_k_eq_zero(double n, double delta) {
+  static inline double exact_upper_bound_on_p_k_eq_zero(double n, double delta) {
     return (1.0 - pow(delta, (1.0 / n)));
   }
 
-  static double exact_lower_bound_on_p_k_eq_n(double n, double delta) {
+  static inline double exact_lower_bound_on_p_k_eq_n(double n, double delta) {
     return (pow(delta, (1.0 / n)));
   }
 
-  static double exact_lower_bound_on_p_k_eq_1(double n, double delta) {
+  static inline double exact_lower_bound_on_p_k_eq_1(double n, double delta) {
     return (1.0 - pow((1.0 - delta), (1.0 / n)));
   }
 
-  static double exactU_upper_bound_on_p_k_eq_minusone(double n, double delta) {
+  static inline double exactU_upper_bound_on_p_k_eq_minusone(double n, double delta) {
     return (pow((1.0 - delta), (1.0 / n)));
   }
 
diff --git a/sampling/include/var_opt_sketch.hpp b/sampling/include/var_opt_sketch.hpp
index a577200..d45b0aa 100644
--- a/sampling/include/var_opt_sketch.hpp
+++ b/sampling/include/var_opt_sketch.hpp
@@ -69,20 +69,20 @@ class var_opt_sketch {
     void update(const T& item, double weight=1.0);
     //void update(T&& item, double weight=1.0);
 
-    uint32_t get_k() const;
-    uint64_t get_n() const;
-    uint32_t get_num_samples() const;
+    inline uint32_t get_k() const;
+    inline uint64_t get_n() const;
+    inline uint32_t get_num_samples() const;
     
-    bool is_empty() const;
+    inline bool is_empty() const;
     void reset();
 
     // version for fixed-size arithmetic types (integer, floating point)
     template<typename TT = T, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
-    size_t get_serialized_size_bytes() const;
+    inline size_t get_serialized_size_bytes() const;
 
     // version for all other types
     template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
-    size_t get_serialized_size_bytes() const;
+    inline size_t get_serialized_size_bytes() const;
 
     std::vector<uint8_t, AllocU8<A>> serialize(unsigned header_size_bytes = 0) const;
     void serialize(std::ostream& os) const;
@@ -95,7 +95,8 @@ class var_opt_sketch {
     std::ostream& items_to_stream(std::ostream& os) const;
     std::string items_to_string() const;
 
-    subset_summary estimate_subset_sum(std::function<bool(T)> predicate) const;
+    template<typename P>
+    subset_summary estimate_subset_sum(P predicate) const;
 
     class const_iterator;
     const_iterator begin() const;
@@ -163,32 +164,32 @@ class var_opt_sketch {
     var_opt_sketch(T* data, double* weights, size_t len, uint32_t k, uint64_t n, uint32_t h_count, uint32_t r_count, double total_wt_r);
 
     // internal-use-only updates
-    void update(const T& item, double weight, bool mark);
-    void update_warmup_phase(const T& item, double weight, bool mark);
-    void update_light(const T& item, double weight, bool mark);
-    void update_heavy_r_eq1(const T& item, double weight, bool mark);
-    void update_heavy_general(const T& item, double weight, bool mark);
-
-    double get_tau() const;
-    double peek_min() const;
-    bool is_marked(int idx) const;
+    inline void update(const T& item, double weight, bool mark);
+    inline void update_warmup_phase(const T& item, double weight, bool mark);
+    inline void update_light(const T& item, double weight, bool mark);
+    inline void update_heavy_r_eq1(const T& item, double weight, bool mark);
+    inline void update_heavy_general(const T& item, double weight, bool mark);
+
+    inline double get_tau() const;
+    inline double peek_min() const;
+    inline bool is_marked(int idx) const;
     
-    uint32_t pick_random_slot_in_r() const;
-    uint32_t choose_delete_slot(double wt_cand, int num_cand) const;
-    uint32_t choose_weighted_delete_slot(double wt_cand, int num_cand) const;
-
-    void transition_from_warmup();
-    void convert_to_heap();
-    void restore_towards_leaves(int slot_in);
-    void restore_towards_root(int slot_in);
-    void push(const T& item, double wt, bool mark);
-    void pop_min_to_m_region();
+    inline uint32_t pick_random_slot_in_r() const;
+    inline uint32_t choose_delete_slot(double wt_cand, int num_cand) const;
+    inline uint32_t choose_weighted_delete_slot(double wt_cand, int num_cand) const;
+
+    inline void transition_from_warmup();
+    inline void convert_to_heap();
+    inline void restore_towards_leaves(int slot_in);
+    inline void restore_towards_root(int slot_in);
+    inline void push(const T& item, double wt, bool mark);
+    inline void pop_min_to_m_region();
     void grow_candidate_set(double wt_cands, int num_cands);    
     void decrease_k_by_1();
     void strip_marks();
     void force_set_k(int k); // used to resolve union gadget into sketch
     void downsample_candidate_set(double wt_cands, int num_cands);
-    void swap_values(int src, int dst);
+    inline void swap_values(int src, int dst);
     void grow_data_arrays();
     void allocate_data_arrays(uint32_t tgt_size, bool use_marks);
 
@@ -201,14 +202,14 @@ class var_opt_sketch {
     // things to move to common utils and share among sketches
     static uint32_t get_adjusted_size(int max_size, int resize_target);
     static uint32_t starting_sub_multiple(int lg_target, int lg_rf, int lg_min);
-    static double pseudo_hypergeometric_ub_on_p(uint64_t n, uint32_t k, double sampling_rate);
-    static double pseudo_hypergeometric_lb_on_p(uint64_t n, uint32_t k, double sampling_rate);
+    static inline double pseudo_hypergeometric_ub_on_p(uint64_t n, uint32_t k, double sampling_rate);
+    static inline double pseudo_hypergeometric_lb_on_p(uint64_t n, uint32_t k, double sampling_rate);
     static bool is_power_of_2(uint32_t v);
     static uint32_t to_log_2(uint32_t v);
     static uint32_t count_trailing_zeros(uint32_t v);
     static uint32_t ceiling_power_of_2(uint32_t n);
-    static uint32_t next_int(uint32_t max_value);
-    static double next_double_exclude_zero();
+    static inline uint32_t next_int(uint32_t max_value);
+    static inline double next_double_exclude_zero();
 };
 
 template<typename T, typename S, typename A>
diff --git a/sampling/include/var_opt_sketch_impl.hpp b/sampling/include/var_opt_sketch_impl.hpp
index 93f10b7..f65dbba 100644
--- a/sampling/include/var_opt_sketch_impl.hpp
+++ b/sampling/include/var_opt_sketch_impl.hpp
@@ -1325,7 +1325,8 @@ void var_opt_sketch<T, S, A>::validate_and_set_current_size(int preamble_longs)
 
 
 template<typename T, typename S, typename A>
-subset_summary var_opt_sketch<T, S, A>::estimate_subset_sum(std::function<bool(T)> predicate) const {
+template<typename P>
+subset_summary var_opt_sketch<T, S, A>::estimate_subset_sum(P predicate) const {
   if (n_ == 0) {
     return {0.0, 0.0, 0.0, 0.0};
   }
@@ -1363,7 +1364,7 @@ subset_summary var_opt_sketch<T, S, A>::estimate_subset_sum(std::function<bool(T
   double lb_true_fraction = pseudo_hypergeometric_lb_on_p(r_, r_true_count, effective_sampling_rate);
   double estimated_true_fraction = (1.0 * r_true_count) / r_;
   double ub_true_fraction = pseudo_hypergeometric_ub_on_p(r_, r_true_count, effective_sampling_rate);
-  
+
   return {  h_true_wt + (total_wt_r_ * lb_true_fraction),
             h_true_wt + (total_wt_r_ * estimated_true_fraction),
             h_true_wt + (total_wt_r_ * ub_true_fraction),


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org