You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@datasketches.apache.org by jm...@apache.org on 2020/02/18 22:07:25 UTC

[incubator-datasketches-cpp] branch varopt_cleanup created (now 6bf8b14)

This is an automated email from the ASF dual-hosted git repository.

jmalkin pushed a change to branch varopt_cleanup
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git.


      at 6bf8b14  add consts to varopt methods where appropriate, clean up gap item handling, add varopt to python readme

This branch includes the following new commits:

     new 6bf8b14  add consts to varopt methods where appropriate, clean up gap item handling, add varopt to python readme

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org

[incubator-datasketches-cpp] 01/01: add consts to varopt methods where appropriate, clean up gap item handling, add varopt to python readme

Posted by jm...@apache.org.

This is an automated email from the ASF dual-hosted git repository.

jmalkin pushed a commit to branch varopt_cleanup
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git

commit 6bf8b1489b0f58bd4502d896e691a590e5a7b718
Author: Jon Malkin <jm...@users.noreply.github.com>
AuthorDate: Tue Feb 18 14:07:04 2020 -0800

    add consts to varopt methods where appropriate, clean up gap item handling, add varopt to python readme
---
 python/README.md                         |  3 +
 python/src/kll_wrapper.cpp               |  4 +-
 sampling/include/var_opt_sketch_impl.hpp | 98 ++++++++++++++++----------------
 sampling/include/var_opt_union_impl.hpp  | 39 +++++++------
 4 files changed, 72 insertions(+), 72 deletions(-)

diff --git a/python/README.md b/python/README.md
index 867aad9..0d083ba 100644
--- a/python/README.md
+++ b/python/README.md
@@ -39,6 +39,9 @@ Having installed the library, loading the Datasketches library in Python is simp
 - CPC
     - `cpc_sketch`
     - `cpc_union`
+- VarOpt Sampling
+    - `var_opt_sketch`
+    - `var_opt_union`
 
 ## Known Differences from C++
 
diff --git a/python/src/kll_wrapper.cpp b/python/src/kll_wrapper.cpp
index 0740f11..69b1635 100644
--- a/python/src/kll_wrapper.cpp
+++ b/python/src/kll_wrapper.cpp
@@ -110,7 +110,7 @@ void bind_kll_sketch(py::module &m, const char* name) {
     .def(py::init<uint16_t>(), py::arg("k"))
     .def(py::init<const kll_sketch<T>&>())
     .def("update", (void (kll_sketch<T>::*)(const T&)) &kll_sketch<T>::update, py::arg("item"))
-    .def("merge", &kll_sketch<T>::merge, py::arg("sketch"))
+    .def("merge", (void (kll_sketch<T>::*)(const kll_sketch<T>&)) &kll_sketch<T>::merge, py::arg("sketch"))
     .def("__str__", &dspy::kll_sketch_to_string<T>)
     .def("is_empty", &kll_sketch<T>::is_empty)
     .def("get_n", &kll_sketch<T>::get_n)
@@ -127,8 +127,6 @@ void bind_kll_sketch(py::module &m, const char* name) {
          py::arg("as_pmf"))
     .def_static("get_normalized_rank_error", &dspy::kll_sketch_generic_normalized_rank_error<T>,
          py::arg("k"), py::arg("as_pmf"))
-    // can't yet get this one to work
-    //.def("get_serialized_size_bytes", &kll_sketch<T>::get_serialized_size_bytes)
     .def("serialize", &dspy::kll_sketch_serialize<T>)
     .def_static("deserialize", &dspy::kll_sketch_deserialize<T>)
     ;
diff --git a/sampling/include/var_opt_sketch_impl.hpp b/sampling/include/var_opt_sketch_impl.hpp
index f0e316a..ddf0716 100644
--- a/sampling/include/var_opt_sketch_impl.hpp
+++ b/sampling/include/var_opt_sketch_impl.hpp
@@ -492,11 +492,11 @@ std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T,S,A>::serialize(unsigned heade
 
 template<typename T, typename S, typename A>
 void var_opt_sketch<T,S,A>::serialize(std::ostream& os) const {
-  bool empty = (h_ == 0) && (r_ == 0);
+  const bool empty = (h_ == 0) && (r_ == 0);
 
-  uint8_t preLongs = (empty ? PREAMBLE_LONGS_EMPTY
+  const uint8_t preLongs = (empty ? PREAMBLE_LONGS_EMPTY
                                   : (r_ == 0 ? PREAMBLE_LONGS_WARMUP : PREAMBLE_LONGS_FULL));
-  uint8_t first_byte = (preLongs & 0x3F) | ((static_cast<uint8_t>(rf_)) << 6);
+  const uint8_t first_byte = (preLongs & 0x3F) | ((static_cast<uint8_t>(rf_)) << 6);
   uint8_t flags = (marks_ != nullptr ? GADGET_FLAG_MASK : 0);
 
   if (empty) {
@@ -504,8 +504,8 @@ void var_opt_sketch<T,S,A>::serialize(std::ostream& os) const {
   }
 
   // first prelong
-  uint8_t ser_ver(SER_VER);
-  uint8_t family(FAMILY_ID);
+  const uint8_t ser_ver(SER_VER);
+  const uint8_t family(FAMILY_ID);
   os.write((char*)&first_byte, sizeof(uint8_t));
   os.write((char*)&ser_ver, sizeof(uint8_t));
   os.write((char*)&family, sizeof(uint8_t));
@@ -571,8 +571,8 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
   check_preamble_longs(preamble_longs, flags);
   check_family_and_serialization_version(family_id, serial_version);
 
-  bool is_empty = flags & EMPTY_FLAG_MASK;
-  bool is_gadget = flags & GADGET_FLAG_MASK;
+  const bool is_empty = flags & EMPTY_FLAG_MASK;
+  const bool is_gadget = flags & GADGET_FLAG_MASK;
 
   return is_empty ? var_opt_sketch<T,S,A>(k, rf, is_gadget) : var_opt_sketch<T,S,A>(k, rf, is_gadget, preamble_longs, bytes, size);
 }
@@ -595,8 +595,8 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is) {
   check_preamble_longs(preamble_longs, flags);
   check_family_and_serialization_version(family_id, serial_version);
 
-  bool is_empty = flags & EMPTY_FLAG_MASK;
-  bool is_gadget = flags & GADGET_FLAG_MASK;
+  const bool is_empty = flags & EMPTY_FLAG_MASK;
+  const bool is_gadget = flags & GADGET_FLAG_MASK;
 
   return is_empty ? var_opt_sketch<T,S,A>(k, rf, is_gadget) : var_opt_sketch<T,S,A>(k, rf, is_gadget, preamble_longs, is);
 }
@@ -608,10 +608,10 @@ bool var_opt_sketch<T,S,A>::is_empty() const {
 
 template<typename T, typename S, typename A>
 void var_opt_sketch<T,S,A>::reset() {
-  uint32_t prev_alloc = curr_items_alloc_;
+  const uint32_t prev_alloc = curr_items_alloc_;
 
-  uint32_t ceiling_lg_k = to_log_2(ceiling_power_of_2(k_));
-  int initial_lg_size = starting_sub_multiple(ceiling_lg_k, rf_, MIN_LG_ARR_ITEMS);
+  const uint32_t ceiling_lg_k = to_log_2(ceiling_power_of_2(k_));
+  const int initial_lg_size = starting_sub_multiple(ceiling_lg_k, rf_, MIN_LG_ARR_ITEMS);
   curr_items_alloc_ = get_adjusted_size(k_, 1 << initial_lg_size);
   if (curr_items_alloc_ == k_) { // if full size, need to leave 1 for the gap
     ++curr_items_alloc_;
@@ -631,7 +631,7 @@ void var_opt_sketch<T,S,A>::reset() {
   }
 
   if (curr_items_alloc_ < prev_alloc) {
-    bool is_gadget = (marks_ != nullptr);
+    const bool is_gadget = (marks_ != nullptr);
   
     A().deallocate(data_, curr_items_alloc_);
     AllocDouble().deallocate(weights_, curr_items_alloc_);
@@ -663,7 +663,7 @@ uint32_t var_opt_sketch<T,S,A>::get_k() const {
 
 template<typename T, typename S, typename A>
 uint32_t var_opt_sketch<T,S,A>::get_num_samples() const {
-  uint32_t num_in_sketch = h_ + r_;
+  const uint32_t num_in_sketch = h_ + r_;
   return (num_in_sketch < k_ ? num_in_sketch : k_);
 }
 
@@ -696,7 +696,7 @@ template<typename T, typename S, typename A>
 std::ostream& var_opt_sketch<T,S,A>::items_to_stream(std::ostream& os) const {
   os << "### Sketch Items" << std::endl;
 
-  uint32_t print_length = (n_ < k_ ? n_ : k_ + 1);
+  const uint32_t print_length = (n_ < k_ ? n_ : k_ + 1);
   for (int i = 0; i < print_length; ++i) {
     if (i == h_) {
       os << i << ": GAP" << std::endl;
@@ -739,13 +739,13 @@ void var_opt_sketch<T,S,A>::update(const T& item, double weight, bool mark) {
 
     // what tau would be if deletion candidates turn out to be R plus the new item
     // note: (r_ + 1) - 1 is intentional
-    double hypothetical_tau = (weight + total_wt_r_) / ((r_ + 1) - 1);
+    const double hypothetical_tau = (weight + total_wt_r_) / ((r_ + 1) - 1);
 
     // is new item's turn to be considered for reservoir?
-    double condition1 = (h_ == 0) || (weight <= peek_min());
+    const double condition1 = (h_ == 0) || (weight <= peek_min());
 
     // is new item light enough for reservoir?
-    double condition2 = weight < hypothetical_tau;
+    const double condition2 = weight < hypothetical_tau;
   
     if (condition1 && condition2) {
       update_light(item, weight, mark);
@@ -791,7 +791,7 @@ void var_opt_sketch<T,S,A>::update_light(const T& item, double weight, bool mark
   assert(r_ >= 1);
   assert((r_ + h_) == k_);
 
-  int m_slot = h_; // index of the gap, which becomes the M region
+  const int m_slot = h_; // index of the gap, which becomes the M region
   if (filled_data_) {
     data_[m_slot] = item;
   } else {
@@ -839,7 +839,7 @@ void var_opt_sketch<T,S,A>::update_heavy_r_eq1(const T& item, double weight, boo
 
   // Any set of two items is downsample-able to one item,
   // so the two lightest items are a valid starting point for the following
-  int m_slot = k_ - 1; // array is k+1, 1 in R, so slot before is M
+  const int m_slot = k_ - 1; // array is k+1, 1 in R, so slot before is M
   grow_candidate_set(weights_[m_slot] + total_wt_r_, 2);
 }
 
@@ -870,8 +870,8 @@ void var_opt_sketch<T,S,A>::decrease_k_by_1() {
     // still just data), reduce k, and then re-insert the item
 
     // first, slide the R zone to the left by 1, temporarily filling the gap
-    uint32_t old_gap_idx = h_;
-    uint32_t old_final_r_idx = (h_ + 1 + r_) - 1;
+    const uint32_t old_gap_idx = h_;
+    const uint32_t old_final_r_idx = (h_ + 1 + r_) - 1;
 
     assert(old_final_r_idx == k_);
     swap_values(old_final_r_idx, old_gap_idx);
@@ -880,10 +880,10 @@ void var_opt_sketch<T,S,A>::decrease_k_by_1() {
     // reduce h_, the heap invariant will be preserved (and the gap will be restored), plus
     // the push() of the item that will probably happen later will be cheap.
 
-    uint32_t pulled_idx = h_ - 1;
-    T&& pulled_item = std::move(data_[pulled_idx]);
+    const uint32_t pulled_idx = h_ - 1;
     double pulled_weight = weights_[pulled_idx];
     bool pulled_mark = marks_[pulled_idx];
+    // will move the pulled item below; don't do antying to it here
 
     if (pulled_mark) { --num_marks_in_h_; }
     weights_[pulled_idx] = -1.0; // to make bugs easier to spot
@@ -892,13 +892,13 @@ void var_opt_sketch<T,S,A>::decrease_k_by_1() {
     --k_;
     --n_; // will be re-incremented with the update
 
-    update(pulled_item, pulled_weight, pulled_mark);
+    update(std::move(data_[pulled_idx]), pulled_weight, pulled_mark);
   } else if ((h_ == 0) && (r_ > 0)) {
     // pure reservoir mode, so can simply eject a randomly chosen sample from the reservoir
     assert(r_ >= 2);
 
-    uint32_t r_idx_to_delete = 1 + next_int(r_); // 1 for the gap
-    uint32_t rightmost_r_idx = (1 + r_) - 1;
+    const uint32_t r_idx_to_delete = 1 + next_int(r_); // 1 for the gap
+    const uint32_t rightmost_r_idx = (1 + r_) - 1;
     swap_values(r_idx_to_delete, rightmost_r_idx);
     weights_[rightmost_r_idx] = -1.0;
 
@@ -923,7 +923,7 @@ void var_opt_sketch<T,S,A>::allocate_data_arrays(uint32_t tgt_size, bool use_mar
 
 template<typename T, typename S, typename A>
 void var_opt_sketch<T,S,A>::grow_data_arrays() {
-  uint32_t prev_size = curr_items_alloc_;
+  const uint32_t prev_size = curr_items_alloc_;
   curr_items_alloc_ = get_adjusted_size(k_, curr_items_alloc_ << rf_);
   if (curr_items_alloc_ == k_) {
     ++curr_items_alloc_;
@@ -988,8 +988,8 @@ void var_opt_sketch<T,S,A>::convert_to_heap() {
     return; // nothing to do
   }
 
-  int last_slot = h_ - 1;
-  int last_non_leaf = ((last_slot + 1) / 2) - 1;
+  const int last_slot = h_ - 1;
+  const int last_non_leaf = ((last_slot + 1) / 2) - 1;
   
   for (int j = last_non_leaf; j >= 0; --j) {
     restore_towards_leaves(j);
@@ -1111,8 +1111,8 @@ void var_opt_sketch<T,S,A>::grow_candidate_set(double wt_cands, int num_cands) {
   assert(m_ == 0 || m_ == 1);
 
   while (h_ > 0) {
-    double next_wt = peek_min();
-    double next_tot_wt = wt_cands + next_wt;
+    const double next_wt = peek_min();
+    const double next_tot_wt = wt_cands + next_wt;
 
     // test for strict lightness of next prospect (denominator multiplied through)
     // ideally: (next_wt * (next_num_cands-1) < next_tot_wt)
@@ -1135,15 +1135,15 @@ void var_opt_sketch<T,S,A>::downsample_candidate_set(double wt_cands, int num_ca
   assert(h_ + num_cands == k_ + 1);
 
   // need this before overwriting anything
-  int delete_slot = choose_delete_slot(wt_cands, num_cands);
-  int leftmost_cand_slot = h_;
+  const int delete_slot = choose_delete_slot(wt_cands, num_cands);
+  const int leftmost_cand_slot = h_;
   assert(delete_slot >= leftmost_cand_slot);
   assert(delete_slot <= k_);
 
   // Overwrite weights for items from M moving into R,
   // to make bugs more obvious. Also needed so anyone reading the
   // weight knows if it's invalid without checking h_ and m_
-  int stop_idx = leftmost_cand_slot + m_;
+  const int stop_idx = leftmost_cand_slot + m_;
   for (int j = leftmost_cand_slot; j < stop_idx; ++j) {
     weights_[j] = -1.0;
   }
@@ -1175,8 +1175,8 @@ uint32_t var_opt_sketch<T,S,A>::choose_delete_slot(double wt_cands, int num_cand
     }
   } else {
     // general case
-    int delete_slot = choose_weighted_delete_slot(wt_cands, num_cands);
-    int first_r_slot = h_ + m_;
+    const int delete_slot = choose_weighted_delete_slot(wt_cands, num_cands);
+    const int first_r_slot = h_ + m_;
     if (delete_slot == first_r_slot) {
       return pick_random_slot_in_r();
     } else {
@@ -1189,9 +1189,9 @@ template<typename T, typename S, typename A>
 uint32_t var_opt_sketch<T,S,A>::choose_weighted_delete_slot(double wt_cands, int num_cands) const {
   assert(m_ >= 1);
 
-  int offset = h_;
-  int final_m = (offset + m_) - 1;
-  int num_to_keep = num_cands - 1;
+  const int offset = h_;
+  const int final_m = (offset + m_) - 1;
+  const int num_to_keep = num_cands - 1;
 
   double left_subtotal = 0.0;
   double right_subtotal = -1.0 * wt_cands * next_double_exclude_zero();
@@ -1212,7 +1212,7 @@ uint32_t var_opt_sketch<T,S,A>::choose_weighted_delete_slot(double wt_cands, int
 template<typename T, typename S, typename A>
 uint32_t var_opt_sketch<T,S,A>::pick_random_slot_in_r() const {
   assert(r_ > 0);
-  int offset = h_ + m_;
+  const int offset = h_ + m_;
   if (r_ == 1) {
     return offset;
   } else {
@@ -1246,7 +1246,7 @@ void var_opt_sketch<T,S,A>::strip_marks() {
 
 template<typename T, typename S, typename A>
 void var_opt_sketch<T,S,A>::check_preamble_longs(uint8_t preamble_longs, uint8_t flags) {
-  bool is_empty(flags & EMPTY_FLAG_MASK);
+  const bool is_empty(flags & EMPTY_FLAG_MASK);
   
   if (is_empty) {
     if (preamble_longs != PREAMBLE_LONGS_EMPTY) {
@@ -1300,9 +1300,9 @@ void var_opt_sketch<T, S, A>::validate_and_set_current_size(int preamble_longs)
        "Found r = " + std::to_string(r_));
     }
 
-    uint32_t ceiling_lg_k = to_log_2(ceiling_power_of_2(k_));
-    uint32_t min_lg_size = to_log_2(ceiling_power_of_2(h_));
-    int initial_lg_size = starting_sub_multiple(ceiling_lg_k, rf_, min_lg_size);
+    const uint32_t ceiling_lg_k = to_log_2(ceiling_power_of_2(k_));
+    const uint32_t min_lg_size = to_log_2(ceiling_power_of_2(h_));
+    const int initial_lg_size = starting_sub_multiple(ceiling_lg_k, rf_, min_lg_size);
     curr_items_alloc_ = get_adjusted_size(k_, 1 << initial_lg_size);
     if (curr_items_alloc_ == k_) { // if full size, need to leave 1 for the gap
       ++curr_items_alloc_;
@@ -1344,7 +1344,7 @@ subset_summary var_opt_sketch<T, S, A>::estimate_subset_sum(std::function<bool(T
     return {h_true_wt, h_true_wt, h_true_wt, h_true_wt};
   }
 
-  uint64_t num_samples = n_ - h_;
+  const uint64_t num_samples = n_ - h_;
   assert(num_samples > 0);
   double effective_sampling_rate = r_ / static_cast<double>(num_samples);
   assert(effective_sampling_rate >= 0.0);
@@ -1512,13 +1512,13 @@ uint32_t var_opt_sketch<T,S,A>::starting_sub_multiple(int lg_target, int lg_rf,
 
 template<typename T, typename S, typename A>
 double var_opt_sketch<T,S,A>::pseudo_hypergeometric_ub_on_p(uint64_t n, uint32_t k, double sampling_rate) {
-  double adjusted_kappa = DEFAULT_KAPPA * sqrt(1 - sampling_rate);
+  const double adjusted_kappa = DEFAULT_KAPPA * sqrt(1 - sampling_rate);
   return bounds_binomial_proportions::approximate_upper_bound_on_p(n, k, adjusted_kappa);
 }
 
 template<typename T, typename S, typename A>
 double var_opt_sketch<T,S,A>::pseudo_hypergeometric_lb_on_p(uint64_t n, uint32_t k, double sampling_rate) {
-  double adjusted_kappa = DEFAULT_KAPPA * sqrt(1 - sampling_rate);
+  const double adjusted_kappa = DEFAULT_KAPPA * sqrt(1 - sampling_rate);
   return bounds_binomial_proportions::approximate_lower_bound_on_p(n, k, adjusted_kappa);
 }
 
@@ -1556,7 +1556,7 @@ template<typename T, typename S, typename A>
 uint32_t var_opt_sketch<T,S,A>::count_trailing_zeros(uint32_t v) {
   static const uint8_t ctz_byte_count[256] = {8,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,7,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1, [...]
 
-  uint32_t tmp  =v;
+  uint32_t tmp = v;
   for (int j = 0; j < 4; ++j) {
     const int byte = (tmp & 0xFFUL);
     if (byte != 0) return (j << 3) + ctz_byte_count[byte];
diff --git a/sampling/include/var_opt_union_impl.hpp b/sampling/include/var_opt_union_impl.hpp
index 744d13d..4ecd7e6 100644
--- a/sampling/include/var_opt_union_impl.hpp
+++ b/sampling/include/var_opt_union_impl.hpp
@@ -199,7 +199,7 @@ var_opt_union<T,S,A> var_opt_union<T,S,A>::deserialize(const void* bytes, size_t
   uint64_t outer_tau_denom;
   ptr += copy_from_mem(ptr, &outer_tau_denom, sizeof(outer_tau_denom));
 
-  size_t gadget_size = size - (PREAMBLE_LONGS_NON_EMPTY << 3);
+  const size_t gadget_size = size - (PREAMBLE_LONGS_NON_EMPTY << 3);
   var_opt_sketch<T,S,A> gadget = var_opt_sketch<T,S,A>::deserialize(ptr, gadget_size);
 
   return var_opt_union<T,S,A>(items_seen, outer_tau_numer, outer_tau_denom, max_k, std::move(gadget));
@@ -218,8 +218,8 @@ template<typename T, typename S, typename A>
 void var_opt_union<T,S,A>::serialize(std::ostream& os) const {
   bool empty = (n_ == 0);
 
-  uint8_t serialization_version(SER_VER);
-  uint8_t family_id(FAMILY_ID);
+  const uint8_t serialization_version(SER_VER);
+  const uint8_t family_id(FAMILY_ID);
 
   uint8_t preamble_longs;
   uint8_t flags;
@@ -251,10 +251,10 @@ std::vector<uint8_t, AllocU8<A>> var_opt_union<T,S,A>::serialize(unsigned header
   std::vector<uint8_t, AllocU8<A>> bytes(size);
   uint8_t* ptr = bytes.data() + header_size_bytes;
 
-  bool empty = n_ == 0;
+  const bool empty = n_ == 0;
 
-  uint8_t serialization_version(SER_VER);
-  uint8_t family_id(FAMILY_ID);
+  const uint8_t serialization_version(SER_VER);
+  const uint8_t family_id(FAMILY_ID);
 
   uint8_t preamble_longs;
   uint8_t flags;
@@ -355,8 +355,8 @@ void var_opt_union<T,S,A>::merge_into(const var_opt_sketch<T,S,A>& sketch) {
 
   // resolve tau
   if (sketch.r_ > 0) {
-    double sketch_tau = sketch.get_tau();
-    double outer_tau = get_outer_tau();
+    const double sketch_tau = sketch.get_tau();
+    const double outer_tau = get_outer_tau();
 
     if (outer_tau_denom_ == 0) {
       // detect first estimation mode sketch and grab its tau
@@ -394,7 +394,7 @@ var_opt_sketch<T,S,A> var_opt_union<T,S,A>::get_result() const {
     // At this point, we know that marked items are present in H. So:
     //   1. Result will necessarily be in estimation mode
     //   2. Marked items currently in H need to be absorbed into reservoir (R)
-    bool is_pseudo_exact = detect_and_handle_subcase_of_pseudo_exact(gcopy);
+    const bool is_pseudo_exact = detect_and_handle_subcase_of_pseudo_exact(gcopy);
     if (!is_pseudo_exact) {
       // continue with main logic
       migrate_marked_items_by_decreasing_k(gcopy);
@@ -430,22 +430,22 @@ bool var_opt_union<T,S,A>::there_exist_unmarked_h_items_lighter_than_target(doub
 template<typename T, typename S, typename A>
 bool var_opt_union<T,S,A>::detect_and_handle_subcase_of_pseudo_exact(var_opt_sketch<T,S,A>& sk) const {
   // gadget is seemingly exact
-  bool condition1 = gadget_.r_ == 0;
+  const bool condition1 = gadget_.r_ == 0;
 
   // but there are marked items in H, so only _pseudo_ exact
-  bool condition2 = gadget_.num_marks_in_h_ > 0;
+  const bool condition2 = gadget_.num_marks_in_h_ > 0;
 
   // if gadget is pseudo-exact and the number of marks equals outer_tau_denom, then we can deduce
   // from the bookkeeping logic of mergeInto() that all estimation mode input sketches must
   // have had the same tau, so we can throw all of the marked items into a common reservoir.
-  bool condition3 = gadget_.num_marks_in_h_ == outer_tau_denom_;
+  const bool condition3 = gadget_.num_marks_in_h_ == outer_tau_denom_;
 
   if (!(condition1 && condition2 && condition3)) {
     return false;
   } else {
 
     // explicitly enforce rule that items in H should not be lighter than the sketch's tau
-    bool anti_condition4 = there_exist_unmarked_h_items_lighter_than_target(gadget_.get_tau());
+    const bool anti_condition4 = there_exist_unmarked_h_items_lighter_than_target(gadget_.get_tau());
     if (anti_condition4) {
       return false;
     } else {
@@ -466,7 +466,7 @@ bool var_opt_union<T,S,A>::detect_and_handle_subcase_of_pseudo_exact(var_opt_ske
  */
 template<typename T, typename S, typename A>
 void var_opt_union<T,S,A>::mark_moving_gadget_coercer(var_opt_sketch<T,S,A>& sk) const {
-  uint32_t result_k = gadget_.h_ + gadget_.r_;
+  const uint32_t result_k = gadget_.h_ + gadget_.r_;
 
   uint32_t result_h = 0;
   uint32_t result_r = 0;
@@ -509,11 +509,10 @@ void var_opt_union<T,S,A>::mark_moving_gadget_coercer(var_opt_sketch<T,S,A>& sk)
   assert((result_h + result_r) == result_k);
   assert(fabs(transferred_weight - outer_tau_numer_) < 1e-10);
 
-  double result_r_weight = gadget_.total_wt_r_ + transferred_weight;
-  uint64_t result_n = n_;
+  const double result_r_weight = gadget_.total_wt_r_ + transferred_weight;
+  const uint64_t result_n = n_;
     
   // explicitly set weight value for the gap
-  //data[result_h] = nullptr;  invalid not a pointer
   wts[result_h] = -1.0;
 
   // clean up arrays in input sketch, replace with new values
@@ -538,9 +537,9 @@ void var_opt_union<T,S,A>::mark_moving_gadget_coercer(var_opt_sketch<T,S,A>& sk)
 // this is basically a continuation of get_result(), but modifying the input gadget copy
 template<typename T, typename S, typename A>
 void var_opt_union<T,S,A>::migrate_marked_items_by_decreasing_k(var_opt_sketch<T,S,A>& gcopy) const {
-  uint32_t r_count = gcopy.r_;
-  uint32_t h_count = gcopy.h_;
-  uint32_t k = gcopy.k_;
+  const uint32_t r_count = gcopy.r_;
+  const uint32_t h_count = gcopy.h_;
+  const uint32_t k = gcopy.k_;
 
   assert(gcopy.num_marks_in_h_ > 0); // ensured by caller
   // either full (of samples), or in pseudo-exact mode, or both


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org