You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by jm...@apache.org on 2020/02/18 22:07:26 UTC

[incubator-datasketches-cpp] 01/01: add consts to varopt methods where appropriate, clean up gap item handling, add varopt to python readme

This is an automated email from the ASF dual-hosted git repository.

jmalkin pushed a commit to branch varopt_cleanup
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git

commit 6bf8b1489b0f58bd4502d896e691a590e5a7b718
Author: Jon Malkin <jm...@users.noreply.github.com>
AuthorDate: Tue Feb 18 14:07:04 2020 -0800

    add consts to varopt methods where appropriate, clean up gap item handling, add varopt to python readme
---
 python/README.md                         |  3 +
 python/src/kll_wrapper.cpp               |  4 +-
 sampling/include/var_opt_sketch_impl.hpp | 98 ++++++++++++++++----------------
 sampling/include/var_opt_union_impl.hpp  | 39 +++++++------
 4 files changed, 72 insertions(+), 72 deletions(-)

diff --git a/python/README.md b/python/README.md
index 867aad9..0d083ba 100644
--- a/python/README.md
+++ b/python/README.md
@@ -39,6 +39,9 @@ Having installed the library, loading the Datasketches library in Python is simp
 - CPC
     - `cpc_sketch`
     - `cpc_union`
+- VarOpt Sampling
+    - `var_opt_sketch`
+    - `var_opt_union`
 
 ## Known Differences from C++
 
diff --git a/python/src/kll_wrapper.cpp b/python/src/kll_wrapper.cpp
index 0740f11..69b1635 100644
--- a/python/src/kll_wrapper.cpp
+++ b/python/src/kll_wrapper.cpp
@@ -110,7 +110,7 @@ void bind_kll_sketch(py::module &m, const char* name) {
     .def(py::init<uint16_t>(), py::arg("k"))
     .def(py::init<const kll_sketch<T>&>())
     .def("update", (void (kll_sketch<T>::*)(const T&)) &kll_sketch<T>::update, py::arg("item"))
-    .def("merge", &kll_sketch<T>::merge, py::arg("sketch"))
+    .def("merge", (void (kll_sketch<T>::*)(const kll_sketch<T>&)) &kll_sketch<T>::merge, py::arg("sketch"))
     .def("__str__", &dspy::kll_sketch_to_string<T>)
     .def("is_empty", &kll_sketch<T>::is_empty)
     .def("get_n", &kll_sketch<T>::get_n)
@@ -127,8 +127,6 @@ void bind_kll_sketch(py::module &m, const char* name) {
          py::arg("as_pmf"))
     .def_static("get_normalized_rank_error", &dspy::kll_sketch_generic_normalized_rank_error<T>,
          py::arg("k"), py::arg("as_pmf"))
-    // can't yet get this one to work
-    //.def("get_serialized_size_bytes", &kll_sketch<T>::get_serialized_size_bytes)
     .def("serialize", &dspy::kll_sketch_serialize<T>)
     .def_static("deserialize", &dspy::kll_sketch_deserialize<T>)
     ;
diff --git a/sampling/include/var_opt_sketch_impl.hpp b/sampling/include/var_opt_sketch_impl.hpp
index f0e316a..ddf0716 100644
--- a/sampling/include/var_opt_sketch_impl.hpp
+++ b/sampling/include/var_opt_sketch_impl.hpp
@@ -492,11 +492,11 @@ std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T,S,A>::serialize(unsigned heade
 
 template<typename T, typename S, typename A>
 void var_opt_sketch<T,S,A>::serialize(std::ostream& os) const {
-  bool empty = (h_ == 0) && (r_ == 0);
+  const bool empty = (h_ == 0) && (r_ == 0);
 
-  uint8_t preLongs = (empty ? PREAMBLE_LONGS_EMPTY
+  const uint8_t preLongs = (empty ? PREAMBLE_LONGS_EMPTY
                                   : (r_ == 0 ? PREAMBLE_LONGS_WARMUP : PREAMBLE_LONGS_FULL));
-  uint8_t first_byte = (preLongs & 0x3F) | ((static_cast<uint8_t>(rf_)) << 6);
+  const uint8_t first_byte = (preLongs & 0x3F) | ((static_cast<uint8_t>(rf_)) << 6);
   uint8_t flags = (marks_ != nullptr ? GADGET_FLAG_MASK : 0);
 
   if (empty) {
@@ -504,8 +504,8 @@ void var_opt_sketch<T,S,A>::serialize(std::ostream& os) const {
   }
 
   // first prelong
-  uint8_t ser_ver(SER_VER);
-  uint8_t family(FAMILY_ID);
+  const uint8_t ser_ver(SER_VER);
+  const uint8_t family(FAMILY_ID);
   os.write((char*)&first_byte, sizeof(uint8_t));
   os.write((char*)&ser_ver, sizeof(uint8_t));
   os.write((char*)&family, sizeof(uint8_t));
@@ -571,8 +571,8 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
   check_preamble_longs(preamble_longs, flags);
   check_family_and_serialization_version(family_id, serial_version);
 
-  bool is_empty = flags & EMPTY_FLAG_MASK;
-  bool is_gadget = flags & GADGET_FLAG_MASK;
+  const bool is_empty = flags & EMPTY_FLAG_MASK;
+  const bool is_gadget = flags & GADGET_FLAG_MASK;
 
   return is_empty ? var_opt_sketch<T,S,A>(k, rf, is_gadget) : var_opt_sketch<T,S,A>(k, rf, is_gadget, preamble_longs, bytes, size);
 }
@@ -595,8 +595,8 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is) {
   check_preamble_longs(preamble_longs, flags);
   check_family_and_serialization_version(family_id, serial_version);
 
-  bool is_empty = flags & EMPTY_FLAG_MASK;
-  bool is_gadget = flags & GADGET_FLAG_MASK;
+  const bool is_empty = flags & EMPTY_FLAG_MASK;
+  const bool is_gadget = flags & GADGET_FLAG_MASK;
 
   return is_empty ? var_opt_sketch<T,S,A>(k, rf, is_gadget) : var_opt_sketch<T,S,A>(k, rf, is_gadget, preamble_longs, is);
 }
@@ -608,10 +608,10 @@ bool var_opt_sketch<T,S,A>::is_empty() const {
 
 template<typename T, typename S, typename A>
 void var_opt_sketch<T,S,A>::reset() {
-  uint32_t prev_alloc = curr_items_alloc_;
+  const uint32_t prev_alloc = curr_items_alloc_;
 
-  uint32_t ceiling_lg_k = to_log_2(ceiling_power_of_2(k_));
-  int initial_lg_size = starting_sub_multiple(ceiling_lg_k, rf_, MIN_LG_ARR_ITEMS);
+  const uint32_t ceiling_lg_k = to_log_2(ceiling_power_of_2(k_));
+  const int initial_lg_size = starting_sub_multiple(ceiling_lg_k, rf_, MIN_LG_ARR_ITEMS);
   curr_items_alloc_ = get_adjusted_size(k_, 1 << initial_lg_size);
   if (curr_items_alloc_ == k_) { // if full size, need to leave 1 for the gap
     ++curr_items_alloc_;
@@ -631,7 +631,7 @@ void var_opt_sketch<T,S,A>::reset() {
   }
 
   if (curr_items_alloc_ < prev_alloc) {
-    bool is_gadget = (marks_ != nullptr);
+    const bool is_gadget = (marks_ != nullptr);
   
     A().deallocate(data_, curr_items_alloc_);
     AllocDouble().deallocate(weights_, curr_items_alloc_);
@@ -663,7 +663,7 @@ uint32_t var_opt_sketch<T,S,A>::get_k() const {
 
 template<typename T, typename S, typename A>
 uint32_t var_opt_sketch<T,S,A>::get_num_samples() const {
-  uint32_t num_in_sketch = h_ + r_;
+  const uint32_t num_in_sketch = h_ + r_;
   return (num_in_sketch < k_ ? num_in_sketch : k_);
 }
 
@@ -696,7 +696,7 @@ template<typename T, typename S, typename A>
 std::ostream& var_opt_sketch<T,S,A>::items_to_stream(std::ostream& os) const {
   os << "### Sketch Items" << std::endl;
 
-  uint32_t print_length = (n_ < k_ ? n_ : k_ + 1);
+  const uint32_t print_length = (n_ < k_ ? n_ : k_ + 1);
   for (int i = 0; i < print_length; ++i) {
     if (i == h_) {
       os << i << ": GAP" << std::endl;
@@ -739,13 +739,13 @@ void var_opt_sketch<T,S,A>::update(const T& item, double weight, bool mark) {
 
     // what tau would be if deletion candidates turn out to be R plus the new item
     // note: (r_ + 1) - 1 is intentional
-    double hypothetical_tau = (weight + total_wt_r_) / ((r_ + 1) - 1);
+    const double hypothetical_tau = (weight + total_wt_r_) / ((r_ + 1) - 1);
 
     // is new item's turn to be considered for reservoir?
-    double condition1 = (h_ == 0) || (weight <= peek_min());
+    const double condition1 = (h_ == 0) || (weight <= peek_min());
 
     // is new item light enough for reservoir?
-    double condition2 = weight < hypothetical_tau;
+    const double condition2 = weight < hypothetical_tau;
   
     if (condition1 && condition2) {
       update_light(item, weight, mark);
@@ -791,7 +791,7 @@ void var_opt_sketch<T,S,A>::update_light(const T& item, double weight, bool mark
   assert(r_ >= 1);
   assert((r_ + h_) == k_);
 
-  int m_slot = h_; // index of the gap, which becomes the M region
+  const int m_slot = h_; // index of the gap, which becomes the M region
   if (filled_data_) {
     data_[m_slot] = item;
   } else {
@@ -839,7 +839,7 @@ void var_opt_sketch<T,S,A>::update_heavy_r_eq1(const T& item, double weight, boo
 
   // Any set of two items is downsample-able to one item,
   // so the two lightest items are a valid starting point for the following
-  int m_slot = k_ - 1; // array is k+1, 1 in R, so slot before is M
+  const int m_slot = k_ - 1; // array is k+1, 1 in R, so slot before is M
   grow_candidate_set(weights_[m_slot] + total_wt_r_, 2);
 }
 
@@ -870,8 +870,8 @@ void var_opt_sketch<T,S,A>::decrease_k_by_1() {
     // still just data), reduce k, and then re-insert the item
 
     // first, slide the R zone to the left by 1, temporarily filling the gap
-    uint32_t old_gap_idx = h_;
-    uint32_t old_final_r_idx = (h_ + 1 + r_) - 1;
+    const uint32_t old_gap_idx = h_;
+    const uint32_t old_final_r_idx = (h_ + 1 + r_) - 1;
 
     assert(old_final_r_idx == k_);
     swap_values(old_final_r_idx, old_gap_idx);
@@ -880,10 +880,10 @@ void var_opt_sketch<T,S,A>::decrease_k_by_1() {
     // reduce h_, the heap invariant will be preserved (and the gap will be restored), plus
     // the push() of the item that will probably happen later will be cheap.
 
-    uint32_t pulled_idx = h_ - 1;
-    T&& pulled_item = std::move(data_[pulled_idx]);
+    const uint32_t pulled_idx = h_ - 1;
     double pulled_weight = weights_[pulled_idx];
     bool pulled_mark = marks_[pulled_idx];
+    // will move the pulled item below; don't do antying to it here
 
     if (pulled_mark) { --num_marks_in_h_; }
     weights_[pulled_idx] = -1.0; // to make bugs easier to spot
@@ -892,13 +892,13 @@ void var_opt_sketch<T,S,A>::decrease_k_by_1() {
     --k_;
     --n_; // will be re-incremented with the update
 
-    update(pulled_item, pulled_weight, pulled_mark);
+    update(std::move(data_[pulled_idx]), pulled_weight, pulled_mark);
   } else if ((h_ == 0) && (r_ > 0)) {
     // pure reservoir mode, so can simply eject a randomly chosen sample from the reservoir
     assert(r_ >= 2);
 
-    uint32_t r_idx_to_delete = 1 + next_int(r_); // 1 for the gap
-    uint32_t rightmost_r_idx = (1 + r_) - 1;
+    const uint32_t r_idx_to_delete = 1 + next_int(r_); // 1 for the gap
+    const uint32_t rightmost_r_idx = (1 + r_) - 1;
     swap_values(r_idx_to_delete, rightmost_r_idx);
     weights_[rightmost_r_idx] = -1.0;
 
@@ -923,7 +923,7 @@ void var_opt_sketch<T,S,A>::allocate_data_arrays(uint32_t tgt_size, bool use_mar
 
 template<typename T, typename S, typename A>
 void var_opt_sketch<T,S,A>::grow_data_arrays() {
-  uint32_t prev_size = curr_items_alloc_;
+  const uint32_t prev_size = curr_items_alloc_;
   curr_items_alloc_ = get_adjusted_size(k_, curr_items_alloc_ << rf_);
   if (curr_items_alloc_ == k_) {
     ++curr_items_alloc_;
@@ -988,8 +988,8 @@ void var_opt_sketch<T,S,A>::convert_to_heap() {
     return; // nothing to do
   }
 
-  int last_slot = h_ - 1;
-  int last_non_leaf = ((last_slot + 1) / 2) - 1;
+  const int last_slot = h_ - 1;
+  const int last_non_leaf = ((last_slot + 1) / 2) - 1;
   
   for (int j = last_non_leaf; j >= 0; --j) {
     restore_towards_leaves(j);
@@ -1111,8 +1111,8 @@ void var_opt_sketch<T,S,A>::grow_candidate_set(double wt_cands, int num_cands) {
   assert(m_ == 0 || m_ == 1);
 
   while (h_ > 0) {
-    double next_wt = peek_min();
-    double next_tot_wt = wt_cands + next_wt;
+    const double next_wt = peek_min();
+    const double next_tot_wt = wt_cands + next_wt;
 
     // test for strict lightness of next prospect (denominator multiplied through)
     // ideally: (next_wt * (next_num_cands-1) < next_tot_wt)
@@ -1135,15 +1135,15 @@ void var_opt_sketch<T,S,A>::downsample_candidate_set(double wt_cands, int num_ca
   assert(h_ + num_cands == k_ + 1);
 
   // need this before overwriting anything
-  int delete_slot = choose_delete_slot(wt_cands, num_cands);
-  int leftmost_cand_slot = h_;
+  const int delete_slot = choose_delete_slot(wt_cands, num_cands);
+  const int leftmost_cand_slot = h_;
   assert(delete_slot >= leftmost_cand_slot);
   assert(delete_slot <= k_);
 
   // Overwrite weights for items from M moving into R,
   // to make bugs more obvious. Also needed so anyone reading the
   // weight knows if it's invalid without checking h_ and m_
-  int stop_idx = leftmost_cand_slot + m_;
+  const int stop_idx = leftmost_cand_slot + m_;
   for (int j = leftmost_cand_slot; j < stop_idx; ++j) {
     weights_[j] = -1.0;
   }
@@ -1175,8 +1175,8 @@ uint32_t var_opt_sketch<T,S,A>::choose_delete_slot(double wt_cands, int num_cand
     }
   } else {
     // general case
-    int delete_slot = choose_weighted_delete_slot(wt_cands, num_cands);
-    int first_r_slot = h_ + m_;
+    const int delete_slot = choose_weighted_delete_slot(wt_cands, num_cands);
+    const int first_r_slot = h_ + m_;
     if (delete_slot == first_r_slot) {
       return pick_random_slot_in_r();
     } else {
@@ -1189,9 +1189,9 @@ template<typename T, typename S, typename A>
 uint32_t var_opt_sketch<T,S,A>::choose_weighted_delete_slot(double wt_cands, int num_cands) const {
   assert(m_ >= 1);
 
-  int offset = h_;
-  int final_m = (offset + m_) - 1;
-  int num_to_keep = num_cands - 1;
+  const int offset = h_;
+  const int final_m = (offset + m_) - 1;
+  const int num_to_keep = num_cands - 1;
 
   double left_subtotal = 0.0;
   double right_subtotal = -1.0 * wt_cands * next_double_exclude_zero();
@@ -1212,7 +1212,7 @@ uint32_t var_opt_sketch<T,S,A>::choose_weighted_delete_slot(double wt_cands, int
 template<typename T, typename S, typename A>
 uint32_t var_opt_sketch<T,S,A>::pick_random_slot_in_r() const {
   assert(r_ > 0);
-  int offset = h_ + m_;
+  const int offset = h_ + m_;
   if (r_ == 1) {
     return offset;
   } else {
@@ -1246,7 +1246,7 @@ void var_opt_sketch<T,S,A>::strip_marks() {
 
 template<typename T, typename S, typename A>
 void var_opt_sketch<T,S,A>::check_preamble_longs(uint8_t preamble_longs, uint8_t flags) {
-  bool is_empty(flags & EMPTY_FLAG_MASK);
+  const bool is_empty(flags & EMPTY_FLAG_MASK);
   
   if (is_empty) {
     if (preamble_longs != PREAMBLE_LONGS_EMPTY) {
@@ -1300,9 +1300,9 @@ void var_opt_sketch<T, S, A>::validate_and_set_current_size(int preamble_longs)
        "Found r = " + std::to_string(r_));
     }
 
-    uint32_t ceiling_lg_k = to_log_2(ceiling_power_of_2(k_));
-    uint32_t min_lg_size = to_log_2(ceiling_power_of_2(h_));
-    int initial_lg_size = starting_sub_multiple(ceiling_lg_k, rf_, min_lg_size);
+    const uint32_t ceiling_lg_k = to_log_2(ceiling_power_of_2(k_));
+    const uint32_t min_lg_size = to_log_2(ceiling_power_of_2(h_));
+    const int initial_lg_size = starting_sub_multiple(ceiling_lg_k, rf_, min_lg_size);
     curr_items_alloc_ = get_adjusted_size(k_, 1 << initial_lg_size);
     if (curr_items_alloc_ == k_) { // if full size, need to leave 1 for the gap
       ++curr_items_alloc_;
@@ -1344,7 +1344,7 @@ subset_summary var_opt_sketch<T, S, A>::estimate_subset_sum(std::function<bool(T
     return {h_true_wt, h_true_wt, h_true_wt, h_true_wt};
   }
 
-  uint64_t num_samples = n_ - h_;
+  const uint64_t num_samples = n_ - h_;
   assert(num_samples > 0);
   double effective_sampling_rate = r_ / static_cast<double>(num_samples);
   assert(effective_sampling_rate >= 0.0);
@@ -1512,13 +1512,13 @@ uint32_t var_opt_sketch<T,S,A>::starting_sub_multiple(int lg_target, int lg_rf,
 
 template<typename T, typename S, typename A>
 double var_opt_sketch<T,S,A>::pseudo_hypergeometric_ub_on_p(uint64_t n, uint32_t k, double sampling_rate) {
-  double adjusted_kappa = DEFAULT_KAPPA * sqrt(1 - sampling_rate);
+  const double adjusted_kappa = DEFAULT_KAPPA * sqrt(1 - sampling_rate);
   return bounds_binomial_proportions::approximate_upper_bound_on_p(n, k, adjusted_kappa);
 }
 
 template<typename T, typename S, typename A>
 double var_opt_sketch<T,S,A>::pseudo_hypergeometric_lb_on_p(uint64_t n, uint32_t k, double sampling_rate) {
-  double adjusted_kappa = DEFAULT_KAPPA * sqrt(1 - sampling_rate);
+  const double adjusted_kappa = DEFAULT_KAPPA * sqrt(1 - sampling_rate);
   return bounds_binomial_proportions::approximate_lower_bound_on_p(n, k, adjusted_kappa);
 }
 
@@ -1556,7 +1556,7 @@ template<typename T, typename S, typename A>
 uint32_t var_opt_sketch<T,S,A>::count_trailing_zeros(uint32_t v) {
   static const uint8_t ctz_byte_count[256] = {8,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,7,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1, [...]
 
-  uint32_t tmp  =v;
+  uint32_t tmp = v;
   for (int j = 0; j < 4; ++j) {
     const int byte = (tmp & 0xFFUL);
     if (byte != 0) return (j << 3) + ctz_byte_count[byte];
diff --git a/sampling/include/var_opt_union_impl.hpp b/sampling/include/var_opt_union_impl.hpp
index 744d13d..4ecd7e6 100644
--- a/sampling/include/var_opt_union_impl.hpp
+++ b/sampling/include/var_opt_union_impl.hpp
@@ -199,7 +199,7 @@ var_opt_union<T,S,A> var_opt_union<T,S,A>::deserialize(const void* bytes, size_t
   uint64_t outer_tau_denom;
   ptr += copy_from_mem(ptr, &outer_tau_denom, sizeof(outer_tau_denom));
 
-  size_t gadget_size = size - (PREAMBLE_LONGS_NON_EMPTY << 3);
+  const size_t gadget_size = size - (PREAMBLE_LONGS_NON_EMPTY << 3);
   var_opt_sketch<T,S,A> gadget = var_opt_sketch<T,S,A>::deserialize(ptr, gadget_size);
 
   return var_opt_union<T,S,A>(items_seen, outer_tau_numer, outer_tau_denom, max_k, std::move(gadget));
@@ -218,8 +218,8 @@ template<typename T, typename S, typename A>
 void var_opt_union<T,S,A>::serialize(std::ostream& os) const {
   bool empty = (n_ == 0);
 
-  uint8_t serialization_version(SER_VER);
-  uint8_t family_id(FAMILY_ID);
+  const uint8_t serialization_version(SER_VER);
+  const uint8_t family_id(FAMILY_ID);
 
   uint8_t preamble_longs;
   uint8_t flags;
@@ -251,10 +251,10 @@ std::vector<uint8_t, AllocU8<A>> var_opt_union<T,S,A>::serialize(unsigned header
   std::vector<uint8_t, AllocU8<A>> bytes(size);
   uint8_t* ptr = bytes.data() + header_size_bytes;
 
-  bool empty = n_ == 0;
+  const bool empty = n_ == 0;
 
-  uint8_t serialization_version(SER_VER);
-  uint8_t family_id(FAMILY_ID);
+  const uint8_t serialization_version(SER_VER);
+  const uint8_t family_id(FAMILY_ID);
 
   uint8_t preamble_longs;
   uint8_t flags;
@@ -355,8 +355,8 @@ void var_opt_union<T,S,A>::merge_into(const var_opt_sketch<T,S,A>& sketch) {
 
   // resolve tau
   if (sketch.r_ > 0) {
-    double sketch_tau = sketch.get_tau();
-    double outer_tau = get_outer_tau();
+    const double sketch_tau = sketch.get_tau();
+    const double outer_tau = get_outer_tau();
 
     if (outer_tau_denom_ == 0) {
       // detect first estimation mode sketch and grab its tau
@@ -394,7 +394,7 @@ var_opt_sketch<T,S,A> var_opt_union<T,S,A>::get_result() const {
     // At this point, we know that marked items are present in H. So:
     //   1. Result will necessarily be in estimation mode
     //   2. Marked items currently in H need to be absorbed into reservoir (R)
-    bool is_pseudo_exact = detect_and_handle_subcase_of_pseudo_exact(gcopy);
+    const bool is_pseudo_exact = detect_and_handle_subcase_of_pseudo_exact(gcopy);
     if (!is_pseudo_exact) {
       // continue with main logic
       migrate_marked_items_by_decreasing_k(gcopy);
@@ -430,22 +430,22 @@ bool var_opt_union<T,S,A>::there_exist_unmarked_h_items_lighter_than_target(doub
 template<typename T, typename S, typename A>
 bool var_opt_union<T,S,A>::detect_and_handle_subcase_of_pseudo_exact(var_opt_sketch<T,S,A>& sk) const {
   // gadget is seemingly exact
-  bool condition1 = gadget_.r_ == 0;
+  const bool condition1 = gadget_.r_ == 0;
 
   // but there are marked items in H, so only _pseudo_ exact
-  bool condition2 = gadget_.num_marks_in_h_ > 0;
+  const bool condition2 = gadget_.num_marks_in_h_ > 0;
 
   // if gadget is pseudo-exact and the number of marks equals outer_tau_denom, then we can deduce
   // from the bookkeeping logic of mergeInto() that all estimation mode input sketches must
   // have had the same tau, so we can throw all of the marked items into a common reservoir.
-  bool condition3 = gadget_.num_marks_in_h_ == outer_tau_denom_;
+  const bool condition3 = gadget_.num_marks_in_h_ == outer_tau_denom_;
 
   if (!(condition1 && condition2 && condition3)) {
     return false;
   } else {
 
     // explicitly enforce rule that items in H should not be lighter than the sketch's tau
-    bool anti_condition4 = there_exist_unmarked_h_items_lighter_than_target(gadget_.get_tau());
+    const bool anti_condition4 = there_exist_unmarked_h_items_lighter_than_target(gadget_.get_tau());
     if (anti_condition4) {
       return false;
     } else {
@@ -466,7 +466,7 @@ bool var_opt_union<T,S,A>::detect_and_handle_subcase_of_pseudo_exact(var_opt_ske
  */
 template<typename T, typename S, typename A>
 void var_opt_union<T,S,A>::mark_moving_gadget_coercer(var_opt_sketch<T,S,A>& sk) const {
-  uint32_t result_k = gadget_.h_ + gadget_.r_;
+  const uint32_t result_k = gadget_.h_ + gadget_.r_;
 
   uint32_t result_h = 0;
   uint32_t result_r = 0;
@@ -509,11 +509,10 @@ void var_opt_union<T,S,A>::mark_moving_gadget_coercer(var_opt_sketch<T,S,A>& sk)
   assert((result_h + result_r) == result_k);
   assert(fabs(transferred_weight - outer_tau_numer_) < 1e-10);
 
-  double result_r_weight = gadget_.total_wt_r_ + transferred_weight;
-  uint64_t result_n = n_;
+  const double result_r_weight = gadget_.total_wt_r_ + transferred_weight;
+  const uint64_t result_n = n_;
     
   // explicitly set weight value for the gap
-  //data[result_h] = nullptr;  invalid not a pointer
   wts[result_h] = -1.0;
 
   // clean up arrays in input sketch, replace with new values
@@ -538,9 +537,9 @@ void var_opt_union<T,S,A>::mark_moving_gadget_coercer(var_opt_sketch<T,S,A>& sk)
 // this is basically a continuation of get_result(), but modifying the input gadget copy
 template<typename T, typename S, typename A>
 void var_opt_union<T,S,A>::migrate_marked_items_by_decreasing_k(var_opt_sketch<T,S,A>& gcopy) const {
-  uint32_t r_count = gcopy.r_;
-  uint32_t h_count = gcopy.h_;
-  uint32_t k = gcopy.k_;
+  const uint32_t r_count = gcopy.r_;
+  const uint32_t h_count = gcopy.h_;
+  const uint32_t k = gcopy.k_;
 
   assert(gcopy.num_marks_in_h_ > 0); // ensured by caller
   // either full (of samples), or in pseudo-exact mode, or both


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org