You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2024/03/07 22:04:40 UTC

(datasketches-cpp) 01/01: removed unused code

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch tdigest
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git

commit 93a316cba18e939b26f6118038f2959335c1b75d
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Thu Mar 7 14:04:28 2024 -0800

    removed unused code
---
 tdigest/include/tdigest.hpp      |  4 ----
 tdigest/include/tdigest_impl.hpp | 30 +++++++-----------------------
 2 files changed, 7 insertions(+), 27 deletions(-)

diff --git a/tdigest/include/tdigest.hpp b/tdigest/include/tdigest.hpp
index 6087115..7bb9c93 100644
--- a/tdigest/include/tdigest.hpp
+++ b/tdigest/include/tdigest.hpp
@@ -81,10 +81,6 @@ public:
   using value_type = T;
   using allocator_type = Allocator;
 
-  static const bool USE_ALTERNATING_SORT = true;
-  static const bool USE_TWO_LEVEL_COMPRESSION = true;
-  static const bool USE_WEIGHT_LIMIT = true;
-
   static const uint16_t DEFAULT_K = 200;
 
   using W = typename std::conditional<std::is_same<T, double>::value, uint64_t, uint32_t>::type;
diff --git a/tdigest/include/tdigest_impl.hpp b/tdigest/include/tdigest_impl.hpp
index 5835a48..1a48f88 100644
--- a/tdigest/include/tdigest_impl.hpp
+++ b/tdigest/include/tdigest_impl.hpp
@@ -240,44 +240,33 @@ string<A> tdigest<T, A>::to_string(bool print_centroids) const {
 template<typename T, typename A>
 void tdigest<T, A>::merge_buffered() {
   if (buffered_weight_ == 0) return;
-  const bool reverse = USE_ALTERNATING_SORT && reverse_merge_;
   std::copy(centroids_.begin(), centroids_.end(), std::back_inserter(buffer_));
   centroids_.clear();
   std::stable_sort(buffer_.begin(), buffer_.end(), centroid_cmp());
-  if (reverse) std::reverse(buffer_.begin(), buffer_.end());
+  if (reverse_merge_) std::reverse(buffer_.begin(), buffer_.end());
   centroids_weight_ += buffered_weight_;
   auto it = buffer_.begin();
   centroids_.push_back(*it);
   ++it;
   double weight_so_far = 0;
-  const double normalizer = scale_function().normalizer(internal_k_, centroids_weight_);
-  double k1 = scale_function().k(0, normalizer);
-  double w_limit = centroids_weight_ * scale_function().q(k1 + 1, normalizer);
   while (it != buffer_.end()) {
     const double proposed_weight = centroids_.back().get_weight() + it->get_weight();
-    bool add_this;
-    if (std::distance(buffer_.begin(), it) == 1 || std::distance(buffer_.end(), it) == 1) {
-      add_this = false;
-    } else if (USE_WEIGHT_LIMIT) {
+    bool add_this = false;
+    if (std::distance(buffer_.begin(), it) != 1 && std::distance(buffer_.end(), it) != 1) {
       const double q0 = weight_so_far / centroids_weight_;
       const double q2 = (weight_so_far + proposed_weight) / centroids_weight_;
+      const double normalizer = scale_function().normalizer(internal_k_, centroids_weight_);
       add_this = proposed_weight <= centroids_weight_ * std::min(scale_function().max(q0, normalizer), scale_function().max(q2, normalizer));
-    } else {
-      add_this = weight_so_far + proposed_weight <= w_limit;
     }
     if (add_this) {
       centroids_.back().add(*it);
     } else {
       weight_so_far += centroids_.back().get_weight();
-      if (!USE_WEIGHT_LIMIT) {
-        k1 = scale_function().k(weight_so_far / centroids_weight_, normalizer);
-        w_limit = centroids_weight_ * scale_function().q(k1 + 1, normalizer);
-      }
       centroids_.push_back(*it);
     }
     ++it;
   }
-  if (reverse) std::reverse(centroids_.begin(), centroids_.end());
+  if (reverse_merge_) std::reverse(centroids_.begin(), centroids_.end());
   if (centroids_weight_ > 0) {
     min_ = std::min(min_, centroids_.front().get_mean());
     max_ = std::max(max_, centroids_.back().get_mean());
@@ -593,15 +582,10 @@ buffer_(allocator),
 buffered_weight_(0)
 {
   if (k < 10) throw std::invalid_argument("k must be at least 10");
-  size_t fudge = 0;
-  if (USE_WEIGHT_LIMIT) {
-    fudge = 10;
-    if (k < 30) fudge +=20;
-  }
+  const size_t fudge = k < 30 ? 30 : 10;
   centroids_capacity_ = 2 * k_ + fudge;
   buffer_capacity_ = 5 * centroids_capacity_;
-  double scale = std::max(1.0, static_cast<double>(buffer_capacity_) / centroids_capacity_ - 1.0);
-  if (!USE_TWO_LEVEL_COMPRESSION) scale = 1;
+  const double scale = std::max(1.0, static_cast<double>(buffer_capacity_) / centroids_capacity_ - 1.0);
   internal_k_ = std::ceil(std::sqrt(scale) * k_);
   centroids_capacity_ = std::max(centroids_capacity_, internal_k_ + fudge);
   buffer_capacity_ = std::max(buffer_capacity_, 2 * centroids_capacity_);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org