You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2020/11/24 22:54:31 UTC

[incubator-datasketches-cpp] branch req_sketch updated: performance improvement

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch req_sketch
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git


The following commit(s) were added to refs/heads/req_sketch by this push:
     new fe6b7d5  performance improvement
fe6b7d5 is described below

commit fe6b7d5794691ab20230da4366ba7be2ef22f075
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Tue Nov 24 14:54:16 2020 -0800

    performance improvement
---
 req/include/req_compactor.hpp      |  7 +++---
 req/include/req_compactor_impl.hpp | 44 +++++++++++++++++++++++++++-----------
 req/include/req_sketch_impl.hpp    |  9 +++-----
 3 files changed, 39 insertions(+), 21 deletions(-)

diff --git a/req/include/req_compactor.hpp b/req/include/req_compactor.hpp
index 2c429fa..b11ae09 100755
--- a/req/include/req_compactor.hpp
+++ b/req/include/req_compactor.hpp
@@ -36,6 +36,7 @@ public:
   uint32_t get_num_items() const;
   uint32_t get_nom_capacity() const;
   uint8_t get_lg_weight() const;
+  std::vector<T, Allocator>& get_items();
   const std::vector<T, Allocator>& get_items() const;
 
   template<bool inclusive>
@@ -50,7 +51,7 @@ public:
   void sort();
   void merge_sort_in(std::vector<T, Allocator>&& items);
 
-  std::vector<T, Allocator> compact();
+  void compact(req_compactor& next);
 
   /**
    * Computes size needed to serialize the current state of the compactor.
@@ -107,8 +108,8 @@ private:
 
   static uint32_t nearest_even(float value);
 
-  template<typename Iter>
-  static std::vector<T, Allocator> get_evens_or_odds(Iter from, Iter to, bool flag);
+  template<typename InIter, typename OutIter>
+  void promote_evens_or_odds(InIter from, InIter to, bool flag, OutIter dst);
 
   // for deserialization
   req_compactor(uint8_t lg_weight, bool sorted, float section_size_raw, uint8_t num_sections, uint64_t state, std::vector<T, Allocator>&& items);
diff --git a/req/include/req_compactor_impl.hpp b/req/include/req_compactor_impl.hpp
index 316378a..462b9b8 100755
--- a/req/include/req_compactor_impl.hpp
+++ b/req/include/req_compactor_impl.hpp
@@ -39,7 +39,9 @@ section_size_(section_size),
 num_sections_(req_constants::INIT_NUM_SECTIONS),
 state_(0),
 items_(allocator)
-{}
+{
+  items_.reserve(2 * get_nom_capacity());
+}
 
 template<typename T, bool H, typename C, typename A>
 bool req_compactor<T, H, C, A>::is_sorted() const {
@@ -62,6 +64,11 @@ uint8_t req_compactor<T, H, C, A>::get_lg_weight() const {
 }
 
 template<typename T, bool H, typename C, typename A>
+std::vector<T, A>& req_compactor<T, H, C, A>::get_items() {
+  return items_;
+}
+
+template<typename T, bool H, typename C, typename A>
 const std::vector<T, A>& req_compactor<T, H, C, A>::get_items() const {
   return items_;
 }
@@ -111,10 +118,20 @@ void req_compactor<T, H, C, A>::merge_sort_in(std::vector<T, A>&& items) {
   auto middle = items_.end();
   std::move(items.begin(), items.end(), std::back_inserter(items_));
   std::inplace_merge(items_.begin(), middle, items_.end(), C());
+
+  // alternative implementation
+  //  std::vector<T, A> merged(items_.get_allocator());
+  //  merged.reserve(items_.size() + items.size());
+  //  std::merge(
+  //      std::make_move_iterator(items_.begin()), std::make_move_iterator(items_.end()),
+  //      std::make_move_iterator(items.begin()), std::make_move_iterator(items.end()),
+  //      std::back_inserter(merged), C()
+  //  );
+  //  std::swap(items_, merged);
 }
 
 template<typename T, bool H, typename C, typename A>
-std::vector<T, A> req_compactor<T, H, C, A>::compact() {
+void req_compactor<T, H, C, A>::compact(req_compactor& next) {
   // choose a part of the buffer to compact
   const uint32_t secs_to_compact = std::min(static_cast<uint32_t>(count_trailing_zeros_in_u32(~state_) + 1), static_cast<uint32_t>(num_sections_));
   const size_t compaction_range = compute_compaction_range(secs_to_compact);
@@ -125,12 +142,16 @@ std::vector<T, A> req_compactor<T, H, C, A>::compact() {
   if ((state_ & 1) == 1) { coin_ = !coin_; } // for odd flip coin;
   else { coin_ = req_random_bit(); } // random coin flip
 
-  auto promote = get_evens_or_odds(items_.begin() + compact_from, items_.begin() + compact_to, coin_);
+  auto& dst = next.get_items();
+  const auto num = (compact_to - compact_from) / 2;
+  if (dst.size() + num > dst.capacity()) dst.reserve(dst.size() + num);
+  auto middle = dst.end();
+  promote_evens_or_odds(items_.begin() + compact_from, items_.begin() + compact_to, coin_, std::back_inserter(dst));
+  std::inplace_merge(dst.begin(), middle, dst.end(), C());
   items_.erase(items_.begin() + compact_from, items_.begin() + compact_to);
 
   ++state_;
   ensure_enough_sections();
-  return promote;
 }
 
 template<typename T, bool H, typename C, typename A>
@@ -141,7 +162,7 @@ bool req_compactor<T, H, C, A>::ensure_enough_sections() {
     section_size_raw_ = ssr;
     section_size_ = ne;
     num_sections_ <<= 1;
-    //ensure_capacity(2 * get_nom_capacity());
+    items_.reserve(2 * get_nom_capacity());
     return true;
   }
   return false;
@@ -164,19 +185,18 @@ uint32_t req_compactor<T, H, C, A>::nearest_even(float value) {
 }
 
 template<typename T, bool H, typename C, typename A>
-template<typename Iter>
-std::vector<T, A> req_compactor<T, H, C, A>::get_evens_or_odds(Iter from, Iter to, bool odds) {
-  std::vector<T, A> result;
-  if (from == to) return result;
-  Iter i = from;
+template<typename InIter, typename OutIter>
+void req_compactor<T, H, C, A>::promote_evens_or_odds(InIter from, InIter to, bool odds, OutIter dst) {
+  if (from == to) return;
+  InIter i = from;
   if (odds) ++i;
   while (i != to) {
-    result.push_back(*i);
+    dst = std::move(*i);
+    ++dst;
     ++i;
     if (i == to) break;
     ++i;
   }
-  return result;
 }
 
 // helpers for integral types
diff --git a/req/include/req_sketch_impl.hpp b/req/include/req_sketch_impl.hpp
index fd4b445..60934c8 100755
--- a/req/include/req_sketch_impl.hpp
+++ b/req/include/req_sketch_impl.hpp
@@ -141,10 +141,7 @@ void req_sketch<T, H, C, S, A>::update(FwdT&& item) {
   compactors_[0].append(item);
   ++num_retained_;
   ++n_;
-  if (num_retained_ == max_nom_size_) {
-    compactors_[0].sort();
-    compress();
-  }
+  if (num_retained_ == max_nom_size_) compress();
 }
 
 template<typename T, bool H, typename C, typename S, typename A>
@@ -501,11 +498,11 @@ template<typename T, bool H, typename C, typename S, typename A>
 void req_sketch<T, H, C, S, A>::compress() {
   for (size_t h = 0; h < compactors_.size(); ++h) {
     if (compactors_[h].get_num_items() >= compactors_[h].get_nom_capacity()) {
+      if (h == 0) compactors_[0].sort();
       if (h + 1 >= get_num_levels()) { // at the top?
         grow(); // add a level, increases max_nom_size
       }
-      auto promoted = compactors_[h].compact();
-      compactors_[h + 1].merge_sort_in(std::move(promoted));
+      compactors_[h].compact(compactors_[h + 1]);
       update_num_retained();
       if (num_retained_ < max_nom_size_) break;
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org