You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2020/11/24 22:54:31 UTC
[incubator-datasketches-cpp] branch req_sketch updated: performance
improvement
This is an automated email from the ASF dual-hosted git repository.
alsay pushed a commit to branch req_sketch
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git
The following commit(s) were added to refs/heads/req_sketch by this push:
new fe6b7d5 performance improvement
fe6b7d5 is described below
commit fe6b7d5794691ab20230da4366ba7be2ef22f075
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Tue Nov 24 14:54:16 2020 -0800
performance improvement
---
req/include/req_compactor.hpp | 7 +++---
req/include/req_compactor_impl.hpp | 44 +++++++++++++++++++++++++++-----------
req/include/req_sketch_impl.hpp | 9 +++-----
3 files changed, 39 insertions(+), 21 deletions(-)
diff --git a/req/include/req_compactor.hpp b/req/include/req_compactor.hpp
index 2c429fa..b11ae09 100755
--- a/req/include/req_compactor.hpp
+++ b/req/include/req_compactor.hpp
@@ -36,6 +36,7 @@ public:
uint32_t get_num_items() const;
uint32_t get_nom_capacity() const;
uint8_t get_lg_weight() const;
+ std::vector<T, Allocator>& get_items();
const std::vector<T, Allocator>& get_items() const;
template<bool inclusive>
@@ -50,7 +51,7 @@ public:
void sort();
void merge_sort_in(std::vector<T, Allocator>&& items);
- std::vector<T, Allocator> compact();
+ void compact(req_compactor& next);
/**
* Computes size needed to serialize the current state of the compactor.
@@ -107,8 +108,8 @@ private:
static uint32_t nearest_even(float value);
- template<typename Iter>
- static std::vector<T, Allocator> get_evens_or_odds(Iter from, Iter to, bool flag);
+ template<typename InIter, typename OutIter>
+ void promote_evens_or_odds(InIter from, InIter to, bool flag, OutIter dst);
// for deserialization
req_compactor(uint8_t lg_weight, bool sorted, float section_size_raw, uint8_t num_sections, uint64_t state, std::vector<T, Allocator>&& items);
diff --git a/req/include/req_compactor_impl.hpp b/req/include/req_compactor_impl.hpp
index 316378a..462b9b8 100755
--- a/req/include/req_compactor_impl.hpp
+++ b/req/include/req_compactor_impl.hpp
@@ -39,7 +39,9 @@ section_size_(section_size),
num_sections_(req_constants::INIT_NUM_SECTIONS),
state_(0),
items_(allocator)
-{}
+{
+ items_.reserve(2 * get_nom_capacity());
+}
template<typename T, bool H, typename C, typename A>
bool req_compactor<T, H, C, A>::is_sorted() const {
@@ -62,6 +64,11 @@ uint8_t req_compactor<T, H, C, A>::get_lg_weight() const {
}
template<typename T, bool H, typename C, typename A>
+std::vector<T, A>& req_compactor<T, H, C, A>::get_items() {
+ return items_;
+}
+
+template<typename T, bool H, typename C, typename A>
const std::vector<T, A>& req_compactor<T, H, C, A>::get_items() const {
return items_;
}
@@ -111,10 +118,20 @@ void req_compactor<T, H, C, A>::merge_sort_in(std::vector<T, A>&& items) {
auto middle = items_.end();
std::move(items.begin(), items.end(), std::back_inserter(items_));
std::inplace_merge(items_.begin(), middle, items_.end(), C());
+
+ // alternative implementation
+ // std::vector<T, A> merged(items_.get_allocator());
+ // merged.reserve(items_.size() + items.size());
+ // std::merge(
+ // std::make_move_iterator(items_.begin()), std::make_move_iterator(items_.end()),
+ // std::make_move_iterator(items.begin()), std::make_move_iterator(items.end()),
+ // std::back_inserter(merged), C()
+ // );
+ // std::swap(items_, merged);
}
template<typename T, bool H, typename C, typename A>
-std::vector<T, A> req_compactor<T, H, C, A>::compact() {
+void req_compactor<T, H, C, A>::compact(req_compactor& next) {
// choose a part of the buffer to compact
const uint32_t secs_to_compact = std::min(static_cast<uint32_t>(count_trailing_zeros_in_u32(~state_) + 1), static_cast<uint32_t>(num_sections_));
const size_t compaction_range = compute_compaction_range(secs_to_compact);
@@ -125,12 +142,16 @@ std::vector<T, A> req_compactor<T, H, C, A>::compact() {
if ((state_ & 1) == 1) { coin_ = !coin_; } // for odd flip coin;
else { coin_ = req_random_bit(); } // random coin flip
- auto promote = get_evens_or_odds(items_.begin() + compact_from, items_.begin() + compact_to, coin_);
+ auto& dst = next.get_items();
+ const auto num = (compact_to - compact_from) / 2;
+ if (dst.size() + num > dst.capacity()) dst.reserve(dst.size() + num);
+ auto middle = dst.end();
+ promote_evens_or_odds(items_.begin() + compact_from, items_.begin() + compact_to, coin_, std::back_inserter(dst));
+ std::inplace_merge(dst.begin(), middle, dst.end(), C());
items_.erase(items_.begin() + compact_from, items_.begin() + compact_to);
++state_;
ensure_enough_sections();
- return promote;
}
template<typename T, bool H, typename C, typename A>
@@ -141,7 +162,7 @@ bool req_compactor<T, H, C, A>::ensure_enough_sections() {
section_size_raw_ = ssr;
section_size_ = ne;
num_sections_ <<= 1;
- //ensure_capacity(2 * get_nom_capacity());
+ items_.reserve(2 * get_nom_capacity());
return true;
}
return false;
@@ -164,19 +185,18 @@ uint32_t req_compactor<T, H, C, A>::nearest_even(float value) {
}
template<typename T, bool H, typename C, typename A>
-template<typename Iter>
-std::vector<T, A> req_compactor<T, H, C, A>::get_evens_or_odds(Iter from, Iter to, bool odds) {
- std::vector<T, A> result;
- if (from == to) return result;
- Iter i = from;
+template<typename InIter, typename OutIter>
+void req_compactor<T, H, C, A>::promote_evens_or_odds(InIter from, InIter to, bool odds, OutIter dst) {
+ if (from == to) return;
+ InIter i = from;
if (odds) ++i;
while (i != to) {
- result.push_back(*i);
+ dst = std::move(*i);
+ ++dst;
++i;
if (i == to) break;
++i;
}
- return result;
}
// helpers for integral types
diff --git a/req/include/req_sketch_impl.hpp b/req/include/req_sketch_impl.hpp
index fd4b445..60934c8 100755
--- a/req/include/req_sketch_impl.hpp
+++ b/req/include/req_sketch_impl.hpp
@@ -141,10 +141,7 @@ void req_sketch<T, H, C, S, A>::update(FwdT&& item) {
compactors_[0].append(item);
++num_retained_;
++n_;
- if (num_retained_ == max_nom_size_) {
- compactors_[0].sort();
- compress();
- }
+ if (num_retained_ == max_nom_size_) compress();
}
template<typename T, bool H, typename C, typename S, typename A>
@@ -501,11 +498,11 @@ template<typename T, bool H, typename C, typename S, typename A>
void req_sketch<T, H, C, S, A>::compress() {
for (size_t h = 0; h < compactors_.size(); ++h) {
if (compactors_[h].get_num_items() >= compactors_[h].get_nom_capacity()) {
+ if (h == 0) compactors_[0].sort();
if (h + 1 >= get_num_levels()) { // at the top?
grow(); // add a level, increases max_nom_size
}
- auto promoted = compactors_[h].compact();
- compactors_[h + 1].merge_sort_in(std::move(promoted));
+ compactors_[h].compact(compactors_[h + 1]);
update_num_retained();
if (num_retained_ < max_nom_size_) break;
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org