You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2020/07/10 22:36:48 UTC
[incubator-datasketches-cpp] branch tuple_sketch updated:
intersection moving update
This is an automated email from the ASF dual-hosted git repository.
alsay pushed a commit to branch tuple_sketch
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git
The following commit(s) were added to refs/heads/tuple_sketch by this push:
new 2176595 intersection moving update
2176595 is described below
commit 2176595abb08f9e9869ae612656b8d487b057cc1
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Fri Jul 10 15:36:36 2020 -0700
intersection moving update
---
common/CMakeLists.txt | 1 +
tuple/include/theta_intersection_base.hpp | 3 ++-
tuple/include/theta_intersection_base_impl.hpp | 16 ++++++++++------
tuple/include/theta_union_base_impl.hpp | 15 ++++-----------
tuple/include/tuple_intersection.hpp | 12 +++++++++---
tuple/include/tuple_intersection_impl.hpp | 5 +++--
6 files changed, 29 insertions(+), 23 deletions(-)
diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt
index 7b97cde..e997d30 100644
--- a/common/CMakeLists.txt
+++ b/common/CMakeLists.txt
@@ -39,5 +39,6 @@ target_sources(common
${CMAKE_CURRENT_SOURCE_DIR}/include/inv_pow2_table.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/binomial_bounds.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/conditional_back_inserter.hpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/include/conditional_forward.hpp
)
diff --git a/tuple/include/theta_intersection_base.hpp b/tuple/include/theta_intersection_base.hpp
index 3ed7f79..2057646 100644
--- a/tuple/include/theta_intersection_base.hpp
+++ b/tuple/include/theta_intersection_base.hpp
@@ -39,7 +39,8 @@ public:
// TODO: copy and move
- void update(const Sketch& sketch);
+ template<typename FwdSketch>
+ void update(FwdSketch&& sketch);
CompactSketch get_result(bool ordered = true) const;
diff --git a/tuple/include/theta_intersection_base_impl.hpp b/tuple/include/theta_intersection_base_impl.hpp
index f0c3a98..084392b 100644
--- a/tuple/include/theta_intersection_base_impl.hpp
+++ b/tuple/include/theta_intersection_base_impl.hpp
@@ -21,6 +21,8 @@
#include <sstream>
#include <algorithm>
+#include "conditional_forward.hpp"
+
namespace datasketches {
template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
@@ -55,7 +57,8 @@ void theta_intersection_base<EN, EK, P, S, CS, A>::destroy_objects() {
}
template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
-void theta_intersection_base<EN, EK, P, S, CS, A>::update(const S& sketch) {
+template<typename SS>
+void theta_intersection_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
if (is_empty_) return;
if (!sketch.is_empty() && sketch.get_seed_hash() != seed_hash_) throw std::invalid_argument("seed hash mismatch");
is_empty_ |= sketch.is_empty();
@@ -70,18 +73,18 @@ void theta_intersection_base<EN, EK, P, S, CS, A>::update(const S& sketch) {
num_entries_ = 0;
return;
}
- if (!is_valid_) { // first update, clone incoming sketch
+ if (!is_valid_) { // first update, copy or move incoming sketch
is_valid_ = true;
lg_size_ = lg_size_from_count(sketch.get_num_retained(), theta_update_sketch_base<EN, EK, A>::REBUILD_THRESHOLD);
const size_t size = 1 << lg_size_;
entries_ = A().allocate(size);
for (size_t i = 0; i < size; ++i) EK()(entries_[i]) = 0;
- for (const auto& entry: sketch) {
+ for (auto& entry: sketch) {
auto result = theta_update_sketch_base<EN, EK, A>::find(entries_, lg_size_, EK()(entry));
if (result.second) {
throw std::invalid_argument("duplicate key, possibly corrupted input sketch");
}
- new (result.first) EN(entry); // TODO: support move
+ new (result.first) EN(conditional_forward<SS>(entry));
++num_entries_;
}
if (num_entries_ != sketch.get_num_retained()) throw std::invalid_argument("num entries mismatch, possibly corrupted input sketch");
@@ -91,12 +94,13 @@ void theta_intersection_base<EN, EK, P, S, CS, A>::update(const S& sketch) {
matched_entries.reserve(max_matches);
uint32_t match_count = 0;
uint32_t count = 0;
- for (const auto& entry: sketch) {
+ for (auto& entry: sketch) {
if (EK()(entry) < theta_) {
auto result = theta_update_sketch_base<EN, EK, A>::find(entries_, lg_size_, EK()(entry));
if (result.second) {
if (match_count == max_matches) throw std::invalid_argument("max matches exceeded, possibly corrupted input sketch");
- matched_entries.push_back(policy_(*result.first, entry));
+ policy_(*result.first, conditional_forward<SS>(entry));
+ matched_entries.push_back(std::move(*result.first));
++match_count;
}
} else if (sketch.is_ordered()) {
diff --git a/tuple/include/theta_union_base_impl.hpp b/tuple/include/theta_union_base_impl.hpp
index 4a62af2..056a72e 100644
--- a/tuple/include/theta_union_base_impl.hpp
+++ b/tuple/include/theta_union_base_impl.hpp
@@ -19,6 +19,8 @@
#include <algorithm>
+#include "conditional_forward.hpp"
+
namespace datasketches {
template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
@@ -28,15 +30,6 @@ table_(lg_cur_size, lg_nom_size, rf, p, seed),
union_theta_(table_.theta_)
{}
-template<typename Container, typename Element>
-using fwd_type = typename std::conditional<std::is_lvalue_reference<Container>::value,
- Element, typename std::remove_reference<Element>::type&&>::type;
-
-template<typename Container, typename Element>
-fwd_type<Container, Element> forward_element(Element&& element) {
- return std::forward<fwd_type<Container, Element>>(std::forward<Element>(element));
-}
-
template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
template<typename SS>
void theta_union_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
@@ -49,9 +42,9 @@ void theta_union_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
if (hash < union_theta_) {
auto result = table_.find(hash);
if (!result.second) {
- table_.insert(result.first, forward_element<SS>(entry));
+ table_.insert(result.first, conditional_forward<SS>(entry));
} else {
- policy_(*result.first, forward_element<SS>(entry));
+ policy_(*result.first, conditional_forward<SS>(entry));
}
} else {
if (sketch.is_ordered()) break; // early stop
diff --git a/tuple/include/tuple_intersection.hpp b/tuple/include/tuple_intersection.hpp
index ac582c3..bfab530 100644
--- a/tuple/include/tuple_intersection.hpp
+++ b/tuple/include/tuple_intersection.hpp
@@ -32,6 +32,9 @@ struct example_intersection_policy {
void operator()(Summary& summary, const Summary& other) const {
summary += other;
}
+ void operator()(Summary& summary, Summary&& other) const {
+ summary += other;
+ }
};
*/
@@ -52,9 +55,11 @@ public:
// in terms of operations on Entry
struct internal_policy {
internal_policy(const Policy& policy): policy_(policy) {}
- Entry& operator()(Entry& internal_entry, const Entry& incoming_entry) const {
+ void operator()(Entry& internal_entry, const Entry& incoming_entry) const {
policy_(internal_entry.second, incoming_entry.second);
- return internal_entry;
+ }
+ void operator()(Entry& internal_entry, Entry&& incoming_entry) const {
+ policy_(internal_entry.second, std::move(incoming_entry.second));
}
Policy policy_;
};
@@ -69,7 +74,8 @@ public:
* can reduce the current set to leave the overlapping subset only.
* @param sketch represents input set for the intersection
*/
- void update(const Sketch& sketch);
+ template<typename FwdSketch>
+ void update(FwdSketch&& sketch);
/**
* Produces a copy of the current state of the intersection.
diff --git a/tuple/include/tuple_intersection_impl.hpp b/tuple/include/tuple_intersection_impl.hpp
index f554255..ddbde8e 100644
--- a/tuple/include/tuple_intersection_impl.hpp
+++ b/tuple/include/tuple_intersection_impl.hpp
@@ -25,8 +25,9 @@ state_(seed, internal_policy(policy))
{}
template<typename S, typename P, typename A>
-void tuple_intersection<S, P, A>::update(const Sketch& sketch) {
- state_.update(sketch);
+template<typename SS>
+void tuple_intersection<S, P, A>::update(SS&& sketch) {
+ state_.update(std::forward<SS>(sketch));
}
template<typename S, typename P, typename A>
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org