You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2020/07/10 22:36:48 UTC

[incubator-datasketches-cpp] branch tuple_sketch updated: intersection moving update

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch tuple_sketch
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git


The following commit(s) were added to refs/heads/tuple_sketch by this push:
     new 2176595  intersection moving update
2176595 is described below

commit 2176595abb08f9e9869ae612656b8d487b057cc1
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Fri Jul 10 15:36:36 2020 -0700

    intersection moving update
---
 common/CMakeLists.txt                          |  1 +
 tuple/include/theta_intersection_base.hpp      |  3 ++-
 tuple/include/theta_intersection_base_impl.hpp | 16 ++++++++++------
 tuple/include/theta_union_base_impl.hpp        | 15 ++++-----------
 tuple/include/tuple_intersection.hpp           | 12 +++++++++---
 tuple/include/tuple_intersection_impl.hpp      |  5 +++--
 6 files changed, 29 insertions(+), 23 deletions(-)

diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt
index 7b97cde..e997d30 100644
--- a/common/CMakeLists.txt
+++ b/common/CMakeLists.txt
@@ -39,5 +39,6 @@ target_sources(common
     ${CMAKE_CURRENT_SOURCE_DIR}/include/inv_pow2_table.hpp
     ${CMAKE_CURRENT_SOURCE_DIR}/include/binomial_bounds.hpp
     ${CMAKE_CURRENT_SOURCE_DIR}/include/conditional_back_inserter.hpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/include/conditional_forward.hpp
 )
 
diff --git a/tuple/include/theta_intersection_base.hpp b/tuple/include/theta_intersection_base.hpp
index 3ed7f79..2057646 100644
--- a/tuple/include/theta_intersection_base.hpp
+++ b/tuple/include/theta_intersection_base.hpp
@@ -39,7 +39,8 @@ public:
 
   // TODO: copy and move
 
-  void update(const Sketch& sketch);
+  template<typename FwdSketch>
+  void update(FwdSketch&& sketch);
 
   CompactSketch get_result(bool ordered = true) const;
 
diff --git a/tuple/include/theta_intersection_base_impl.hpp b/tuple/include/theta_intersection_base_impl.hpp
index f0c3a98..084392b 100644
--- a/tuple/include/theta_intersection_base_impl.hpp
+++ b/tuple/include/theta_intersection_base_impl.hpp
@@ -21,6 +21,8 @@
 #include <sstream>
 #include <algorithm>
 
+#include "conditional_forward.hpp"
+
 namespace datasketches {
 
 template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
@@ -55,7 +57,8 @@ void theta_intersection_base<EN, EK, P, S, CS, A>::destroy_objects() {
 }
 
 template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
-void theta_intersection_base<EN, EK, P, S, CS, A>::update(const S& sketch) {
+template<typename SS>
+void theta_intersection_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
   if (is_empty_) return;
   if (!sketch.is_empty() && sketch.get_seed_hash() != seed_hash_) throw std::invalid_argument("seed hash mismatch");
   is_empty_ |= sketch.is_empty();
@@ -70,18 +73,18 @@ void theta_intersection_base<EN, EK, P, S, CS, A>::update(const S& sketch) {
     num_entries_ = 0;
     return;
   }
-  if (!is_valid_) { // first update, clone incoming sketch
+  if (!is_valid_) { // first update, copy or move incoming sketch
     is_valid_ = true;
     lg_size_ = lg_size_from_count(sketch.get_num_retained(), theta_update_sketch_base<EN, EK, A>::REBUILD_THRESHOLD);
     const size_t size = 1 << lg_size_;
     entries_ = A().allocate(size);
     for (size_t i = 0; i < size; ++i) EK()(entries_[i]) = 0;
-    for (const auto& entry: sketch) {
+    for (auto& entry: sketch) {
       auto result = theta_update_sketch_base<EN, EK, A>::find(entries_, lg_size_, EK()(entry));
       if (result.second) {
         throw std::invalid_argument("duplicate key, possibly corrupted input sketch");
       }
-      new (result.first) EN(entry); // TODO: support move
+      new (result.first) EN(conditional_forward<SS>(entry));
       ++num_entries_;
     }
     if (num_entries_ != sketch.get_num_retained()) throw std::invalid_argument("num entries mismatch, possibly corrupted input sketch");
@@ -91,12 +94,13 @@ void theta_intersection_base<EN, EK, P, S, CS, A>::update(const S& sketch) {
     matched_entries.reserve(max_matches);
     uint32_t match_count = 0;
     uint32_t count = 0;
-    for (const auto& entry: sketch) {
+    for (auto& entry: sketch) {
       if (EK()(entry) < theta_) {
         auto result = theta_update_sketch_base<EN, EK, A>::find(entries_, lg_size_, EK()(entry));
         if (result.second) {
           if (match_count == max_matches) throw std::invalid_argument("max matches exceeded, possibly corrupted input sketch");
-          matched_entries.push_back(policy_(*result.first, entry));
+          policy_(*result.first, conditional_forward<SS>(entry));
+          matched_entries.push_back(std::move(*result.first));
           ++match_count;
         }
       } else if (sketch.is_ordered()) {
diff --git a/tuple/include/theta_union_base_impl.hpp b/tuple/include/theta_union_base_impl.hpp
index 4a62af2..056a72e 100644
--- a/tuple/include/theta_union_base_impl.hpp
+++ b/tuple/include/theta_union_base_impl.hpp
@@ -19,6 +19,8 @@
 
 #include <algorithm>
 
+#include "conditional_forward.hpp"
+
 namespace datasketches {
 
 template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
@@ -28,15 +30,6 @@ table_(lg_cur_size, lg_nom_size, rf, p, seed),
 union_theta_(table_.theta_)
 {}
 
-template<typename Container, typename Element>
-using fwd_type = typename std::conditional<std::is_lvalue_reference<Container>::value,
-    Element, typename std::remove_reference<Element>::type&&>::type;
-
-template<typename Container, typename Element>
-fwd_type<Container, Element> forward_element(Element&& element) {
-  return std::forward<fwd_type<Container, Element>>(std::forward<Element>(element));
-}
-
 template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
 template<typename SS>
 void theta_union_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
@@ -49,9 +42,9 @@ void theta_union_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
     if (hash < union_theta_) {
       auto result = table_.find(hash);
       if (!result.second) {
-        table_.insert(result.first, forward_element<SS>(entry));
+        table_.insert(result.first, conditional_forward<SS>(entry));
       } else {
-        policy_(*result.first, forward_element<SS>(entry));
+        policy_(*result.first, conditional_forward<SS>(entry));
       }
     } else {
       if (sketch.is_ordered()) break; // early stop
diff --git a/tuple/include/tuple_intersection.hpp b/tuple/include/tuple_intersection.hpp
index ac582c3..bfab530 100644
--- a/tuple/include/tuple_intersection.hpp
+++ b/tuple/include/tuple_intersection.hpp
@@ -32,6 +32,9 @@ struct example_intersection_policy {
   void operator()(Summary& summary, const Summary& other) const {
     summary += other;
   }
+  void operator()(Summary& summary, Summary&& other) const {
+    summary += other;
+  }
 };
 */
 
@@ -52,9 +55,11 @@ public:
   // in terms of operations on Entry
   struct internal_policy {
     internal_policy(const Policy& policy): policy_(policy) {}
-    Entry& operator()(Entry& internal_entry, const Entry& incoming_entry) const {
+    void operator()(Entry& internal_entry, const Entry& incoming_entry) const {
       policy_(internal_entry.second, incoming_entry.second);
-      return internal_entry;
+    }
+    void operator()(Entry& internal_entry, Entry&& incoming_entry) const {
+      policy_(internal_entry.second, std::move(incoming_entry.second));
     }
     Policy policy_;
   };
@@ -69,7 +74,8 @@ public:
    * can reduce the current set to leave the overlapping subset only.
    * @param sketch represents input set for the intersection
    */
-  void update(const Sketch& sketch);
+  template<typename FwdSketch>
+  void update(FwdSketch&& sketch);
 
   /**
    * Produces a copy of the current state of the intersection.
diff --git a/tuple/include/tuple_intersection_impl.hpp b/tuple/include/tuple_intersection_impl.hpp
index f554255..ddbde8e 100644
--- a/tuple/include/tuple_intersection_impl.hpp
+++ b/tuple/include/tuple_intersection_impl.hpp
@@ -25,8 +25,9 @@ state_(seed, internal_policy(policy))
 {}
 
 template<typename S, typename P, typename A>
-void tuple_intersection<S, P, A>::update(const Sketch& sketch) {
-  state_.update(sketch);
+template<typename SS>
+void tuple_intersection<S, P, A>::update(SS&& sketch) {
+  state_.update(std::forward<SS>(sketch));
 }
 
 template<typename S, typename P, typename A>


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org