You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2021/05/26 02:42:08 UTC

[datasketches-cpp] branch theta_fix_and_cleanup created (now 29aebe6)

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a change to branch theta_fix_and_cleanup
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git.


      at 29aebe6  cleanup

This branch includes the following new commits:

     new f005eaa  fixed union
     new 29aebe6  cleanup

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org


[datasketches-cpp] 01/02: fixed union

Posted by al...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch theta_fix_and_cleanup
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git

commit f005eaad335128660df7886723e9c133c0ab7f05
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Tue May 25 19:41:26 2021 -0700

    fixed union
---
 theta/include/theta_union_base_impl.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/theta/include/theta_union_base_impl.hpp b/theta/include/theta_union_base_impl.hpp
index ec8ce56..254bc44 100644
--- a/theta/include/theta_union_base_impl.hpp
+++ b/theta/include/theta_union_base_impl.hpp
@@ -43,7 +43,7 @@ void theta_union_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
   if (sketch.get_theta64() < union_theta_) union_theta_ = sketch.get_theta64();
   for (auto& entry: sketch) {
     const uint64_t hash = EK()(entry);
-    if (hash < union_theta_) {
+    if (hash < union_theta_ && hash < table_.theta_) {
       auto result = table_.find(hash);
       if (!result.second) {
         table_.insert(result.first, conditional_forward<SS>(entry));

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org


[datasketches-cpp] 02/02: cleanup

Posted by al...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch theta_fix_and_cleanup
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git

commit 29aebe60ebd6bb19b8b26a2711e592c7d3605dda
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Tue May 25 19:41:55 2021 -0700

    cleanup
---
 theta/include/theta_intersection.hpp            | 5 +++++
 theta/include/theta_union.hpp                   | 8 ++++----
 theta/include/theta_union_impl.hpp              | 2 +-
 theta/include/theta_update_sketch_base_impl.hpp | 4 ++--
 4 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/theta/include/theta_intersection.hpp b/theta/include/theta_intersection.hpp
index 98a8bf1..21eb35c 100644
--- a/theta/include/theta_intersection.hpp
+++ b/theta/include/theta_intersection.hpp
@@ -41,6 +41,11 @@ public:
   };
   using State = theta_intersection_base<Entry, ExtractKey, pass_through_policy, Sketch, CompactSketch, Allocator>;
 
+  /*
+   * Constructor
+   * @param seed for the hash function that was used to create the sketch
+   * @param allocator to use for allocating and deallocating memory
+   */
   explicit theta_intersection_alloc(uint64_t seed = DEFAULT_SEED, const Allocator& allocator = Allocator());
 
   /**
diff --git a/theta/include/theta_union.hpp b/theta/include/theta_union.hpp
index 44f9b52..8504ffb 100644
--- a/theta/include/theta_union.hpp
+++ b/theta/include/theta_union.hpp
@@ -35,13 +35,13 @@ public:
   using CompactSketch = compact_theta_sketch_alloc<Allocator>;
   using resize_factor = theta_constants::resize_factor;
 
-  struct pass_through_policy {
-    uint64_t operator()(uint64_t internal_entry, uint64_t incoming_entry) const {
+  struct nop_policy {
+    void operator()(uint64_t internal_entry, uint64_t incoming_entry) const {
+      unused(internal_entry);
       unused(incoming_entry);
-      return internal_entry;
     }
   };
-  using State = theta_union_base<Entry, ExtractKey, pass_through_policy, Sketch, CompactSketch, Allocator>;
+  using State = theta_union_base<Entry, ExtractKey, nop_policy, Sketch, CompactSketch, Allocator>;
 
   // No constructor here. Use builder instead.
   class builder;
diff --git a/theta/include/theta_union_impl.hpp b/theta/include/theta_union_impl.hpp
index 4708d70..1ccf4d4 100644
--- a/theta/include/theta_union_impl.hpp
+++ b/theta/include/theta_union_impl.hpp
@@ -24,7 +24,7 @@ namespace datasketches {
 
 template<typename A>
 theta_union_alloc<A>::theta_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const A& allocator):
-state_(lg_cur_size, lg_nom_size, rf, theta, seed, pass_through_policy(), allocator)
+state_(lg_cur_size, lg_nom_size, rf, theta, seed, nop_policy(), allocator)
 {}
 
 template<typename A>
diff --git a/theta/include/theta_update_sketch_base_impl.hpp b/theta/include/theta_update_sketch_base_impl.hpp
index cb1bc2a..b91a78f 100644
--- a/theta/include/theta_update_sketch_base_impl.hpp
+++ b/theta/include/theta_update_sketch_base_impl.hpp
@@ -232,10 +232,10 @@ void theta_update_sketch_base<EN, EK, A>::rebuild() {
   const size_t num_old_entries = num_entries_;
   entries_ = allocator_.allocate(size);
   for (size_t i = 0; i < size; ++i) EK()(entries_[i]) = 0;
-  num_entries_ = 0;
+  num_entries_ = nominal_size;
   // relies on consolidating non-empty entries to the front
   for (size_t i = 0; i < nominal_size; ++i) {
-    insert(find(EK()(old_entries[i])).first, std::move(old_entries[i])); // consider a special insert with no comparison
+    new (find(EK()(old_entries[i])).first) EN(std::move(old_entries[i]));
     old_entries[i].~EN();
   }
   for (size_t i = nominal_size; i < num_old_entries; ++i) old_entries[i].~EN();

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org