You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2020/07/16 22:22:58 UTC

[incubator-datasketches-cpp] branch tuple_sketch updated: stateful allocator support

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch tuple_sketch
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git


The following commit(s) were added to refs/heads/tuple_sketch by this push:
     new 1cc3e30  stateful allocator support
1cc3e30 is described below

commit 1cc3e30ddfbe5912fbd2002c297b19400c458290
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Thu Jul 16 15:22:46 2020 -0700

    stateful allocator support
---
 tuple/include/theta_set_difference_base.hpp      |  3 ++-
 tuple/include/theta_set_difference_base_impl.hpp |  5 ++--
 tuple/include/theta_sketch_experimental.hpp      |  5 +++-
 tuple/include/theta_sketch_experimental_impl.hpp | 12 +++++++---
 tuple/include/theta_union_base.hpp               |  2 +-
 tuple/include/theta_union_base_impl.hpp          |  5 ++--
 tuple/include/theta_union_experimental.hpp       |  6 ++++-
 tuple/include/theta_union_experimental_impl.hpp  |  9 ++++---
 tuple/include/theta_update_sketch_base.hpp       |  6 ++---
 tuple/include/theta_update_sketch_base_impl.hpp  | 30 +++++++-----------------
 tuple/include/tuple_sketch.hpp                   |  5 ++--
 tuple/include/tuple_sketch_impl.hpp              | 10 ++++----
 tuple/include/tuple_union.hpp                    |  5 ++--
 tuple/include/tuple_union_impl.hpp               | 10 ++++----
 14 files changed, 60 insertions(+), 53 deletions(-)

diff --git a/tuple/include/theta_set_difference_base.hpp b/tuple/include/theta_set_difference_base.hpp
index 0c84129..0142b5c 100644
--- a/tuple/include/theta_set_difference_base.hpp
+++ b/tuple/include/theta_set_difference_base.hpp
@@ -38,12 +38,13 @@ public:
   using AllocU64 = typename std::allocator_traits<Allocator>::template rebind_alloc<uint64_t>;
   using hash_table = theta_update_sketch_base<uint64_t, trivial_extract_key, AllocU64>;
 
-  theta_set_difference_base(uint64_t seed);
+  theta_set_difference_base(uint64_t seed, const Allocator& allocator = Allocator());
 
   template<typename SS>
   CompactSketch compute(SS&& a, const Sketch& b, bool ordered) const;
 
 private:
+  Allocator allocator_;
   uint16_t seed_hash_;
 };
 
diff --git a/tuple/include/theta_set_difference_base_impl.hpp b/tuple/include/theta_set_difference_base_impl.hpp
index 98c7ade..6e4d553 100644
--- a/tuple/include/theta_set_difference_base_impl.hpp
+++ b/tuple/include/theta_set_difference_base_impl.hpp
@@ -25,7 +25,8 @@
 namespace datasketches {
 
 template<typename EN, typename EK, typename S, typename CS, typename A>
-theta_set_difference_base<EN, EK, S, CS, A>::theta_set_difference_base(uint64_t seed):
+theta_set_difference_base<EN, EK, S, CS, A>::theta_set_difference_base(uint64_t seed, const A& allocator):
+allocator_(allocator),
 seed_hash_(compute_seed_hash(seed))
 {}
 
@@ -49,7 +50,7 @@ CS theta_set_difference_base<EN, EK, S, CS, A>::compute(SS&& a, const S& b, bool
           conditional_back_inserter(entries, key_less_than<uint64_t, EN, EK>(theta)), comparator());
     } else { // hash-based
       const uint8_t lg_size = lg_size_from_count(b.get_num_retained(), hash_table::REBUILD_THRESHOLD);
-      hash_table table(lg_size, lg_size, hash_table::resize_factor::X1, 1, 0); // seed is not used here
+      hash_table table(lg_size, lg_size, hash_table::resize_factor::X1, 1, 0, allocator_); // seed is not used here
       for (const auto& entry: b) {
         const uint64_t hash = EK()(entry);
         if (hash < theta) {
diff --git a/tuple/include/theta_sketch_experimental.hpp b/tuple/include/theta_sketch_experimental.hpp
index fff6623..4a081d1 100644
--- a/tuple/include/theta_sketch_experimental.hpp
+++ b/tuple/include/theta_sketch_experimental.hpp
@@ -38,7 +38,10 @@ public:
 
   class builder: public theta_base_builder<builder> {
   public:
+      builder(const A& allocator = A());
       theta_sketch_experimental build() const;
+  private:
+      A allocator_;
   };
 
   bool is_empty() const { return table_.is_empty_; }
@@ -67,7 +70,7 @@ private:
   using theta_table = theta_update_sketch_base<uint64_t, trivial_extract_key, A>;
   theta_table table_;
 
-  theta_sketch_experimental(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed);
+  theta_sketch_experimental(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const A& allocator);
 };
 
 template<typename A = std::allocator<uint64_t>>
diff --git a/tuple/include/theta_sketch_experimental_impl.hpp b/tuple/include/theta_sketch_experimental_impl.hpp
index b8a8bbc..68d8c93 100644
--- a/tuple/include/theta_sketch_experimental_impl.hpp
+++ b/tuple/include/theta_sketch_experimental_impl.hpp
@@ -24,8 +24,9 @@ namespace datasketches {
 // experimental update theta sketch derived from the same base as tuple sketch
 
 template<typename A>
-theta_sketch_experimental<A>::theta_sketch_experimental(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed):
-table_(lg_cur_size, lg_nom_size, rf, p, seed)
+theta_sketch_experimental<A>::theta_sketch_experimental(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf,
+    float p, uint64_t seed, const A& allocator):
+table_(lg_cur_size, lg_nom_size, rf, p, seed, allocator)
 {}
 
 template<typename A>
@@ -104,9 +105,14 @@ auto theta_sketch_experimental<A>::end() const -> const_iterator {
   return const_iterator(nullptr, 0, 1 << table_.lg_cur_size_);
 }
 
+// builder
+
+template<typename A>
+theta_sketch_experimental<A>::builder::builder(const A& allocator): allocator_(allocator) {}
+
 template<typename A>
 theta_sketch_experimental<A> theta_sketch_experimental<A>::builder::build() const {
-  return theta_sketch_experimental(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->seed_);
+  return theta_sketch_experimental(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->seed_, allocator_);
 }
 
 template<typename A>
diff --git a/tuple/include/theta_union_base.hpp b/tuple/include/theta_union_base.hpp
index 6e3823f..03e71fd 100644
--- a/tuple/include/theta_union_base.hpp
+++ b/tuple/include/theta_union_base.hpp
@@ -38,7 +38,7 @@ public:
   using resize_factor = typename hash_table::resize_factor;
   using comparator = compare_by_key<ExtractKey>;
 
-  theta_union_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const Policy& policy);
+  theta_union_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const Policy& policy, const Allocator& allocator);
 
   template<typename FwdSketch>
   void update(FwdSketch&& sketch);
diff --git a/tuple/include/theta_union_base_impl.hpp b/tuple/include/theta_union_base_impl.hpp
index 056a72e..a1015ef 100644
--- a/tuple/include/theta_union_base_impl.hpp
+++ b/tuple/include/theta_union_base_impl.hpp
@@ -24,9 +24,10 @@
 namespace datasketches {
 
 template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
-theta_union_base<EN, EK, P, S, CS, A>::theta_union_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const P& policy):
+theta_union_base<EN, EK, P, S, CS, A>::theta_union_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf,
+    float p, uint64_t seed, const P& policy, const A& allocator):
 policy_(policy),
-table_(lg_cur_size, lg_nom_size, rf, p, seed),
+table_(lg_cur_size, lg_nom_size, rf, p, seed, allocator),
 union_theta_(table_.theta_)
 {}
 
diff --git a/tuple/include/theta_union_experimental.hpp b/tuple/include/theta_union_experimental.hpp
index 5fc7faf..5a5cb84 100644
--- a/tuple/include/theta_union_experimental.hpp
+++ b/tuple/include/theta_union_experimental.hpp
@@ -67,18 +67,22 @@ private:
   State state_;
 
   // for builder
-  theta_union_experimental(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed);
+  theta_union_experimental(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const Allocator& allocator);
 };
 
 template<typename A>
 class theta_union_experimental<A>::builder: public theta_base_builder<theta_union_experimental<A>::builder> {
 public:
+  builder(const A& allocator = A());
 
   /**
    * This is to create an instance of the union with predefined parameters.
    * @return an instance of the union
    */
   theta_union_experimental<A> build() const;
+
+private:
+  A allocator_;
 };
 
 } /* namespace datasketches */
diff --git a/tuple/include/theta_union_experimental_impl.hpp b/tuple/include/theta_union_experimental_impl.hpp
index 0fd0443..51fe13e 100644
--- a/tuple/include/theta_union_experimental_impl.hpp
+++ b/tuple/include/theta_union_experimental_impl.hpp
@@ -20,8 +20,8 @@
 namespace datasketches {
 
 template<typename A>
-theta_union_experimental<A>::theta_union_experimental(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed):
-state_(lg_cur_size, lg_nom_size, rf, p, seed, pass_through_policy())
+theta_union_experimental<A>::theta_union_experimental(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const A& allocator):
+state_(lg_cur_size, lg_nom_size, rf, p, seed, pass_through_policy(), allocator)
 {}
 
 template<typename A>
@@ -35,10 +35,13 @@ auto theta_union_experimental<A>::get_result(bool ordered) const -> CompactSketc
 }
 
 template<typename A>
+theta_union_experimental<A>::builder::builder(const A& allocator): allocator_(allocator) {}
+
+template<typename A>
 auto theta_union_experimental<A>::builder::build() const -> theta_union_experimental {
   return theta_union_experimental(
       this->starting_sub_multiple(this->lg_k_ + 1, this->MIN_LG_K, static_cast<uint8_t>(this->rf_)),
-      this->lg_k_, this->rf_, this->p_, this->seed_);
+      this->lg_k_, this->rf_, this->p_, this->seed_, allocator_);
 }
 
 } /* namespace datasketches */
diff --git a/tuple/include/theta_update_sketch_base.hpp b/tuple/include/theta_update_sketch_base.hpp
index 46d9cf6..70e3fe5 100644
--- a/tuple/include/theta_update_sketch_base.hpp
+++ b/tuple/include/theta_update_sketch_base.hpp
@@ -43,7 +43,8 @@ struct theta_update_sketch_base {
   using resize_factor = theta_constants::resize_factor;
   using comparator = compare_by_key<ExtractKey>;
 
-  theta_update_sketch_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed);
+  theta_update_sketch_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p,
+      uint64_t seed, const Allocator& allocator);
   // TODO: copy and move
   ~theta_update_sketch_base();
 
@@ -59,8 +60,6 @@ struct theta_update_sketch_base {
   iterator begin() const;
   iterator end() const;
 
-  string<Allocator> to_string() const;
-
   // resize threshold = 0.5 tuned for speed
   static constexpr double RESIZE_THRESHOLD = 0.5;
   // hash table rebuild threshold = 15/16
@@ -69,6 +68,7 @@ struct theta_update_sketch_base {
   static constexpr uint8_t STRIDE_HASH_BITS = 7;
   static constexpr uint32_t STRIDE_MASK = (1 << STRIDE_HASH_BITS) - 1;
 
+  Allocator allocator_;
   bool is_empty_;
   uint8_t lg_cur_size_;
   uint8_t lg_nom_size_;
diff --git a/tuple/include/theta_update_sketch_base_impl.hpp b/tuple/include/theta_update_sketch_base_impl.hpp
index 793113b..01a1bcb 100644
--- a/tuple/include/theta_update_sketch_base_impl.hpp
+++ b/tuple/include/theta_update_sketch_base_impl.hpp
@@ -24,7 +24,8 @@
 namespace datasketches {
 
 template<typename EN, typename EK, typename A>
-theta_update_sketch_base<EN, EK, A>::theta_update_sketch_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed):
+theta_update_sketch_base<EN, EK, A>::theta_update_sketch_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const A& allocator):
+allocator_(allocator),
 is_empty_(true),
 lg_cur_size_(lg_cur_size),
 lg_nom_size_(lg_nom_size),
@@ -35,7 +36,7 @@ seed_(seed),
 entries_(nullptr)
 {
   const size_t size = 1 << lg_cur_size;
-  entries_ = A().allocate(size);
+  entries_ = allocator_.allocate(size);
   for (size_t i = 0; i < size; ++i) EK()(entries_[i]) = 0;
   if (p < 1) this->theta_ *= p;
 }
@@ -47,7 +48,7 @@ theta_update_sketch_base<EN, EK, A>::~theta_update_sketch_base()
   for (size_t i = 0; i < size; ++i) {
     if (EK()(entries_[i]) != 0) entries_[i].~EN();
   }
-  A().deallocate(entries_, size);
+  allocator_.deallocate(entries_, size);
 }
 
 template<typename EN, typename EK, typename A>
@@ -108,21 +109,6 @@ auto theta_update_sketch_base<EN, EK, A>::end() const -> iterator {
 }
 
 template<typename EN, typename EK, typename A>
-string<A> theta_update_sketch_base<EN, EK, A>::to_string() const {
-  std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
-  auto type = typeid(*this).name();
-  os << "type: " << type << std::endl;
-  os << "sizeof: " << sizeof(*this) << std::endl;
-  os << "is_empty:    " << (is_empty_ ? "true" : "false") << std::endl;
-  os << "lg_cur_size: " << std::to_string(lg_cur_size_) << std::endl;
-  os << "lg_nom_size: " << std::to_string(lg_nom_size_) << std::endl;
-  os << "num_entries: " << num_entries_ << std::endl;
-  os << "theta (as long): " << theta_ << std::endl;
-  os << "theta (as fraction): " << static_cast<double>(theta_) / theta_constants::MAX_THETA << std::endl;
-  return os.str();
-}
-
-template<typename EN, typename EK, typename A>
 uint32_t theta_update_sketch_base<EN, EK, A>::get_capacity(uint8_t lg_cur_size, uint8_t lg_nom_size) {
   const double fraction = (lg_cur_size <= lg_nom_size) ? RESIZE_THRESHOLD : REBUILD_THRESHOLD;
   return std::floor(fraction * (1 << lg_cur_size));
@@ -142,7 +128,7 @@ void theta_update_sketch_base<EN, EK, A>::resize() {
   lg_cur_size_ += factor;
   const size_t new_size = 1 << lg_cur_size_;
   EN* old_entries = entries_;
-  entries_ = A().allocate(new_size);
+  entries_ = allocator_.allocate(new_size);
   for (size_t i = 0; i < new_size; ++i) EK()(entries_[i]) = 0;
   num_entries_ = 0;
   for (size_t i = 0; i < old_size; ++i) {
@@ -152,7 +138,7 @@ void theta_update_sketch_base<EN, EK, A>::resize() {
       old_entries[i].~EN();
     }
   }
-  A().deallocate(old_entries, old_size);
+  allocator_.deallocate(old_entries, old_size);
 }
 
 template<typename EN, typename EK, typename A>
@@ -162,7 +148,7 @@ void theta_update_sketch_base<EN, EK, A>::rebuild() {
   std::nth_element(&entries_[0], &entries_[pivot], &entries_[size], comparator());
   this->theta_ = EK()(entries_[pivot]);
   EN* old_entries = entries_;
-  entries_ = A().allocate(size);
+  entries_ = allocator_.allocate(size);
   for (size_t i = 0; i < size; ++i) EK()(entries_[i]) = 0;
   num_entries_ = 0;
   for (size_t i = 0; i < size; ++i) {
@@ -172,7 +158,7 @@ void theta_update_sketch_base<EN, EK, A>::rebuild() {
       old_entries[i].~EN();
     }
   }
-  A().deallocate(old_entries, size);
+  allocator_.deallocate(old_entries, size);
 }
 
 template<typename EN, typename EK, typename A>
diff --git a/tuple/include/tuple_sketch.hpp b/tuple/include/tuple_sketch.hpp
index 90d2046..f7f228e 100644
--- a/tuple/include/tuple_sketch.hpp
+++ b/tuple/include/tuple_sketch.hpp
@@ -317,7 +317,7 @@ private:
   tuple_map map_;
 
   // for builder
-  update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const Policy& policy);
+  update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const Policy& policy, const Allocator& allocator);
 
   virtual void print_specifics(std::ostringstream& os) const;
 };
@@ -448,7 +448,7 @@ public:
   /**
    * Creates an instance of the builder with default parameters.
    */
-  builder(const P& policy = P());
+  builder(const P& policy = P(), const A& allocator = A());
 
   /**
    * This is to create an instance of the sketch with predefined parameters.
@@ -458,6 +458,7 @@ public:
 
 private:
   P policy_;
+  A allocator_;
 };
 
 } /* namespace datasketches */
diff --git a/tuple/include/tuple_sketch_impl.hpp b/tuple/include/tuple_sketch_impl.hpp
index 75fa337..a972d8a 100644
--- a/tuple/include/tuple_sketch_impl.hpp
+++ b/tuple/include/tuple_sketch_impl.hpp
@@ -82,9 +82,9 @@ string<A> tuple_sketch<S, A>::to_string(bool detail) const {
 // update sketch
 
 template<typename S, typename U, typename P, typename A>
-update_tuple_sketch<S, U, P, A>::update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const P& policy):
+update_tuple_sketch<S, U, P, A>::update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const P& policy, const A& allocator):
 policy_(policy),
-map_(lg_cur_size, lg_nom_size, rf, p, seed)
+map_(lg_cur_size, lg_nom_size, rf, p, seed, allocator)
 {}
 
 template<typename S, typename U, typename P, typename A>
@@ -531,12 +531,12 @@ void compact_tuple_sketch<S, A>::print_specifics(std::ostringstream&) const {}
 // builder
 
 template<typename S, typename U, typename P, typename A>
-update_tuple_sketch<S, U, P, A>::builder::builder(const P& policy):
-policy_(policy) {}
+update_tuple_sketch<S, U, P, A>::builder::builder(const P& policy, const A& allocator):
+policy_(policy), allocator_(allocator) {}
 
 template<typename S, typename U, typename P, typename A>
 auto update_tuple_sketch<S, U, P, A>::builder::build() const -> update_tuple_sketch {
-  return update_tuple_sketch(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->seed_, policy_);
+  return update_tuple_sketch(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->seed_, policy_, allocator_);
 }
 
 } /* namespace datasketches */
diff --git a/tuple/include/tuple_union.hpp b/tuple/include/tuple_union.hpp
index 0ee776c..d716664 100644
--- a/tuple/include/tuple_union.hpp
+++ b/tuple/include/tuple_union.hpp
@@ -83,7 +83,7 @@ private:
   State state_;
 
   // for builder
-  tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const Policy& policy);
+  tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const Policy& policy, const Allocator& allocator);
 };
 
 template<typename S, typename P, typename A>
@@ -92,7 +92,7 @@ public:
   /**
    * Creates an instance of the builder with default parameters.
    */
-  builder(const P& policy = P());
+  builder(const P& policy = P(), const A& allocator = A());
 
   /**
    * This is to create an instance of the union with predefined parameters.
@@ -102,6 +102,7 @@ public:
 
 private:
   P policy_;
+  A allocator_;
 };
 
 } /* namespace datasketches */
diff --git a/tuple/include/tuple_union_impl.hpp b/tuple/include/tuple_union_impl.hpp
index 9f471ac..6df2794 100644
--- a/tuple/include/tuple_union_impl.hpp
+++ b/tuple/include/tuple_union_impl.hpp
@@ -20,8 +20,8 @@
 namespace datasketches {
 
 template<typename S, typename P, typename A>
-tuple_union<S, P, A>::tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const P& policy):
-state_(lg_cur_size, lg_nom_size, rf, p, seed, internal_policy(policy))
+tuple_union<S, P, A>::tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const P& policy, const A& allocator):
+state_(lg_cur_size, lg_nom_size, rf, p, seed, internal_policy(policy), allocator)
 {}
 
 template<typename S, typename P, typename A>
@@ -36,12 +36,12 @@ auto tuple_union<S, P, A>::get_result(bool ordered) const -> CompactSketch {
 }
 
 template<typename S, typename P, typename A>
-tuple_union<S, P, A>::builder::builder(const P& policy):
-policy_(policy) {}
+tuple_union<S, P, A>::builder::builder(const P& policy, const A& allocator):
+policy_(policy), allocator_(allocator) {}
 
 template<typename S, typename P, typename A>
 auto tuple_union<S, P, A>::builder::build() const -> tuple_union {
-  return tuple_union(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->seed_, policy_);
+  return tuple_union(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->seed_, policy_, allocator_);
 }
 
 } /* namespace datasketches */


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org