You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by wl...@apache.org on 2023/06/02 17:40:47 UTC

[datasketches-cpp] 01/01: Parameterizing underlying hash table in tuple and theta sketches

This is an automated email from the ASF dual-hosted git repository.

wlauer pushed a commit to branch inplace
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git

commit 5d918d059d97ce07048303975f1db5ea7b32e318
Author: Will Lauer <wl...@yahooinc.com>
AuthorDate: Fri Jun 2 12:40:39 2023 -0500

    Parameterizing underlying hash table in tuple and theta sketches
---
 theta/include/theta_sketch.hpp                |  14 +--
 theta/include/theta_sketch_impl.hpp           | 129 +++++++++++++-------------
 theta/include/theta_union.hpp                 |  17 ++--
 theta/include/theta_union_base.hpp            |   7 +-
 theta/include/theta_union_base_impl.hpp       |  23 +++--
 theta/include/theta_union_impl.hpp            |  30 +++---
 tuple/include/array_of_doubles_union_impl.hpp |   2 +-
 tuple/include/tuple_sketch.hpp                |  18 ++--
 tuple/include/tuple_sketch_impl.hpp           | 124 ++++++++++++-------------
 tuple/include/tuple_union.hpp                 |  14 ++-
 tuple/include/tuple_union_impl.hpp            |  28 +++---
 11 files changed, 211 insertions(+), 195 deletions(-)

diff --git a/theta/include/theta_sketch.hpp b/theta/include/theta_sketch.hpp
index 4bc1c2e..36e8721 100644
--- a/theta/include/theta_sketch.hpp
+++ b/theta/include/theta_sketch.hpp
@@ -149,7 +149,8 @@ protected:
 // forward declaration
 template<typename A> class compact_theta_sketch_alloc;
 
-template<typename Allocator = std::allocator<uint64_t>>
+template<typename Allocator = std::allocator<uint64_t>,
+        template<typename, typename, typename> class Table = theta_update_sketch_base>
 class update_theta_sketch_alloc: public theta_sketch_alloc<Allocator> {
 public:
   using Base = theta_sketch_alloc<Allocator>;
@@ -157,7 +158,7 @@ public:
   using ExtractKey = typename Base::ExtractKey;
   using iterator = typename Base::iterator;
   using const_iterator = typename Base::const_iterator;
-  using theta_table = theta_update_sketch_base<Entry, ExtractKey, Allocator>;
+  using theta_table = Table<Entry, ExtractKey, Allocator>;
   using resize_factor = typename theta_table::resize_factor;
 
   // No constructor here. Use builder instead.
@@ -301,8 +302,7 @@ private:
   theta_table table_;
 
   // for builder
-  update_theta_sketch_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p,
-      uint64_t theta, uint64_t seed, const Allocator& allocator);
+  update_theta_sketch_alloc(theta_table&& table);
 
   virtual void print_specifics(std::ostringstream& os) const;
 };
@@ -425,9 +425,11 @@ private:
   virtual void print_specifics(std::ostringstream& os) const;
 };
 
-template<typename Allocator>
-class update_theta_sketch_alloc<Allocator>::builder: public theta_base_builder<builder, Allocator> {
+template<typename Allocator,template<typename, typename, typename> class Table>
+class update_theta_sketch_alloc<Allocator, Table>::builder: public theta_base_builder<builder, Allocator> {
 public:
+    using ThetaTable = update_theta_sketch_alloc::theta_table;
+
     builder(const Allocator& allocator = Allocator());
     update_theta_sketch_alloc build() const;
 };
diff --git a/theta/include/theta_sketch_impl.hpp b/theta/include/theta_sketch_impl.hpp
index e5e5050..175b788 100644
--- a/theta/include/theta_sketch_impl.hpp
+++ b/theta/include/theta_sketch_impl.hpp
@@ -96,110 +96,109 @@ void theta_sketch_alloc<A>::print_items(std::ostringstream& os) const {
 
 // update sketch
 
-template<typename A>
-update_theta_sketch_alloc<A>::update_theta_sketch_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf,
-    float p, uint64_t theta, uint64_t seed, const A& allocator):
-table_(lg_cur_size, lg_nom_size, rf, p, theta, seed, allocator)
+template<typename A, template<typename, typename, typename> class T>
+update_theta_sketch_alloc<A,T>::update_theta_sketch_alloc(theta_table&& table):
+table_(std::move(table)) // a named rvalue reference is an lvalue: move explicitly, otherwise the table is copied
 {}
 
-template<typename A>
-A update_theta_sketch_alloc<A>::get_allocator() const {
+template<typename A, template<typename, typename, typename> class T>
+A update_theta_sketch_alloc<A,T>::get_allocator() const {
   return table_.allocator_;
 }
 
-template<typename A>
-bool update_theta_sketch_alloc<A>::is_empty() const {
+template<typename A, template<typename, typename, typename> class T>
+bool update_theta_sketch_alloc<A,T>::is_empty() const {
   return table_.is_empty_;
 }
 
-template<typename A>
-bool update_theta_sketch_alloc<A>::is_ordered() const {
+template<typename A, template<typename, typename, typename> class T>
+bool update_theta_sketch_alloc<A,T>::is_ordered() const {
   return table_.num_entries_ > 1 ? false : true;
 }
 
-template<typename A>
-uint64_t update_theta_sketch_alloc<A>::get_theta64() const {
+template<typename A, template<typename, typename, typename> class T>
+uint64_t update_theta_sketch_alloc<A,T>::get_theta64() const {
   return is_empty() ? theta_constants::MAX_THETA : table_.theta_;
 }
 
-template<typename A>
-uint32_t update_theta_sketch_alloc<A>::get_num_retained() const {
+template<typename A, template<typename, typename, typename> class T>
+uint32_t update_theta_sketch_alloc<A,T>::get_num_retained() const {
   return table_.num_entries_;
 }
 
-template<typename A>
-uint16_t update_theta_sketch_alloc<A>::get_seed_hash() const {
+template<typename A, template<typename, typename, typename> class T>
+uint16_t update_theta_sketch_alloc<A,T>::get_seed_hash() const {
   return compute_seed_hash(table_.seed_);
 }
 
-template<typename A>
-uint8_t update_theta_sketch_alloc<A>::get_lg_k() const {
+template<typename A, template<typename, typename, typename> class T>
+uint8_t update_theta_sketch_alloc<A,T>::get_lg_k() const {
   return table_.lg_nom_size_;
 }
 
-template<typename A>
-auto update_theta_sketch_alloc<A>::get_rf() const -> resize_factor {
+template<typename A, template<typename, typename, typename> class T>
+auto update_theta_sketch_alloc<A,T>::get_rf() const -> resize_factor {
   return table_.rf_;
 }
 
-template<typename A>
-void update_theta_sketch_alloc<A>::update(uint64_t value) {
+template<typename A, template<typename, typename, typename> class T>
+void update_theta_sketch_alloc<A,T>::update(uint64_t value) {
   update(&value, sizeof(value));
 }
 
-template<typename A>
-void update_theta_sketch_alloc<A>::update(int64_t value) {
+template<typename A, template<typename, typename, typename> class T>
+void update_theta_sketch_alloc<A,T>::update(int64_t value) {
   update(&value, sizeof(value));
 }
 
-template<typename A>
-void update_theta_sketch_alloc<A>::update(uint32_t value) {
+template<typename A, template<typename, typename, typename> class T>
+void update_theta_sketch_alloc<A,T>::update(uint32_t value) {
   update(static_cast<int32_t>(value));
 }
 
-template<typename A>
-void update_theta_sketch_alloc<A>::update(int32_t value) {
+template<typename A, template<typename, typename, typename> class T>
+void update_theta_sketch_alloc<A,T>::update(int32_t value) {
   update(static_cast<int64_t>(value));
 }
 
-template<typename A>
-void update_theta_sketch_alloc<A>::update(uint16_t value) {
+template<typename A, template<typename, typename, typename> class T>
+void update_theta_sketch_alloc<A,T>::update(uint16_t value) {
   update(static_cast<int16_t>(value));
 }
 
-template<typename A>
-void update_theta_sketch_alloc<A>::update(int16_t value) {
+template<typename A, template<typename, typename, typename> class T>
+void update_theta_sketch_alloc<A,T>::update(int16_t value) {
   update(static_cast<int64_t>(value));
 }
 
-template<typename A>
-void update_theta_sketch_alloc<A>::update(uint8_t value) {
+template<typename A, template<typename, typename, typename> class T>
+void update_theta_sketch_alloc<A,T>::update(uint8_t value) {
   update(static_cast<int8_t>(value));
 }
 
-template<typename A>
-void update_theta_sketch_alloc<A>::update(int8_t value) {
+template<typename A, template<typename, typename, typename> class T>
+void update_theta_sketch_alloc<A,T>::update(int8_t value) {
   update(static_cast<int64_t>(value));
 }
 
-template<typename A>
-void update_theta_sketch_alloc<A>::update(double value) {
+template<typename A, template<typename, typename, typename> class T>
+void update_theta_sketch_alloc<A,T>::update(double value) {
   update(canonical_double(value));
 }
 
-template<typename A>
-void update_theta_sketch_alloc<A>::update(float value) {
+template<typename A, template<typename, typename, typename> class T>
+void update_theta_sketch_alloc<A,T>::update(float value) {
   update(static_cast<double>(value));
 }
 
-template<typename A>
-void update_theta_sketch_alloc<A>::update(const std::string& value) {
+template<typename A, template<typename, typename, typename> class T>
+void update_theta_sketch_alloc<A,T>::update(const std::string& value) {
   if (value.empty()) return;
   update(value.c_str(), value.length());
 }
 
-template<typename A>
-void update_theta_sketch_alloc<A>::update(const void* data, size_t length) {
+template<typename A, template<typename, typename, typename> class T>
+void update_theta_sketch_alloc<A,T>::update(const void* data, size_t length) {
   const uint64_t hash = table_.hash_and_screen(data, length);
   if (hash == 0) return;
   auto result = table_.find(hash);
@@ -208,43 +207,43 @@ void update_theta_sketch_alloc<A>::update(const void* data, size_t length) {
   }
 }
 
-template<typename A>
-void update_theta_sketch_alloc<A>::trim() {
+template<typename A, template<typename, typename, typename> class T>
+void update_theta_sketch_alloc<A,T>::trim() {
   table_.trim();
 }
 
-template<typename A>
-void update_theta_sketch_alloc<A>::reset() {
+template<typename A, template<typename, typename, typename> class T>
+void update_theta_sketch_alloc<A,T>::reset() {
   table_.reset();
 }
 
-template<typename A>
-auto update_theta_sketch_alloc<A>::begin() -> iterator {
+template<typename A, template<typename, typename, typename> class T>
+auto update_theta_sketch_alloc<A,T>::begin() -> iterator {
   return iterator(table_.entries_, 1 << table_.lg_cur_size_, 0);
 }
 
-template<typename A>
-auto update_theta_sketch_alloc<A>::end() -> iterator {
+template<typename A, template<typename, typename, typename> class T>
+auto update_theta_sketch_alloc<A,T>::end() -> iterator {
   return iterator(nullptr, 0, 1 << table_.lg_cur_size_);
 }
 
-template<typename A>
-auto update_theta_sketch_alloc<A>::begin() const -> const_iterator {
+template<typename A, template<typename, typename, typename> class T>
+auto update_theta_sketch_alloc<A,T>::begin() const -> const_iterator {
   return const_iterator(table_.entries_, 1 << table_.lg_cur_size_, 0);
 }
 
-template<typename A>
-auto update_theta_sketch_alloc<A>::end() const -> const_iterator {
+template<typename A, template<typename, typename, typename> class T>
+auto update_theta_sketch_alloc<A,T>::end() const -> const_iterator {
   return const_iterator(nullptr, 0, 1 << table_.lg_cur_size_);
 }
 
-template<typename A>
-compact_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::compact(bool ordered) const {
+template<typename A, template<typename, typename, typename> class T>
+compact_theta_sketch_alloc<A> update_theta_sketch_alloc<A,T>::compact(bool ordered) const {
   return compact_theta_sketch_alloc<A>(*this, ordered);
 }
 
-template<typename A>
-void update_theta_sketch_alloc<A>::print_specifics(std::ostringstream& os) const {
+template<typename A, template<typename, typename, typename> class T>
+void update_theta_sketch_alloc<A,T>::print_specifics(std::ostringstream& os) const {
   os << "   lg nominal size      : " << static_cast<int>(table_.lg_nom_size_) << std::endl;
   os << "   lg current size      : " << static_cast<int>(table_.lg_cur_size_) << std::endl;
   os << "   resize factor        : " << (1 << table_.rf_) << std::endl;
@@ -252,12 +251,12 @@ void update_theta_sketch_alloc<A>::print_specifics(std::ostringstream& os) const
 
 // builder
 
-template<typename A>
-update_theta_sketch_alloc<A>::builder::builder(const A& allocator): theta_base_builder<builder, A>(allocator) {}
+template<typename A, template<typename, typename, typename> class T>
+update_theta_sketch_alloc<A,T>::builder::builder(const A& allocator): theta_base_builder<builder, A>(allocator) {}
 
-template<typename A>
-update_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::builder::build() const {
-  return update_theta_sketch_alloc(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->allocator_);
+template<typename A, template<typename, typename, typename> class T>
+update_theta_sketch_alloc<A,T> update_theta_sketch_alloc<A,T>::builder::build() const {
+  return update_theta_sketch_alloc(ThetaTable(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->allocator_));
 }
 
 // compact sketch
diff --git a/theta/include/theta_union.hpp b/theta/include/theta_union.hpp
index d90c53a..4c7019f 100644
--- a/theta/include/theta_union.hpp
+++ b/theta/include/theta_union.hpp
@@ -26,7 +26,8 @@
 
 namespace datasketches {
 
-template<typename Allocator = std::allocator<uint64_t>>
+template<typename Allocator = std::allocator<uint64_t>,
+        template<typename, typename, typename> class Table = theta_update_sketch_base>
 class theta_union_alloc {
 public:
   using Entry = uint64_t;
@@ -34,6 +35,8 @@ public:
   using Sketch = theta_sketch_alloc<Allocator>;
   using CompactSketch = compact_theta_sketch_alloc<Allocator>;
   using resize_factor = theta_constants::resize_factor;
+  using theta_table = Table<Entry, ExtractKey, Allocator>;
+
 
   struct nop_policy {
     void operator()(uint64_t internal_entry, uint64_t incoming_entry) const {
@@ -41,7 +44,7 @@ public:
       unused(incoming_entry);
     }
   };
-  using State = theta_union_base<Entry, ExtractKey, nop_policy, Sketch, CompactSketch, Allocator>;
+  using State = theta_union_base<Entry, ExtractKey, nop_policy, Sketch, CompactSketch, Allocator, Table>;
 
   // No constructor here. Use builder instead.
   class builder;
@@ -69,19 +72,21 @@ private:
   State state_;
 
   // for builder
-  theta_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Allocator& allocator);
+  theta_union_alloc(theta_table&& table);
 };
 
-template<typename A>
-class theta_union_alloc<A>::builder: public theta_base_builder<builder, A> {
+template<typename A, template<typename, typename, typename> class T>
+class theta_union_alloc<A, T>::builder: public theta_base_builder<builder, A> {
 public:
+  using Table = theta_union_alloc::theta_table;
+
   builder(const A& allocator = A());
 
   /**
    * This is to create an instance of the union with predefined parameters.
    * @return an instance of the union
    */
-  theta_union_alloc<A> build() const;
+  theta_union_alloc<A, T> build() const;
 };
 
 // alias with default allocator for convenience
diff --git a/theta/include/theta_union_base.hpp b/theta/include/theta_union_base.hpp
index 6da10b2..1a2d47b 100644
--- a/theta/include/theta_union_base.hpp
+++ b/theta/include/theta_union_base.hpp
@@ -30,15 +30,16 @@ template<
   typename Policy,
   typename Sketch,
   typename CompactSketch,
-  typename Allocator
+  typename Allocator,
+  template<typename, typename, typename> class Table = theta_update_sketch_base
 >
 class theta_union_base {
 public:
-  using hash_table = theta_update_sketch_base<Entry, ExtractKey, Allocator>;
+  using hash_table = Table<Entry, ExtractKey, Allocator>;
   using resize_factor = typename hash_table::resize_factor;
   using comparator = compare_by_key<ExtractKey>;
 
-  theta_union_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
+  theta_union_base(const Policy& policy, hash_table&& table);
 
   template<typename FwdSketch>
   void update(FwdSketch&& sketch);
diff --git a/theta/include/theta_union_base_impl.hpp b/theta/include/theta_union_base_impl.hpp
index 99a5bbf..c56a9a3 100644
--- a/theta/include/theta_union_base_impl.hpp
+++ b/theta/include/theta_union_base_impl.hpp
@@ -27,17 +27,16 @@
 
 namespace datasketches {
 
-template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
-theta_union_base<EN, EK, P, S, CS, A>::theta_union_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf,
-    float p, uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
+template<typename EN, typename EK, typename P, typename S, typename CS, typename A, template<typename, typename, typename> class T>
+theta_union_base<EN, EK, P, S, CS, A, T>::theta_union_base(const P& policy, hash_table&& table):
 policy_(policy),
-table_(lg_cur_size, lg_nom_size, rf, p, theta, seed, allocator),
+table_(std::move(table)), // a named rvalue reference is an lvalue: move explicitly to avoid copying the table
 union_theta_(table_.theta_)
 {}
 
-template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
+template<typename EN, typename EK, typename P, typename S, typename CS, typename A, template<typename, typename, typename> class T>
 template<typename SS>
-void theta_union_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
+void theta_union_base<EN, EK, P, S, CS, A, T>::update(SS&& sketch) {
   if (sketch.is_empty()) return;
   if (sketch.get_seed_hash() != compute_seed_hash(table_.seed_)) throw std::invalid_argument("seed hash mismatch");
   table_.is_empty_ = false;
@@ -58,8 +57,8 @@ void theta_union_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
   union_theta_ = std::min(union_theta_, table_.theta_);
 }
 
-template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
-CS theta_union_base<EN, EK, P, S, CS, A>::get_result(bool ordered) const {
+template<typename EN, typename EK, typename P, typename S, typename CS, typename A, template<typename, typename, typename> class T>
+CS theta_union_base<EN, EK, P, S, CS, A, T>::get_result(bool ordered) const {
   std::vector<EN, A> entries(table_.allocator_);
   if (table_.is_empty_) return CS(true, true, compute_seed_hash(table_.seed_), union_theta_, std::move(entries));
   entries.reserve(table_.num_entries_);
@@ -80,13 +79,13 @@ CS theta_union_base<EN, EK, P, S, CS, A>::get_result(bool ordered) const {
   return CS(table_.is_empty_, ordered, compute_seed_hash(table_.seed_), theta, std::move(entries));
 }
 
-template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
-const P& theta_union_base<EN, EK, P, S, CS, A>::get_policy() const {
+template<typename EN, typename EK, typename P, typename S, typename CS, typename A, template<typename, typename, typename> class T>
+const P& theta_union_base<EN, EK, P, S, CS, A, T>::get_policy() const {
   return policy_;
 }
 
-template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
-void theta_union_base<EN, EK, P, S, CS, A>::reset() {
+template<typename EN, typename EK, typename P, typename S, typename CS, typename A, template<typename, typename, typename> class T>
+void theta_union_base<EN, EK, P, S, CS, A, T>::reset() {
   table_.reset();
   union_theta_ = table_.theta_;
 }
diff --git a/theta/include/theta_union_impl.hpp b/theta/include/theta_union_impl.hpp
index 8618618..f444a9f 100644
--- a/theta/include/theta_union_impl.hpp
+++ b/theta/include/theta_union_impl.hpp
@@ -20,35 +20,37 @@
 #ifndef THETA_UNION_IMPL_HPP_
 #define THETA_UNION_IMPL_HPP_
 
+#include <utility>
+
 namespace datasketches {
 
-template<typename A>
-theta_union_alloc<A>::theta_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const A& allocator):
-state_(lg_cur_size, lg_nom_size, rf, p, theta, seed, nop_policy(), allocator)
+template<typename A, template<typename, typename, typename> class T>
+theta_union_alloc<A, T>::theta_union_alloc(theta_table&& table):
+state_(nop_policy(), std::move(table)) // table is a plain (non-deduced) rvalue reference, so std::move is the right cast
 {}
 
-template<typename A>
+template<typename A, template<typename, typename, typename> class T>
 template<typename SS>
-void theta_union_alloc<A>::update(SS&& sketch) {
+void theta_union_alloc<A, T>::update(SS&& sketch) {
   state_.update(std::forward<SS>(sketch));
 }
 
-template<typename A>
-auto theta_union_alloc<A>::get_result(bool ordered) const -> CompactSketch {
+template<typename A, template<typename, typename, typename> class T>
+auto theta_union_alloc<A, T>::get_result(bool ordered) const -> CompactSketch {
   return state_.get_result(ordered);
 }
 
-template<typename A>
-void theta_union_alloc<A>::reset() {
+template<typename A, template<typename, typename, typename> class T>
+void theta_union_alloc<A, T>::reset() {
   state_.reset();
 }
 
-template<typename A>
-theta_union_alloc<A>::builder::builder(const A& allocator): theta_base_builder<builder, A>(allocator) {}
+template<typename A, template<typename, typename, typename> class T>
+theta_union_alloc<A, T>::builder::builder(const A& allocator): theta_base_builder<builder, A>(allocator) {}
 
-template<typename A>
-auto theta_union_alloc<A>::builder::build() const -> theta_union_alloc {
-  return theta_union_alloc(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->allocator_);
+template<typename A, template<typename, typename, typename> class T>
+auto theta_union_alloc<A, T>::builder::build() const -> theta_union_alloc {
+  return theta_union_alloc(Table(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->allocator_));
 }
 
 } /* namespace datasketches */
diff --git a/tuple/include/array_of_doubles_union_impl.hpp b/tuple/include/array_of_doubles_union_impl.hpp
index c3fabf0..6d4fc7e 100644
--- a/tuple/include/array_of_doubles_union_impl.hpp
+++ b/tuple/include/array_of_doubles_union_impl.hpp
@@ -21,7 +21,7 @@ namespace datasketches {
 
 template<typename A>
 array_of_doubles_union_alloc<A>::array_of_doubles_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const A& allocator):
-Base(lg_cur_size, lg_nom_size, rf, p, theta, seed, policy, allocator)
+Base(policy, typename Base::theta_table(lg_cur_size, lg_nom_size, rf, p, theta, seed, allocator))
 {}
 
 template<typename A>
diff --git a/tuple/include/tuple_sketch.hpp b/tuple/include/tuple_sketch.hpp
index 70571df..c44e564 100644
--- a/tuple/include/tuple_sketch.hpp
+++ b/tuple/include/tuple_sketch.hpp
@@ -29,7 +29,7 @@ namespace datasketches {
 
 // forward-declarations
 template<typename S, typename A> class tuple_sketch;
-template<typename S, typename U, typename P, typename A> class update_tuple_sketch;
+template<typename S, typename U, typename P, typename A, template<typename, typename, typename> class T> class update_tuple_sketch;
 template<typename S, typename A> class compact_tuple_sketch;
 template<typename A> class theta_sketch_alloc;
 
@@ -203,7 +203,8 @@ template<
   typename Summary,
   typename Update = Summary,
   typename Policy = default_update_policy<Summary, Update>,
-  typename Allocator = std::allocator<Summary>
+  typename Allocator = std::allocator<Summary>,
+  template<typename, typename, typename> class Table = theta_update_sketch_base
 >
 class update_tuple_sketch: public tuple_sketch<Summary, Allocator> {
 public:
@@ -213,7 +214,7 @@ public:
   using iterator = typename Base::iterator;
   using const_iterator = typename Base::const_iterator;
   using AllocEntry = typename std::allocator_traits<Allocator>::template rebind_alloc<Entry>;
-  using tuple_map = theta_update_sketch_base<Entry, ExtractKey, AllocEntry>;
+  using tuple_map = Table<Entry, ExtractKey, AllocEntry>;
   using resize_factor = typename tuple_map::resize_factor;
 
   // No constructor here. Use builder instead.
@@ -524,7 +525,10 @@ protected:
 
 // builder
 
-template<typename Derived, typename Policy, typename Allocator>
+template<
+    typename Derived,
+    typename Policy,
+    typename Allocator>
 class tuple_base_builder: public theta_base_builder<Derived, Allocator> {
 public:
   tuple_base_builder(const Policy& policy, const Allocator& allocator);
@@ -533,8 +537,8 @@ protected:
   Policy policy_;
 };
 
-template<typename S, typename U, typename P, typename A>
-class update_tuple_sketch<S, U, P, A>::builder: public tuple_base_builder<builder, P, A> {
+template<typename S, typename U, typename P, typename A, template<typename, typename, typename> class T>
+class update_tuple_sketch<S, U, P, A, T>::builder: public tuple_base_builder<builder, P, A> {
 public:
   /**
    * Creates and instance of the builder with default parameters.
@@ -545,7 +549,7 @@ public:
    * This is to create an instance of the sketch with predefined parameters.
    * @return an instance of the sketch
    */
-  update_tuple_sketch<S, U, P, A> build() const;
+  update_tuple_sketch<S, U, P, A, T> build() const;
 };
 
 } /* namespace datasketches */
diff --git a/tuple/include/tuple_sketch_impl.hpp b/tuple/include/tuple_sketch_impl.hpp
index 0766e4d..149a481 100644
--- a/tuple/include/tuple_sketch_impl.hpp
+++ b/tuple/include/tuple_sketch_impl.hpp
@@ -95,122 +95,122 @@ string<A> tuple_sketch<S, A>::to_string(bool detail) const {
 
 // update sketch
 
-template<typename S, typename U, typename P, typename A>
-update_tuple_sketch<S, U, P, A>::update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
+template<typename S, typename U, typename P, typename A, template<typename, typename, typename> class T>
+update_tuple_sketch<S, U, P, A, T>::update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
 policy_(policy),
 map_(lg_cur_size, lg_nom_size, rf, p, theta, seed, allocator)
 {}
 
-template<typename S, typename U, typename P, typename A>
-A update_tuple_sketch<S, U, P, A>::get_allocator() const {
+template<typename S, typename U, typename P, typename A, template<typename, typename, typename> class T>
+A update_tuple_sketch<S, U, P, A, T>::get_allocator() const {
   return map_.allocator_;
 }
 
-template<typename S, typename U, typename P, typename A>
-bool update_tuple_sketch<S, U, P, A>::is_empty() const {
+template<typename S, typename U, typename P, typename A, template<typename, typename, typename> class T>
+bool update_tuple_sketch<S, U, P, A, T>::is_empty() const {
   return map_.is_empty_;
 }
 
-template<typename S, typename U, typename P, typename A>
-bool update_tuple_sketch<S, U, P, A>::is_ordered() const {
+template<typename S, typename U, typename P, typename A, template<typename, typename, typename> class T>
+bool update_tuple_sketch<S, U, P, A, T>::is_ordered() const {
   return map_.num_entries_ > 1 ? false : true;;
 }
 
-template<typename S, typename U, typename P, typename A>
-uint64_t update_tuple_sketch<S, U, P, A>::get_theta64() const {
+template<typename S, typename U, typename P, typename A, template<typename, typename, typename> class T>
+uint64_t update_tuple_sketch<S, U, P, A, T>::get_theta64() const {
   return is_empty() ? theta_constants::MAX_THETA : map_.theta_;
 }
 
-template<typename S, typename U, typename P, typename A>
-uint32_t update_tuple_sketch<S, U, P, A>::get_num_retained() const {
+template<typename S, typename U, typename P, typename A, template<typename, typename, typename> class T>
+uint32_t update_tuple_sketch<S, U, P, A, T>::get_num_retained() const {
   return map_.num_entries_;
 }
 
-template<typename S, typename U, typename P, typename A>
-uint16_t update_tuple_sketch<S, U, P, A>::get_seed_hash() const {
+template<typename S, typename U, typename P, typename A, template<typename, typename, typename> class T>
+uint16_t update_tuple_sketch<S, U, P, A, T>::get_seed_hash() const {
   return compute_seed_hash(map_.seed_);
 }
 
-template<typename S, typename U, typename P, typename A>
-uint8_t update_tuple_sketch<S, U, P, A>::get_lg_k() const {
+template<typename S, typename U, typename P, typename A, template<typename, typename, typename> class T>
+uint8_t update_tuple_sketch<S, U, P, A, T>::get_lg_k() const {
   return map_.lg_nom_size_;
 }
 
-template<typename S, typename U, typename P, typename A>
-auto update_tuple_sketch<S, U, P, A>::get_rf() const -> resize_factor {
+template<typename S, typename U, typename P, typename A, template<typename, typename, typename> class T>
+auto update_tuple_sketch<S, U, P, A, T>::get_rf() const -> resize_factor {
   return map_.rf_;
 }
 
-template<typename S, typename U, typename P, typename A>
+template<typename S, typename U, typename P, typename A, template<typename, typename, typename> class T>
 template<typename UU>
-void update_tuple_sketch<S, U, P, A>::update(uint64_t key, UU&& value) {
+void update_tuple_sketch<S, U, P, A, T>::update(uint64_t key, UU&& value) {
   update(&key, sizeof(key), std::forward<UU>(value));
 }
 
-template<typename S, typename U, typename P, typename A>
+template<typename S, typename U, typename P, typename A, template<typename, typename, typename> class T>
 template<typename UU>
-void update_tuple_sketch<S, U, P, A>::update(int64_t key, UU&& value) {
+void update_tuple_sketch<S, U, P, A, T>::update(int64_t key, UU&& value) {
   update(&key, sizeof(key), std::forward<UU>(value));
 }
 
-template<typename S, typename U, typename P, typename A>
+template<typename S, typename U, typename P, typename A, template<typename, typename, typename> class T>
 template<typename UU>
-void update_tuple_sketch<S, U, P, A>::update(uint32_t key, UU&& value) {
+void update_tuple_sketch<S, U, P, A, T>::update(uint32_t key, UU&& value) {
   update(static_cast<int32_t>(key), std::forward<UU>(value));
 }
 
-template<typename S, typename U, typename P, typename A>
+template<typename S, typename U, typename P, typename A, template<typename, typename, typename> class T>
 template<typename UU>
-void update_tuple_sketch<S, U, P, A>::update(int32_t key, UU&& value) {
+void update_tuple_sketch<S, U, P, A, T>::update(int32_t key, UU&& value) {
   update(static_cast<int64_t>(key), std::forward<UU>(value));
 }
 
-template<typename S, typename U, typename P, typename A>
+template<typename S, typename U, typename P, typename A, template<typename, typename, typename> class T>
 template<typename UU>
-void update_tuple_sketch<S, U, P, A>::update(uint16_t key, UU&& value) {
+void update_tuple_sketch<S, U, P, A, T>::update(uint16_t key, UU&& value) {
   update(static_cast<int16_t>(key), std::forward<UU>(value));
 }
 
-template<typename S, typename U, typename P, typename A>
+template<typename S, typename U, typename P, typename A, template<typename, typename, typename> class T>
 template<typename UU>
-void update_tuple_sketch<S, U, P, A>::update(int16_t key, UU&& value) {
+void update_tuple_sketch<S, U, P, A, T>::update(int16_t key, UU&& value) {
   update(static_cast<int64_t>(key), std::forward<UU>(value));
 }
 
-template<typename S, typename U, typename P, typename A>
+template<typename S, typename U, typename P, typename A, template<typename, typename, typename> class T>
 template<typename UU>
-void update_tuple_sketch<S, U, P, A>::update(uint8_t key, UU&& value) {
+void update_tuple_sketch<S, U, P, A, T>::update(uint8_t key, UU&& value) {
   update(static_cast<int8_t>(key), std::forward<UU>(value));
 }
 
-template<typename S, typename U, typename P, typename A>
+template<typename S, typename U, typename P, typename A, template<typename, typename, typename> class T>
 template<typename UU>
-void update_tuple_sketch<S, U, P, A>::update(int8_t key, UU&& value) {
+void update_tuple_sketch<S, U, P, A, T>::update(int8_t key, UU&& value) {
   update(static_cast<int64_t>(key), std::forward<UU>(value));
 }
 
-template<typename S, typename U, typename P, typename A>
+template<typename S, typename U, typename P, typename A, template<typename, typename, typename> class T>
 template<typename UU>
-void update_tuple_sketch<S, U, P, A>::update(const std::string& key, UU&& value) {
+void update_tuple_sketch<S, U, P, A, T>::update(const std::string& key, UU&& value) {
   if (key.empty()) return;
   update(key.c_str(), key.length(), std::forward<UU>(value));
 }
 
-template<typename S, typename U, typename P, typename A>
+template<typename S, typename U, typename P, typename A, template<typename, typename, typename> class T>
 template<typename UU>
-void update_tuple_sketch<S, U, P, A>::update(double key, UU&& value) {
+void update_tuple_sketch<S, U, P, A, T>::update(double key, UU&& value) {
   update(canonical_double(key), std::forward<UU>(value));
 }
 
-template<typename S, typename U, typename P, typename A>
+template<typename S, typename U, typename P, typename A, template<typename, typename, typename> class T>
 template<typename UU>
-void update_tuple_sketch<S, U, P, A>::update(float key, UU&& value) {
+void update_tuple_sketch<S, U, P, A, T>::update(float key, UU&& value) {
   update(static_cast<double>(key), std::forward<UU>(value));
 }
 
-template<typename S, typename U, typename P, typename A>
+template<typename S, typename U, typename P, typename A, template<typename, typename, typename> class T>
 template<typename UU>
-void update_tuple_sketch<S, U, P, A>::update(const void* key, size_t length, UU&& value) {
+void update_tuple_sketch<S, U, P, A, T>::update(const void* key, size_t length, UU&& value) {
   const uint64_t hash = map_.hash_and_screen(key, length);
   if (hash == 0) return;
   auto result = map_.find(hash);
@@ -223,43 +223,43 @@ void update_tuple_sketch<S, U, P, A>::update(const void* key, size_t length, UU&
   }
 }
 
-template<typename S, typename U, typename P, typename A>
-void update_tuple_sketch<S, U, P, A>::trim() {
+template<typename S, typename U, typename P, typename A, template<typename, typename, typename> class T>
+void update_tuple_sketch<S, U, P, A, T>::trim() {
   map_.trim();
 }
 
-template<typename S, typename U, typename P, typename A>
-void update_tuple_sketch<S, U, P, A>::reset() {
+template<typename S, typename U, typename P, typename A, template<typename, typename, typename> class T>
+void update_tuple_sketch<S, U, P, A, T>::reset() {
   map_.reset();
 }
 
-template<typename S, typename U, typename P, typename A>
-auto update_tuple_sketch<S, U, P, A>::begin() -> iterator {
+template<typename S, typename U, typename P, typename A, template<typename, typename, typename> class T>
+auto update_tuple_sketch<S, U, P, A, T>::begin() -> iterator {
   return iterator(map_.entries_, 1 << map_.lg_cur_size_, 0);
 }
 
-template<typename S, typename U, typename P, typename A>
-auto update_tuple_sketch<S, U, P, A>::end() -> iterator {
+template<typename S, typename U, typename P, typename A, template<typename, typename, typename> class T>
+auto update_tuple_sketch<S, U, P, A, T>::end() -> iterator {
   return iterator(nullptr, 0, 1 << map_.lg_cur_size_);
 }
 
-template<typename S, typename U, typename P, typename A>
-auto update_tuple_sketch<S, U, P, A>::begin() const -> const_iterator {
+template<typename S, typename U, typename P, typename A, template<typename, typename, typename> class T>
+auto update_tuple_sketch<S, U, P, A, T>::begin() const -> const_iterator {
   return const_iterator(map_.entries_, 1 << map_.lg_cur_size_, 0);
 }
 
-template<typename S, typename U, typename P, typename A>
-auto update_tuple_sketch<S, U, P, A>::end() const -> const_iterator {
+template<typename S, typename U, typename P, typename A, template<typename, typename, typename> class T>
+auto update_tuple_sketch<S, U, P, A, T>::end() const -> const_iterator {
   return const_iterator(nullptr, 0, 1 << map_.lg_cur_size_);
 }
 
-template<typename S, typename U, typename P, typename A>
-compact_tuple_sketch<S, A> update_tuple_sketch<S, U, P, A>::compact(bool ordered) const {
+template<typename S, typename U, typename P, typename A, template<typename, typename, typename> class T>
+compact_tuple_sketch<S, A> update_tuple_sketch<S, U, P, A, T>::compact(bool ordered) const {
   return compact_tuple_sketch<S, A>(*this, ordered);
 }
 
-template<typename S, typename U, typename P, typename A>
-void update_tuple_sketch<S, U, P, A>::print_specifics(std::ostringstream& os) const {
+template<typename S, typename U, typename P, typename A, template<typename, typename, typename> class T>
+void update_tuple_sketch<S, U, P, A, T>::print_specifics(std::ostringstream& os) const {
   os << "   lg nominal size      : " << (int) map_.lg_nom_size_ << std::endl;
   os << "   lg current size      : " << (int) map_.lg_cur_size_ << std::endl;
   os << "   resize factor        : " << (1 << map_.rf_) << std::endl;
@@ -589,12 +589,12 @@ template<typename D, typename P, typename A>
 tuple_base_builder<D, P, A>::tuple_base_builder(const P& policy, const A& allocator):
 theta_base_builder<D, A>(allocator), policy_(policy) {}
 
-template<typename S, typename U, typename P, typename A>
-update_tuple_sketch<S, U, P, A>::builder::builder(const P& policy, const A& allocator):
+template<typename S, typename U, typename P, typename A, template<typename, typename, typename> class T>
+update_tuple_sketch<S, U, P, A, T>::builder::builder(const P& policy, const A& allocator):
 tuple_base_builder<builder, P, A>(policy, allocator) {}
 
-template<typename S, typename U, typename P, typename A>
-auto update_tuple_sketch<S, U, P, A>::builder::build() const -> update_tuple_sketch {
+template<typename S, typename U, typename P, typename A, template<typename, typename, typename> class T>
+auto update_tuple_sketch<S, U, P, A, T>::builder::build() const -> update_tuple_sketch {
   return update_tuple_sketch(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
 }
 
diff --git a/tuple/include/tuple_union.hpp b/tuple/include/tuple_union.hpp
index 1c518da..4a71748 100644
--- a/tuple/include/tuple_union.hpp
+++ b/tuple/include/tuple_union.hpp
@@ -36,7 +36,8 @@ struct default_union_policy {
 template<
   typename Summary,
   typename Policy = default_union_policy<Summary>,
-  typename Allocator = std::allocator<Summary>
+  typename Allocator = std::allocator<Summary>,
+  template<typename, typename, typename> class Table = theta_update_sketch_base
 >
 class tuple_union {
 public:
@@ -46,6 +47,7 @@ public:
   using CompactSketch = compact_tuple_sketch<Summary, Allocator>;
   using AllocEntry = typename std::allocator_traits<Allocator>::template rebind_alloc<Entry>;
   using resize_factor = theta_constants::resize_factor;
+  using theta_table = Table<Entry, ExtractKey, AllocEntry>;
 
   // reformulate the external policy that operates on Summary
   // in terms of operations on Entry
@@ -61,7 +63,7 @@ public:
     Policy policy_;
   };
 
-  using State = theta_union_base<Entry, ExtractKey, internal_policy, Sketch, CompactSketch, AllocEntry>;
+  using State = theta_union_base<Entry, ExtractKey, internal_policy, Sketch, CompactSketch, AllocEntry, Table>;
 
   // No constructor here. Use builder instead.
   class builder;
@@ -89,12 +91,14 @@ protected:
   State state_;
 
   // for builder
-  tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
+  tuple_union(const Policy& policy, theta_table&& table);
 };
 
-template<typename S, typename P, typename A>
-class tuple_union<S, P, A>::builder: public tuple_base_builder<builder, P, A> {
+template<typename S, typename P, typename A, template<typename, typename, typename> class T>
+class tuple_union<S, P, A, T>::builder: public tuple_base_builder<builder, P, A> {
 public:
+  using Table = tuple_union::theta_table;
+
   /**
    * Creates an instance of the builder with default parameters.
    */
diff --git a/tuple/include/tuple_union_impl.hpp b/tuple/include/tuple_union_impl.hpp
index 98ea8a5..4ad79e9 100644
--- a/tuple/include/tuple_union_impl.hpp
+++ b/tuple/include/tuple_union_impl.hpp
@@ -19,34 +19,34 @@
 
 namespace datasketches {
 
-template<typename S, typename P, typename A>
-tuple_union<S, P, A>::tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
-state_(lg_cur_size, lg_nom_size, rf, p, theta, seed, internal_policy(policy), allocator)
+template<typename S, typename P, typename A, template<typename, typename, typename> class T>
+tuple_union<S, P, A, T>::tuple_union(const P& policy, theta_table&& table):
+state_{internal_policy(policy), std::forward<T<Entry,ExtractKey,AllocEntry>>(table)}
 {}
 
-template<typename S, typename P, typename A>
+template<typename S, typename P, typename A, template<typename, typename, typename> class T>
 template<typename SS>
-void tuple_union<S, P, A>::update(SS&& sketch) {
+void tuple_union<S, P, A, T>::update(SS&& sketch) {
   state_.update(std::forward<SS>(sketch));
 }
 
-template<typename S, typename P, typename A>
-auto tuple_union<S, P, A>::get_result(bool ordered) const -> CompactSketch {
+template<typename S, typename P, typename A, template<typename, typename, typename> class T>
+auto tuple_union<S, P, A, T>::get_result(bool ordered) const -> CompactSketch {
   return state_.get_result(ordered);
 }
 
-template<typename S, typename P, typename A>
-void tuple_union<S, P, A>::reset() {
+template<typename S, typename P, typename A, template<typename, typename, typename> class T>
+void tuple_union<S, P, A, T>::reset() {
   return state_.reset();
 }
 
-template<typename S, typename P, typename A>
-tuple_union<S, P, A>::builder::builder(const P& policy, const A& allocator):
+template<typename S, typename P, typename A, template<typename, typename, typename> class T>
+tuple_union<S, P, A, T>::builder::builder(const P& policy, const A& allocator):
 tuple_base_builder<builder, P, A>(policy, allocator) {}
 
-template<typename S, typename P, typename A>
-auto tuple_union<S, P, A>::builder::build() const -> tuple_union {
-  return tuple_union(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
+template<typename S, typename P, typename A, template<typename, typename, typename> class T>
+auto tuple_union<S, P, A, T>::builder::build() const -> tuple_union {
+  return tuple_union(this->policy_, Table(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->allocator_));
 }
 
 } /* namespace datasketches */


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org