You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2021/04/26 15:46:48 UTC
[impala] 04/04: IMPALA-10631: Upgrade DataSketches to version 3.0.0

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 75fa056dc04c6cf2c153de29595d5568db6a074f
Author: Fucun Chu <ch...@hotmail.com>
AuthorDate: Fri Apr 2 00:35:26 2021 +0800

    IMPALA-10631: Upgrade DataSketches to version 3.0.0
    
    Upgrade the external DataSketches files CPC/HLL/KLL/Theta to version
    3.0.0
    
    tests:
     -Ran the tests from tests/query_test/test_datasketches.py
    
    Change-Id: I37622a7643d015b80f55b802421eae826aa7a4f9
    Reviewed-on: http://gerrit.cloudera.org:8080/17294
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/exprs/datasketches-test.cc                  |  12 +-
 .../datasketches/AuxHashMap-internal.hpp           |  89 +--
 be/src/thirdparty/datasketches/AuxHashMap.hpp      |  17 +-
 .../datasketches/CompositeInterpolationXTable.hpp  |   4 +-
 .../datasketches/CouponHashSet-internal.hpp        |  82 +-
 be/src/thirdparty/datasketches/CouponHashSet.hpp   |  20 +-
 .../datasketches/CouponList-internal.hpp           | 122 ++-
 be/src/thirdparty/datasketches/CouponList.hpp      |  23 +-
 .../thirdparty/datasketches/CubicInterpolation.hpp |   4 +-
 be/src/thirdparty/datasketches/HarmonicNumbers.hpp |   4 +-
 .../thirdparty/datasketches/Hll4Array-internal.hpp |  29 +-
 be/src/thirdparty/datasketches/Hll4Array.hpp       |   2 +-
 .../thirdparty/datasketches/Hll6Array-internal.hpp |  31 +-
 be/src/thirdparty/datasketches/Hll6Array.hpp       |   5 +-
 .../thirdparty/datasketches/Hll8Array-internal.hpp |  31 +-
 be/src/thirdparty/datasketches/Hll8Array.hpp       |   5 +-
 .../thirdparty/datasketches/HllArray-internal.hpp  |  83 +-
 be/src/thirdparty/datasketches/HllArray.hpp        |  16 +-
 .../thirdparty/datasketches/HllSketch-internal.hpp |  20 +-
 be/src/thirdparty/datasketches/HllSketchImpl.hpp   |   3 +-
 .../datasketches/HllSketchImplFactory.hpp          |  65 +-
 .../thirdparty/datasketches/HllUnion-internal.hpp  |  31 +-
 be/src/thirdparty/datasketches/HllUtil.hpp         |   2 +-
 be/src/thirdparty/datasketches/MurmurHash3.h       |   7 +
 be/src/thirdparty/datasketches/README.md           |   6 +-
 .../datasketches/RelativeErrorTables.hpp           |   2 +-
 .../bounds_on_ratios_in_sampled_sets.hpp           | 136 ++++
 .../bounds_on_ratios_in_theta_sketched_sets.hpp    | 135 ++++
 be/src/thirdparty/datasketches/cpc_common.hpp      |   3 +
 be/src/thirdparty/datasketches/cpc_compressor.hpp  |   4 +-
 .../datasketches/cpc_compressor_impl.hpp           |  47 +-
 be/src/thirdparty/datasketches/cpc_sketch.hpp      |  13 +-
 be/src/thirdparty/datasketches/cpc_sketch_impl.hpp |  33 +-
 be/src/thirdparty/datasketches/cpc_union.hpp       |   4 +-
 be/src/thirdparty/datasketches/cpc_union_impl.hpp  |  12 +-
 be/src/thirdparty/datasketches/cpc_util.hpp        |   6 -
 be/src/thirdparty/datasketches/hll.hpp             |  40 +-
 be/src/thirdparty/datasketches/icon_estimator.hpp  |   6 +-
 .../datasketches/kll_quantile_calculator.hpp       |   2 +-
 .../datasketches/kll_quantile_calculator_impl.hpp  |   6 +-
 be/src/thirdparty/datasketches/kll_sketch.hpp      |  13 +-
 be/src/thirdparty/datasketches/kll_sketch_impl.hpp | 168 ++--
 .../thirdparty/datasketches/memory_operations.hpp  |  12 +
 be/src/thirdparty/datasketches/theta_a_not_b.hpp   |  41 +-
 .../thirdparty/datasketches/theta_a_not_b_impl.hpp |  51 +-
 ...ubicInterpolation.hpp => theta_comparators.hpp} |  39 +-
 ...InterpolationXTable.hpp => theta_constants.hpp} |  24 +-
 be/src/thirdparty/datasketches/theta_helpers.hpp   |  54 ++
 .../thirdparty/datasketches/theta_intersection.hpp |  51 +-
 .../datasketches/theta_intersection_base.hpp       |  59 ++
 .../datasketches/theta_intersection_base_impl.hpp  | 121 +++
 .../datasketches/theta_intersection_impl.hpp       |  98 +--
 ...tionXTable.hpp => theta_jaccard_similarity.hpp} |  25 +-
 .../datasketches/theta_jaccard_similarity_base.hpp | 156 ++++
 .../datasketches/theta_set_difference_base.hpp     |  54 ++
 .../theta_set_difference_base_impl.hpp             |  85 +++
 be/src/thirdparty/datasketches/theta_sketch.hpp    | 398 ++++------
 .../thirdparty/datasketches/theta_sketch_impl.hpp  | 850 +++++----------------
 be/src/thirdparty/datasketches/theta_union.hpp     |  87 +--
 .../thirdparty/datasketches/theta_union_base.hpp   |  60 ++
 .../datasketches/theta_union_base_impl.hpp         |  89 +++
 .../thirdparty/datasketches/theta_union_impl.hpp   |  82 +-
 .../datasketches/theta_update_sketch_base.hpp      | 243 ++++++
 .../datasketches/theta_update_sketch_base_impl.hpp | 394 ++++++++++
 be/src/thirdparty/datasketches/u32_table.hpp       |   6 +-
 be/src/thirdparty/datasketches/u32_table_impl.hpp  |  18 +-
 66 files changed, 2545 insertions(+), 1895 deletions(-)

diff --git a/be/src/exprs/datasketches-test.cc b/be/src/exprs/datasketches-test.cc
index 687f070..368050d 100644
--- a/be/src/exprs/datasketches-test.cc
+++ b/be/src/exprs/datasketches-test.cc
@@ -174,22 +174,22 @@ TEST(TestDataSketchesTheta, UseDataSketchesInterface) {
     datasketches::update_theta_sketch sketch1 =
         datasketches::update_theta_sketch::builder().build();
     for (int key = 0; key < 100000; key++) sketch1.update(key);
-    sketch1.serialize(sketch_stream1);
+    sketch1.compact().serialize(sketch_stream1);
 
     // 100000 distinct keys
     datasketches::update_theta_sketch sketch2 =
         datasketches::update_theta_sketch::builder().build();
     for (int key = 50000; key < 150000; key++) sketch2.update(key);
-    sketch2.serialize(sketch_stream2);
+    sketch2.compact().serialize(sketch_stream2);
   }
 
   // this section deserializes the sketches, produces union and intersection
   {
-    datasketches::update_theta_sketch sketch1 =
-        datasketches::update_theta_sketch::deserialize(sketch_stream1);
+    datasketches::compact_theta_sketch sketch1 =
+        datasketches::compact_theta_sketch::deserialize(sketch_stream1);
 
-    datasketches::update_theta_sketch sketch2 =
-        datasketches::update_theta_sketch::deserialize(sketch_stream2);
+    datasketches::compact_theta_sketch sketch2 =
+        datasketches::compact_theta_sketch::deserialize(sketch_stream2);
 
     // union opertion
     datasketches::theta_union u = datasketches::theta_union::builder().build();
diff --git a/be/src/thirdparty/datasketches/AuxHashMap-internal.hpp b/be/src/thirdparty/datasketches/AuxHashMap-internal.hpp
index 9a8e135..60142ec 100644
--- a/be/src/thirdparty/datasketches/AuxHashMap-internal.hpp
+++ b/be/src/thirdparty/datasketches/AuxHashMap-internal.hpp
@@ -26,42 +26,28 @@
 namespace datasketches {
 
 template<typename A>
-AuxHashMap<A>::AuxHashMap(int lgAuxArrInts, int lgConfigK)
-  : lgConfigK(lgConfigK),
-    lgAuxArrInts(lgAuxArrInts),
-    auxCount(0) {
-  typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
-  const int numItems = 1 << lgAuxArrInts;
-  auxIntArr = intAlloc().allocate(numItems);
-  std::fill(auxIntArr, auxIntArr + numItems, 0);
-}
-
-template<typename A>
-AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(int lgAuxArrInts, int lgConfigK) {
-  return new (ahmAlloc().allocate(1)) AuxHashMap<A>(lgAuxArrInts, lgConfigK);
-}
+AuxHashMap<A>::AuxHashMap(int lgAuxArrInts, int lgConfigK, const A& allocator):
+lgConfigK(lgConfigK),
+lgAuxArrInts(lgAuxArrInts),
+auxCount(0),
+entries(1 << lgAuxArrInts, 0, allocator)
+{}
 
 template<typename A>
-AuxHashMap<A>::AuxHashMap(const AuxHashMap& that)
-  : lgConfigK(that.lgConfigK),
-    lgAuxArrInts(that.lgAuxArrInts),
-    auxCount(that.auxCount) {
-  typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
-  const int numItems = 1 << lgAuxArrInts;
-  auxIntArr = intAlloc().allocate(numItems);
-  std::copy(that.auxIntArr, that.auxIntArr + numItems, auxIntArr);
+AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(int lgAuxArrInts, int lgConfigK, const A& allocator) {
+  return new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgAuxArrInts, lgConfigK, allocator);
 }
 
 template<typename A>
 AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(const AuxHashMap& that) {
-  return new (ahmAlloc().allocate(1)) AuxHashMap<A>(that);
+  return new (ahmAlloc(that.entries.get_allocator()).allocate(1)) AuxHashMap<A>(that);
 }
 
 template<typename A>
 AuxHashMap<A>* AuxHashMap<A>::deserialize(const void* bytes, size_t len,
                                           int lgConfigK,
                                           int auxCount, int lgAuxArrInts,
-                                          bool srcCompact) {
+                                          bool srcCompact, const A& allocator) {
   int lgArrInts = lgAuxArrInts;
   if (srcCompact) { // early compact versions didn't use LgArr byte field so ignore input
     lgArrInts = HllUtil<A>::computeLgArrInts(HLL, auxCount, lgConfigK);
@@ -77,7 +63,7 @@ AuxHashMap<A>* AuxHashMap<A>::deserialize(const void* bytes, size_t len,
     if (len < auxCount * sizeof(int)) {
       throw std::out_of_range("Input array too small to hold AuxHashMap image");
     }
-    auxHashMap = new (ahmAlloc().allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK);
+    auxHashMap = new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK, allocator);
     for (int i = 0; i < auxCount; ++i) {
       int pair = auxPtr[i];
       int slotNo = HllUtil<A>::getLow26(pair) & configKmask;
@@ -89,7 +75,7 @@ AuxHashMap<A>* AuxHashMap<A>::deserialize(const void* bytes, size_t len,
     if (len < itemsToRead * sizeof(int)) {
       throw std::out_of_range("Input array too small to hold AuxHashMap image");
     }
-    auxHashMap = new (ahmAlloc().allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK);
+    auxHashMap = new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK, allocator);
     for (int i = 0; i < itemsToRead; ++i) {
       int pair = auxPtr[i];
       if (pair == HllUtil<A>::EMPTY) { continue; }
@@ -110,7 +96,7 @@ AuxHashMap<A>* AuxHashMap<A>::deserialize(const void* bytes, size_t len,
 template<typename A>
 AuxHashMap<A>* AuxHashMap<A>::deserialize(std::istream& is, const int lgConfigK,
                                           const int auxCount, const int lgAuxArrInts,
-                                          const bool srcCompact) {
+                                          const bool srcCompact, const A& allocator) {
   int lgArrInts = lgAuxArrInts;
   if (srcCompact) { // early compact versions didn't use LgArr byte field so ignore input
     lgArrInts = HllUtil<A>::computeLgArrInts(HLL, auxCount, lgConfigK);
@@ -118,7 +104,7 @@ AuxHashMap<A>* AuxHashMap<A>::deserialize(std::istream& is, const int lgConfigK,
     lgArrInts = lgAuxArrInts;
   }
 
-  AuxHashMap<A>* auxHashMap = new (ahmAlloc().allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK);
+  AuxHashMap<A>* auxHashMap = new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK, allocator);
   typedef std::unique_ptr<AuxHashMap<A>, std::function<void(AuxHashMap<A>*)>> aux_hash_map_ptr;
   aux_hash_map_ptr aux_ptr(auxHashMap, auxHashMap->make_deleter());
 
@@ -153,23 +139,17 @@ AuxHashMap<A>* AuxHashMap<A>::deserialize(std::istream& is, const int lgConfigK,
 }
 
 template<typename A>
-AuxHashMap<A>::~AuxHashMap<A>() {
-  // should be no way to have an object without a valid array
-  typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
-  intAlloc().deallocate(auxIntArr, 1 << lgAuxArrInts);
-}
-
-template<typename A>
 std::function<void(AuxHashMap<A>*)> AuxHashMap<A>::make_deleter() {
   return [](AuxHashMap<A>* ptr) {
+    ahmAlloc alloc(ptr->entries.get_allocator());
     ptr->~AuxHashMap();
-    ahmAlloc().deallocate(ptr, 1);
+    alloc.deallocate(ptr, 1);
   };
 }
 
 template<typename A>
 AuxHashMap<A>* AuxHashMap<A>::copy() const {
-  return new (ahmAlloc().allocate(1)) AuxHashMap<A>(*this);
+  return new (ahmAlloc(entries.get_allocator()).allocate(1)) AuxHashMap<A>(*this);
 }
 
 template<typename A>
@@ -179,7 +159,7 @@ int AuxHashMap<A>::getAuxCount() const {
 
 template<typename A>
 int* AuxHashMap<A>::getAuxIntArr(){
-  return auxIntArr;
+  return entries.data();
 }
 
 template<typename A>
@@ -199,7 +179,7 @@ int AuxHashMap<A>::getUpdatableSizeBytes() const {
 
 template<typename A>
 void AuxHashMap<A>::mustAdd(const int slotNo, const int value) {
-  const int index = find(auxIntArr, lgAuxArrInts, lgConfigK, slotNo);
+  const int index = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
   const int entry_pair = HllUtil<A>::pair(slotNo, value);
   if (index >= 0) {
     throw std::invalid_argument("Found a slotNo that should not be there: SlotNo: "
@@ -207,16 +187,16 @@ void AuxHashMap<A>::mustAdd(const int slotNo, const int value) {
   }
 
   // found empty entry
-  auxIntArr[~index] = entry_pair;
+  entries[~index] = entry_pair;
   ++auxCount;
   checkGrow();
 }
 
 template<typename A>
 int AuxHashMap<A>::mustFindValueFor(const int slotNo) const {
-  const int index = find(auxIntArr, lgAuxArrInts, lgConfigK, slotNo);
+  const int index = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
   if (index >= 0) {
-    return HllUtil<A>::getValue(auxIntArr[index]);
+    return HllUtil<A>::getValue(entries[index]);
   }
 
   throw std::invalid_argument("slotNo not found: " + std::to_string(slotNo));
@@ -224,9 +204,9 @@ int AuxHashMap<A>::mustFindValueFor(const int slotNo) const {
 
 template<typename A>
 void AuxHashMap<A>::mustReplace(const int slotNo, const int value) {
-  const int idx = find(auxIntArr, lgAuxArrInts, lgConfigK, slotNo);
+  const int idx = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
   if (idx >= 0) {
-    auxIntArr[idx] = HllUtil<A>::pair(slotNo, value);
+    entries[idx] = HllUtil<A>::pair(slotNo, value);
     return;
   }
 
@@ -243,23 +223,18 @@ void AuxHashMap<A>::checkGrow() {
 
 template<typename A>
 void AuxHashMap<A>::growAuxSpace() {
-  int* oldArray = auxIntArr;
-  const int oldArrLen = 1 << lgAuxArrInts;
   const int configKmask = (1 << lgConfigK) - 1;
   const int newArrLen = 1 << ++lgAuxArrInts;
-  typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
-  auxIntArr = intAlloc().allocate(newArrLen);
-  std::fill(auxIntArr, auxIntArr + newArrLen, 0);
-  for (int i = 0; i < oldArrLen; ++i) {
-    const int fetched = oldArray[i];
+  vector_int entries_new(newArrLen, 0, entries.get_allocator());
+  for (size_t i = 0; i < entries.size(); ++i) {
+    const int fetched = entries[i];
     if (fetched != HllUtil<A>::EMPTY) {
       // find empty in new array
-      const int idx = find(auxIntArr, lgAuxArrInts, lgConfigK, fetched & configKmask);
-      auxIntArr[~idx] = fetched;
+      const int idx = find(entries_new.data(), lgAuxArrInts, lgConfigK, fetched & configKmask);
+      entries_new[~idx] = fetched;
     }
   }
-
-  intAlloc().deallocate(oldArray, oldArrLen);
+  entries = std::move(entries_new);
 }
 
 //Searches the Aux arr hash table for an empty or a matching slotNo depending on the context.
@@ -290,12 +265,12 @@ int AuxHashMap<A>::find(const int* auxArr, const int lgAuxArrInts, const int lgC
 
 template<typename A>
 coupon_iterator<A> AuxHashMap<A>::begin(bool all) const {
-  return coupon_iterator<A>(auxIntArr, 1 << lgAuxArrInts, 0, all);
+  return coupon_iterator<A>(entries.data(), 1 << lgAuxArrInts, 0, all);
 }
 
 template<typename A>
 coupon_iterator<A> AuxHashMap<A>::end() const {
-  return coupon_iterator<A>(auxIntArr, 1 << lgAuxArrInts, 1 << lgAuxArrInts, false);
+  return coupon_iterator<A>(entries.data(), 1 << lgAuxArrInts, 1 << lgAuxArrInts, false);
 }
 
 }
diff --git a/be/src/thirdparty/datasketches/AuxHashMap.hpp b/be/src/thirdparty/datasketches/AuxHashMap.hpp
index b37e85c..e18f15d 100644
--- a/be/src/thirdparty/datasketches/AuxHashMap.hpp
+++ b/be/src/thirdparty/datasketches/AuxHashMap.hpp
@@ -28,22 +28,21 @@
 
 namespace datasketches {
 
-template<typename A = std::allocator<char>>
+template<typename A>
 class AuxHashMap final {
   public:
-    explicit AuxHashMap(int lgAuxArrInts, int lgConfigK);
-    explicit AuxHashMap(const AuxHashMap<A>& that);
-    static AuxHashMap* newAuxHashMap(int lgAuxArrInts, int lgConfigK);
+    AuxHashMap(int lgAuxArrInts, int lgConfigK, const A& allocator);
+    static AuxHashMap* newAuxHashMap(int lgAuxArrInts, int lgConfigK, const A& allocator);
     static AuxHashMap* newAuxHashMap(const AuxHashMap<A>& that);
 
     static AuxHashMap* deserialize(const void* bytes, size_t len,
                                    int lgConfigK,
                                    int auxCount, int lgAuxArrInts,
-                                   bool srcCompact);
+                                   bool srcCompact, const A& allocator);
     static AuxHashMap* deserialize(std::istream& is, int lgConfigK,
                                    int auxCount, int lgAuxArrInts,
-                                   bool srcCompact);
-    virtual ~AuxHashMap();
+                                   bool srcCompact, const A& allocator);
+    virtual ~AuxHashMap() = default;
     static std::function<void(AuxHashMap<A>*)> make_deleter();
     
     AuxHashMap* copy() const;
@@ -64,6 +63,8 @@ class AuxHashMap final {
   private:
     typedef typename std::allocator_traits<A>::template rebind_alloc<AuxHashMap<A>> ahmAlloc;
 
+    using vector_int = std::vector<int, typename std::allocator_traits<A>::template rebind_alloc<int>>;
+
     // static so it can be used when resizing
     static int find(const int* auxArr, int lgAuxArrInts, int lgConfigK, int slotNo);
 
@@ -73,7 +74,7 @@ class AuxHashMap final {
     const int lgConfigK;
     int lgAuxArrInts;
     int auxCount;
-    int* auxIntArr;
+    vector_int entries;
 };
 
 }
diff --git a/be/src/thirdparty/datasketches/CompositeInterpolationXTable.hpp b/be/src/thirdparty/datasketches/CompositeInterpolationXTable.hpp
index 8baecbe..0fa0af8 100644
--- a/be/src/thirdparty/datasketches/CompositeInterpolationXTable.hpp
+++ b/be/src/thirdparty/datasketches/CompositeInterpolationXTable.hpp
@@ -24,7 +24,7 @@
 
 namespace datasketches {
 
-template<typename A = std::allocator<char>>
+template<typename A = std::allocator<uint8_t>>
 class CompositeInterpolationXTable {
   public:
     static int get_y_stride(int logK);
@@ -37,4 +37,4 @@ class CompositeInterpolationXTable {
 
 #include "CompositeInterpolationXTable-internal.hpp"
 
-#endif /* _COMPOSITEINTERPOLATIONXTABLE_HPP_ */
\ No newline at end of file
+#endif /* _COMPOSITEINTERPOLATIONXTABLE_HPP_ */
diff --git a/be/src/thirdparty/datasketches/CouponHashSet-internal.hpp b/be/src/thirdparty/datasketches/CouponHashSet-internal.hpp
index 35facfe..29a3ea7 100644
--- a/be/src/thirdparty/datasketches/CouponHashSet-internal.hpp
+++ b/be/src/thirdparty/datasketches/CouponHashSet-internal.hpp
@@ -31,8 +31,8 @@ template<typename A>
 static int find(const int* array, const int lgArrInts, const int coupon);
 
 template<typename A>
-CouponHashSet<A>::CouponHashSet(const int lgConfigK, const target_hll_type tgtHllType)
-  : CouponList<A>(lgConfigK, tgtHllType, hll_mode::SET)
+CouponHashSet<A>::CouponHashSet(const int lgConfigK, const target_hll_type tgtHllType, const A& allocator)
+  : CouponList<A>(lgConfigK, tgtHllType, hll_mode::SET, allocator)
 {
   if (lgConfigK <= 7) {
     throw std::invalid_argument("CouponHashSet must be initialized with lgConfigK > 7. Found: "
@@ -41,27 +41,21 @@ CouponHashSet<A>::CouponHashSet(const int lgConfigK, const target_hll_type tgtHl
 }
 
 template<typename A>
-CouponHashSet<A>::CouponHashSet(const CouponHashSet<A>& that)
-  : CouponList<A>(that) {}
-
-template<typename A>
 CouponHashSet<A>::CouponHashSet(const CouponHashSet<A>& that, const target_hll_type tgtHllType)
   : CouponList<A>(that, tgtHllType) {}
 
 template<typename A>
-CouponHashSet<A>::~CouponHashSet() {}
-
-template<typename A>
 std::function<void(HllSketchImpl<A>*)> CouponHashSet<A>::get_deleter() const {
   return [](HllSketchImpl<A>* ptr) {
     CouponHashSet<A>* chs = static_cast<CouponHashSet<A>*>(ptr);
+    ChsAlloc chsa(chs->getAllocator());
     chs->~CouponHashSet();
-    chsAlloc().deallocate(chs, 1);
+    chsa.deallocate(chs, 1);
   };
 }
 
 template<typename A>
-CouponHashSet<A>* CouponHashSet<A>::newSet(const void* bytes, size_t len) {
+CouponHashSet<A>* CouponHashSet<A>::newSet(const void* bytes, size_t len, const A& allocator) {
   if (len < HllUtil<A>::HASH_SET_INT_ARR_START) { // hard-coded 
     throw std::out_of_range("Input data length insufficient to hold CouponHashSet");
   }
@@ -79,7 +73,7 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(const void* bytes, size_t len) {
 
   const hll_mode mode = HllSketchImpl<A>::extractCurMode(data[HllUtil<A>::MODE_BYTE]);
   if (mode != SET) {
-    throw std::invalid_argument("Calling set construtor with non-set mode data");
+    throw std::invalid_argument("Calling set constructor with non-set mode data");
   }
 
   const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(data[HllUtil<A>::MODE_BYTE]);
@@ -106,7 +100,8 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(const void* bytes, size_t len) {
                                 + ", found: " + std::to_string(len));
   }
 
-  CouponHashSet<A>* sketch = new (chsAlloc().allocate(1)) CouponHashSet<A>(lgK, tgtHllType);
+  ChsAlloc chsa(allocator);
+  CouponHashSet<A>* sketch = new (chsa.allocate(1)) CouponHashSet<A>(lgK, tgtHllType, allocator);
 
   if (compactFlag) {
     const uint8_t* curPos = data + HllUtil<A>::HASH_SET_INT_ARR_START;
@@ -116,24 +111,19 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(const void* bytes, size_t len) {
       sketch->couponUpdate(coupon);
     }
   } else {
-    int* oldArr = sketch->couponIntArr;
-    const size_t oldArrLen = 1 << sketch->lgCouponArrInts;
-    sketch->lgCouponArrInts = lgArrInts;
-    typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
-    sketch->couponIntArr = intAlloc().allocate(1 << lgArrInts);
+    sketch->coupons.resize(1 << lgArrInts);
     sketch->couponCount = couponCount;
     // only need to read valid coupons, unlike in stream case
-    std::memcpy(sketch->couponIntArr,
+    std::memcpy(sketch->coupons.data(),
                 data + HllUtil<A>::HASH_SET_INT_ARR_START,
                 couponCount * sizeof(int));
-    intAlloc().deallocate(oldArr, oldArrLen);
   }
 
   return sketch;
 }
 
 template<typename A>
-CouponHashSet<A>* CouponHashSet<A>::newSet(std::istream& is) {
+CouponHashSet<A>* CouponHashSet<A>::newSet(std::istream& is, const A& allocator) {
   uint8_t listHeader[8];
   is.read((char*)listHeader, 8 * sizeof(uint8_t));
 
@@ -149,7 +139,7 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(std::istream& is) {
 
   hll_mode mode = HllSketchImpl<A>::extractCurMode(listHeader[HllUtil<A>::MODE_BYTE]);
   if (mode != SET) {
-    throw std::invalid_argument("Calling set construtor with non-set mode data");
+    throw std::invalid_argument("Calling set constructor with non-set mode data");
   }
 
   target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(listHeader[HllUtil<A>::MODE_BYTE]);
@@ -168,7 +158,8 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(std::istream& is) {
     lgArrInts = HllUtil<A>::computeLgArrInts(SET, couponCount, lgK);
   }
 
-  CouponHashSet<A>* sketch = new (chsAlloc().allocate(1)) CouponHashSet<A>(lgK, tgtHllType);
+  ChsAlloc chsa(allocator);
+  CouponHashSet<A>* sketch = new (chsa.allocate(1)) CouponHashSet<A>(lgK, tgtHllType, allocator);
   typedef std::unique_ptr<CouponHashSet<A>, std::function<void(HllSketchImpl<A>*)>> coupon_hash_set_ptr;
   coupon_hash_set_ptr ptr(sketch, sketch->get_deleter());
 
@@ -181,13 +172,10 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(std::istream& is) {
       sketch->couponUpdate(coupon);
     }
   } else {
-    typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
-    intAlloc().deallocate(sketch->couponIntArr, 1 << sketch->lgCouponArrInts);
-    sketch->lgCouponArrInts = lgArrInts;
-    sketch->couponIntArr = intAlloc().allocate(1 << lgArrInts);
+    sketch->coupons.resize(1 << lgArrInts);
     sketch->couponCount = couponCount;
     // for stream processing, read entire list so read pointer ends up set correctly
-    is.read((char*)sketch->couponIntArr, (1 << sketch->lgCouponArrInts) * sizeof(int));
+    is.read((char*)sketch->coupons.data(), sketch->coupons.size() * sizeof(int));
   } 
 
   if (!is.good())
@@ -198,21 +186,24 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(std::istream& is) {
 
 template<typename A>
 CouponHashSet<A>* CouponHashSet<A>::copy() const {
-  return new (chsAlloc().allocate(1)) CouponHashSet<A>(*this);
+  ChsAlloc chsa(this->coupons.get_allocator());
+  return new (chsa.allocate(1)) CouponHashSet<A>(*this);
 }
 
 template<typename A>
 CouponHashSet<A>* CouponHashSet<A>::copyAs(const target_hll_type tgtHllType) const {
-  return new (chsAlloc().allocate(1)) CouponHashSet<A>(*this, tgtHllType);
+  ChsAlloc chsa(this->coupons.get_allocator());
+  return new (chsa.allocate(1)) CouponHashSet<A>(*this, tgtHllType);
 }
 
 template<typename A>
 HllSketchImpl<A>* CouponHashSet<A>::couponUpdate(int coupon) {
-  const int index = find<A>(this->couponIntArr, this->lgCouponArrInts, coupon);
+  const uint8_t lgCouponArrInts = count_trailing_zeros_in_u32(this->coupons.size());
+  const int index = find<A>(this->coupons.data(), lgCouponArrInts, coupon);
   if (index >= 0) {
     return this; // found duplicate, ignore
   }
-  this->couponIntArr[~index] = coupon; // found empty
+  this->coupons[~index] = coupon; // found empty
   ++this->couponCount;
   if (checkGrowOrPromote()) {
     return this->promoteHeapListOrSetToHll(*this);
@@ -232,39 +223,34 @@ int CouponHashSet<A>::getPreInts() const {
 
 template<typename A>
 bool CouponHashSet<A>::checkGrowOrPromote() {
-  if ((HllUtil<A>::RESIZE_DENOM * this->couponCount) > (HllUtil<A>::RESIZE_NUMER * (1 << this->lgCouponArrInts))) {
-    if (this->lgCouponArrInts == (this->lgConfigK - 3)) { // at max size
+  if (static_cast<size_t>(HllUtil<A>::RESIZE_DENOM * this->couponCount) > (HllUtil<A>::RESIZE_NUMER * this->coupons.size())) {
+    const uint8_t lgCouponArrInts = count_trailing_zeros_in_u32(this->coupons.size());
+    if (lgCouponArrInts == (this->lgConfigK - 3)) { // at max size
       return true; // promote to HLL
     }
-    int tgtLgCoupArrSize = this->lgCouponArrInts + 1;
-    growHashSet(this->lgCouponArrInts, tgtLgCoupArrSize);
+    growHashSet(lgCouponArrInts + 1);
   }
   return false;
 }
 
 template<typename A>
-void CouponHashSet<A>::growHashSet(const int srcLgCoupArrSize, const int tgtLgCoupArrSize) {
+void CouponHashSet<A>::growHashSet(int tgtLgCoupArrSize) {
   const int tgtLen = 1 << tgtLgCoupArrSize;
-  typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
-  int* tgtCouponIntArr = intAlloc().allocate(tgtLen);
-  std::fill(tgtCouponIntArr, tgtCouponIntArr + tgtLen, 0);
+  vector_int coupons_new(tgtLen, 0, this->coupons.get_allocator());
 
-  const int srcLen = 1 << srcLgCoupArrSize;
+  const int srcLen = this->coupons.size();
   for (int i = 0; i < srcLen; ++i) { // scan existing array for non-zero values
-    const int fetched = this->couponIntArr[i];
+    const int fetched = this->coupons[i];
     if (fetched != HllUtil<A>::EMPTY) {
-      const int idx = find<A>(tgtCouponIntArr, tgtLgCoupArrSize, fetched); // search TGT array
+      const int idx = find<A>(coupons_new.data(), tgtLgCoupArrSize, fetched); // search TGT array
       if (idx < 0) { // found EMPTY
-        tgtCouponIntArr[~idx] = fetched; // insert
+        coupons_new[~idx] = fetched; // insert
         continue;
       }
       throw std::runtime_error("Error: Found duplicate coupon");
     }
   }
-
-  intAlloc().deallocate(this->couponIntArr, 1 << this->lgCouponArrInts);
-  this->couponIntArr = tgtCouponIntArr;
-  this->lgCouponArrInts = tgtLgCoupArrSize;
+  this->coupons = std::move(coupons_new);
 }
 
 template<typename A>
diff --git a/be/src/thirdparty/datasketches/CouponHashSet.hpp b/be/src/thirdparty/datasketches/CouponHashSet.hpp
index 7aaffc3..b9b99b7 100644
--- a/be/src/thirdparty/datasketches/CouponHashSet.hpp
+++ b/be/src/thirdparty/datasketches/CouponHashSet.hpp
@@ -24,20 +24,20 @@
 
 namespace datasketches {
 
-template<typename A = std::allocator<char>>
+template<typename A>
 class CouponHashSet : public CouponList<A> {
   public:
-    static CouponHashSet* newSet(const void* bytes, size_t len);
-    static CouponHashSet* newSet(std::istream& is);
-    explicit CouponHashSet(int lgConfigK, target_hll_type tgtHllType);
-    explicit CouponHashSet(const CouponHashSet& that, target_hll_type tgtHllType);
-    explicit CouponHashSet(const CouponHashSet& that);
+    static CouponHashSet* newSet(const void* bytes, size_t len, const A& allocator);
+    static CouponHashSet* newSet(std::istream& is, const A& allocator);
+    CouponHashSet(int lgConfigK, target_hll_type tgtHllType, const A& allocator);
+    CouponHashSet(const CouponHashSet& that, target_hll_type tgtHllType);
 
-    virtual ~CouponHashSet();
+    virtual ~CouponHashSet() = default;
     virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const;
 
   protected:
-    
+    using vector_int = std::vector<int, typename std::allocator_traits<A>::template rebind_alloc<int>>;
+
     virtual CouponHashSet* copy() const;
     virtual CouponHashSet* copyAs(target_hll_type tgtHllType) const;
 
@@ -49,9 +49,9 @@ class CouponHashSet : public CouponList<A> {
     friend class HllSketchImplFactory<A>;
 
   private:
-    typedef typename std::allocator_traits<A>::template rebind_alloc<CouponHashSet<A>> chsAlloc;
+    using ChsAlloc = typename std::allocator_traits<A>::template rebind_alloc<CouponHashSet<A>>;
     bool checkGrowOrPromote();
-    void growHashSet(int srcLgCoupArrSize, int tgtLgCoupArrSize);
+    void growHashSet(int tgtLgCoupArrSize);
 };
 
 }
diff --git a/be/src/thirdparty/datasketches/CouponList-internal.hpp b/be/src/thirdparty/datasketches/CouponList-internal.hpp
index 1800a37..fd304c8 100644
--- a/be/src/thirdparty/datasketches/CouponList-internal.hpp
+++ b/be/src/thirdparty/datasketches/CouponList-internal.hpp
@@ -23,6 +23,7 @@
 #include "CouponList.hpp"
 #include "CubicInterpolation.hpp"
 #include "HllUtil.hpp"
+#include "count_zeros.hpp"
 
 #include <algorithm>
 #include <cmath>
@@ -30,74 +31,45 @@
 namespace datasketches {
 
 template<typename A>
-CouponList<A>::CouponList(const int lgConfigK, const target_hll_type tgtHllType, const hll_mode mode)
-  : HllSketchImpl<A>(lgConfigK, tgtHllType, mode, false) {
-    if (mode == hll_mode::LIST) {
-      lgCouponArrInts = HllUtil<A>::LG_INIT_LIST_SIZE;
-    } else { // mode == SET
-      lgCouponArrInts = HllUtil<A>::LG_INIT_SET_SIZE;
-    }
-    oooFlag = false;
-    const int arrayLen = 1 << lgCouponArrInts;
-    typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
-    couponIntArr = intAlloc().allocate(arrayLen);
-    std::fill(couponIntArr, couponIntArr + arrayLen, 0);
-    couponCount = 0;
-}
-
-template<typename A>
-CouponList<A>::CouponList(const CouponList& that)
-  : HllSketchImpl<A>(that.lgConfigK, that.tgtHllType, that.mode, false),
-    lgCouponArrInts(that.lgCouponArrInts),
-    couponCount(that.couponCount),
-    oooFlag(that.oooFlag) {
-
-  const int numItems = 1 << lgCouponArrInts;
-  typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
-  couponIntArr = intAlloc().allocate(numItems);
-  std::copy(that.couponIntArr, that.couponIntArr + numItems, couponIntArr);
-}
-
-template<typename A>
-CouponList<A>::CouponList(const CouponList& that, const target_hll_type tgtHllType)
-  : HllSketchImpl<A>(that.lgConfigK, tgtHllType, that.mode, false),
-    lgCouponArrInts(that.lgCouponArrInts),
-    couponCount(that.couponCount),
-    oooFlag(that.oooFlag) {
-
-  const int numItems = 1 << lgCouponArrInts;
-  typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
-  couponIntArr = intAlloc().allocate(numItems);
-  std::copy(that.couponIntArr, that.couponIntArr + numItems, couponIntArr);
-}
+CouponList<A>::CouponList(const int lgConfigK, const target_hll_type tgtHllType, const hll_mode mode, const A& allocator):
+HllSketchImpl<A>(lgConfigK, tgtHllType, mode, false),
+couponCount(0),
+oooFlag(false),
+coupons(1 << (mode == hll_mode::LIST ? HllUtil<A>::LG_INIT_LIST_SIZE : HllUtil<A>::LG_INIT_SET_SIZE), 0, allocator)
+{}
 
 template<typename A>
-CouponList<A>::~CouponList() {
-  typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
-  intAlloc().deallocate(couponIntArr, 1 << lgCouponArrInts);
-}
+CouponList<A>::CouponList(const CouponList& that, const target_hll_type tgtHllType):
+HllSketchImpl<A>(that.lgConfigK, tgtHllType, that.mode, false),
+couponCount(that.couponCount),
+oooFlag(that.oooFlag),
+coupons(that.coupons)
+{}
 
 template<typename A>
 std::function<void(HllSketchImpl<A>*)> CouponList<A>::get_deleter() const {
   return [](HllSketchImpl<A>* ptr) {
     CouponList<A>* cl = static_cast<CouponList<A>*>(ptr);
+    ClAlloc cla(cl->getAllocator());
     cl->~CouponList();
-    clAlloc().deallocate(cl, 1);
+    cla.deallocate(cl, 1);
   };
 }
 
 template<typename A>
 CouponList<A>* CouponList<A>::copy() const {
-  return new (clAlloc().allocate(1)) CouponList<A>(*this);
+  ClAlloc cla(coupons.get_allocator());
+  return new (cla.allocate(1)) CouponList<A>(*this);
 }
 
 template<typename A>
 CouponList<A>* CouponList<A>::copyAs(target_hll_type tgtHllType) const {
-  return new (clAlloc().allocate(1)) CouponList<A>(*this, tgtHllType);
+  ClAlloc cla(coupons.get_allocator());
+  return new (cla.allocate(1)) CouponList<A>(*this, tgtHllType);
 }
 
 template<typename A>
-CouponList<A>* CouponList<A>::newList(const void* bytes, size_t len) {
+CouponList<A>* CouponList<A>::newList(const void* bytes, size_t len, const A& allocator) {
   if (len < HllUtil<A>::LIST_INT_ARR_START) {
     throw std::out_of_range("Input data length insufficient to hold CouponHashSet");
   }
@@ -115,7 +87,7 @@ CouponList<A>* CouponList<A>::newList(const void* bytes, size_t len) {
 
   hll_mode mode = HllSketchImpl<A>::extractCurMode(data[HllUtil<A>::MODE_BYTE]);
   if (mode != LIST) {
-    throw std::invalid_argument("Calling set construtor with non-list mode data");
+    throw std::invalid_argument("Calling list constructor with non-list mode data");
   }
 
   target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(data[HllUtil<A>::MODE_BYTE]);
@@ -133,20 +105,21 @@ CouponList<A>* CouponList<A>::newList(const void* bytes, size_t len) {
                                 + ", found: " + std::to_string(len));
   }
 
-  CouponList<A>* sketch = new (clAlloc().allocate(1)) CouponList<A>(lgK, tgtHllType, mode);
+  ClAlloc cla(allocator);
+  CouponList<A>* sketch = new (cla.allocate(1)) CouponList<A>(lgK, tgtHllType, mode, allocator);
   sketch->couponCount = couponCount;
   sketch->putOutOfOrderFlag(oooFlag); // should always be false for LIST
 
   if (!emptyFlag) {
     // only need to read valid coupons, unlike in stream case
-    std::memcpy(sketch->couponIntArr, data + HllUtil<A>::LIST_INT_ARR_START, couponCount * sizeof(int));
+    std::memcpy(sketch->coupons.data(), data + HllUtil<A>::LIST_INT_ARR_START, couponCount * sizeof(int));
   }
   
   return sketch;
 }
 
 template<typename A>
-CouponList<A>* CouponList<A>::newList(std::istream& is) {
+CouponList<A>* CouponList<A>::newList(std::istream& is, const A& allocator) {
   uint8_t listHeader[8];
   is.read((char*)listHeader, 8 * sizeof(uint8_t));
 
@@ -162,7 +135,7 @@ CouponList<A>* CouponList<A>::newList(std::istream& is) {
 
   hll_mode mode = HllSketchImpl<A>::extractCurMode(listHeader[HllUtil<A>::MODE_BYTE]);
   if (mode != LIST) {
-    throw std::invalid_argument("Calling list construtor with non-list mode data");
+    throw std::invalid_argument("Calling list constructor with non-list mode data");
   }
 
   const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(listHeader[HllUtil<A>::MODE_BYTE]);
@@ -172,8 +145,9 @@ CouponList<A>* CouponList<A>::newList(std::istream& is) {
   const bool oooFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::OUT_OF_ORDER_FLAG_MASK) ? true : false);
   const bool emptyFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::EMPTY_FLAG_MASK) ? true : false);
 
-  CouponList<A>* sketch = new (clAlloc().allocate(1)) CouponList<A>(lgK, tgtHllType, mode);
-  typedef std::unique_ptr<CouponList<A>, std::function<void(HllSketchImpl<A>*)>> coupon_list_ptr;
+  ClAlloc cla(allocator);
+  CouponList<A>* sketch = new (cla.allocate(1)) CouponList<A>(lgK, tgtHllType, mode, allocator);
+  using coupon_list_ptr = std::unique_ptr<CouponList<A>, std::function<void(HllSketchImpl<A>*)>>;
   coupon_list_ptr ptr(sketch, sketch->get_deleter());
   const int couponCount = listHeader[HllUtil<A>::LIST_COUNT_BYTE];
   sketch->couponCount = couponCount;
@@ -183,8 +157,8 @@ CouponList<A>* CouponList<A>::newList(std::istream& is) {
     // For stream processing, need to read entire number written to stream so read
     // pointer ends up set correctly.
     // If not compact, still need to read empty items even though in order.
-    const int numToRead = (compact ? couponCount : (1 << sketch->lgCouponArrInts));
-    is.read((char*)sketch->couponIntArr, numToRead * sizeof(int));
+    const int numToRead = (compact ? couponCount : sketch->coupons.size());
+    is.read((char*)sketch->coupons.data(), numToRead * sizeof(int));
   }
 
   if (!is.good())
@@ -196,14 +170,14 @@ CouponList<A>* CouponList<A>::newList(std::istream& is) {
 template<typename A>
 vector_u8<A> CouponList<A>::serialize(bool compact, unsigned header_size_bytes) const {
   const size_t sketchSizeBytes = (compact ? getCompactSerializationBytes() : getUpdatableSerializationBytes()) + header_size_bytes;
-  vector_u8<A> byteArr(sketchSizeBytes);
+  vector_u8<A> byteArr(sketchSizeBytes, 0, getAllocator());
   uint8_t* bytes = byteArr.data() + header_size_bytes;
 
   bytes[HllUtil<A>::PREAMBLE_INTS_BYTE] = static_cast<uint8_t>(getPreInts());
   bytes[HllUtil<A>::SER_VER_BYTE] = static_cast<uint8_t>(HllUtil<A>::SER_VER);
   bytes[HllUtil<A>::FAMILY_BYTE] = static_cast<uint8_t>(HllUtil<A>::FAMILY_ID);
   bytes[HllUtil<A>::LG_K_BYTE] = static_cast<uint8_t>(this->lgConfigK);
-  bytes[HllUtil<A>::LG_ARR_BYTE] = static_cast<uint8_t>(lgCouponArrInts);
+  bytes[HllUtil<A>::LG_ARR_BYTE] = count_trailing_zeros_in_u32(coupons.size());
   bytes[HllUtil<A>::FLAGS_BYTE] = this->makeFlagsByte(compact);
   bytes[HllUtil<A>::LIST_COUNT_BYTE] = static_cast<uint8_t>(this->mode == LIST ? couponCount : 0);
   bytes[HllUtil<A>::MODE_BYTE] = this->makeModeByte();
@@ -217,7 +191,7 @@ vector_u8<A> CouponList<A>::serialize(bool compact, unsigned header_size_bytes)
   const int sw = (isCompact() ? 2 : 0) | (compact ? 1 : 0);
   switch (sw) {
     case 0: { // src updatable, dst updatable
-      std::memcpy(bytes + getMemDataStart(), getCouponIntArr(), (1 << lgCouponArrInts) * sizeof(int));
+      std::memcpy(bytes + getMemDataStart(), coupons.data(), coupons.size() * sizeof(int));
       break;
     }
     case 1: { // src updatable, dst compact
@@ -247,7 +221,7 @@ void CouponList<A>::serialize(std::ostream& os, const bool compact) const {
   os.write((char*)&familyId, sizeof(familyId));
   const uint8_t lgKByte((uint8_t) this->lgConfigK);
   os.write((char*)&lgKByte, sizeof(lgKByte));
-  const uint8_t lgArrIntsByte((uint8_t) lgCouponArrInts);
+  const uint8_t lgArrIntsByte(count_trailing_zeros_in_u32(coupons.size()));
   os.write((char*)&lgArrIntsByte, sizeof(lgArrIntsByte));
   const uint8_t flagsByte(this->makeFlagsByte(compact));
   os.write((char*)&flagsByte, sizeof(flagsByte));
@@ -273,7 +247,7 @@ void CouponList<A>::serialize(std::ostream& os, const bool compact) const {
   const int sw = (isCompact() ? 2 : 0) | (compact ? 1 : 0);
   switch (sw) {
     case 0: { // src updatable, dst updatable
-      os.write((char*)getCouponIntArr(), (1 << lgCouponArrInts) * sizeof(int));
+      os.write((char*)coupons.data(), coupons.size() * sizeof(int));
       break;
     }
     case 1: { // src updatable, dst compact
@@ -292,13 +266,12 @@ void CouponList<A>::serialize(std::ostream& os, const bool compact) const {
 
 template<typename A>
 HllSketchImpl<A>* CouponList<A>::couponUpdate(int coupon) {
-  const int len = 1 << lgCouponArrInts;
-  for (int i = 0; i < len; ++i) { // search for empty slot
-    const int couponAtIdx = couponIntArr[i];
+  for (size_t i = 0; i < coupons.size(); ++i) { // search for empty slot
+    const int couponAtIdx = coupons[i];
     if (couponAtIdx == HllUtil<A>::EMPTY) {
-      couponIntArr[i] = coupon; // the actual update
+      coupons[i] = coupon; // the actual update
       ++couponCount;
-      if (couponCount >= len) { // array full
+      if (couponCount == static_cast<int>(coupons.size())) { // array full
         if (this->lgConfigK < 8) {
           return promoteHeapListOrSetToHll(*this);
         }
@@ -348,7 +321,7 @@ bool CouponList<A>::isEmpty() const { return getCouponCount() == 0; }
 
 template<typename A>
 int CouponList<A>::getUpdatableSerializationBytes() const {
-  return getMemDataStart() + (4 << getLgCouponArrInts());
+  return getMemDataStart() + coupons.size() * sizeof(int);
 }
 
 template<typename A>
@@ -383,13 +356,8 @@ void CouponList<A>::putOutOfOrderFlag(bool oooFlag) {
 }
 
 template<typename A>
-int CouponList<A>::getLgCouponArrInts() const {
-  return lgCouponArrInts;
-}
-
-template<typename A>
-int* CouponList<A>::getCouponIntArr() const {
-  return couponIntArr;
+A CouponList<A>::getAllocator() const {
+  return coupons.get_allocator();
 }
 
 template<typename A>
@@ -404,12 +372,12 @@ HllSketchImpl<A>* CouponList<A>::promoteHeapListOrSetToHll(CouponList& src) {
 
 template<typename A>
 coupon_iterator<A> CouponList<A>::begin(bool all) const {
-  return coupon_iterator<A>(couponIntArr, 1 << lgCouponArrInts, 0, all);
+  return coupon_iterator<A>(coupons.data(), coupons.size(), 0, all);
 }
 
 template<typename A>
 coupon_iterator<A> CouponList<A>::end() const {
-  return coupon_iterator<A>(couponIntArr, 1 << lgCouponArrInts, 1 << lgCouponArrInts, false);
+  return coupon_iterator<A>(coupons.data(), coupons.size(), coupons.size(), false);
 }
 
 }
diff --git a/be/src/thirdparty/datasketches/CouponList.hpp b/be/src/thirdparty/datasketches/CouponList.hpp
index 063805b..c19569e 100644
--- a/be/src/thirdparty/datasketches/CouponList.hpp
+++ b/be/src/thirdparty/datasketches/CouponList.hpp
@@ -30,19 +30,18 @@ namespace datasketches {
 template<typename A>
 class HllSketchImplFactory;
 
-template<typename A = std::allocator<char>>
+template<typename A>
 class CouponList : public HllSketchImpl<A> {
   public:
-    explicit CouponList(int lgConfigK, target_hll_type tgtHllType, hll_mode mode);
-    explicit CouponList(const CouponList& that);
-    explicit CouponList(const CouponList& that, target_hll_type tgtHllType);
+    CouponList(int lgConfigK, target_hll_type tgtHllType, hll_mode mode, const A& allocator);
+    CouponList(const CouponList& that, target_hll_type tgtHllType);
 
-    static CouponList* newList(const void* bytes, size_t len);
-    static CouponList* newList(std::istream& is);
+    static CouponList* newList(const void* bytes, size_t len, const A& allocator);
+    static CouponList* newList(std::istream& is, const A& allocator);
     virtual vector_u8<A> serialize(bool compact, unsigned header_size_bytes) const;
     virtual void serialize(std::ostream& os, bool compact) const;
 
-    virtual ~CouponList();
+    virtual ~CouponList() = default;
     virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const;
 
     virtual CouponList* copy() const;
@@ -62,7 +61,9 @@ class CouponList : public HllSketchImpl<A> {
     coupon_iterator<A> end() const;
 
   protected:
-    typedef typename std::allocator_traits<A>::template rebind_alloc<CouponList<A>> clAlloc;
+    using ClAlloc = typename std::allocator_traits<A>::template rebind_alloc<CouponList<A>>;
+
+    using vector_int = std::vector<int, typename std::allocator_traits<A>::template rebind_alloc<int>>;
 
     HllSketchImpl<A>* promoteHeapListToSet(CouponList& list);
     HllSketchImpl<A>* promoteHeapListOrSetToHll(CouponList& src);
@@ -75,13 +76,11 @@ class CouponList : public HllSketchImpl<A> {
     virtual bool isOutOfOrderFlag() const;
     virtual void putOutOfOrderFlag(bool oooFlag);
 
-    virtual int getLgCouponArrInts() const;
-    virtual int* getCouponIntArr() const;
+    virtual A getAllocator() const;
 
-    int lgCouponArrInts;
     int couponCount;
     bool oooFlag;
-    int* couponIntArr;
+    vector_int coupons;
 
     friend class HllSketchImplFactory<A>;
 };
diff --git a/be/src/thirdparty/datasketches/CubicInterpolation.hpp b/be/src/thirdparty/datasketches/CubicInterpolation.hpp
index b9cdfe7..58fb7d7 100644
--- a/be/src/thirdparty/datasketches/CubicInterpolation.hpp
+++ b/be/src/thirdparty/datasketches/CubicInterpolation.hpp
@@ -24,7 +24,7 @@
 
 namespace datasketches {
 
-template<typename A = std::allocator<char>>
+template<typename A = std::allocator<uint8_t>>
 class CubicInterpolation {
   public:
     static double usingXAndYTables(const double xArr[], const double yArr[],
@@ -40,4 +40,4 @@ class CubicInterpolation {
 
 #include "CubicInterpolation-internal.hpp"
 
-#endif /* _CUBICINTERPOLATION_HPP_ */
\ No newline at end of file
+#endif /* _CUBICINTERPOLATION_HPP_ */
diff --git a/be/src/thirdparty/datasketches/HarmonicNumbers.hpp b/be/src/thirdparty/datasketches/HarmonicNumbers.hpp
index 501ce0c..34b830a 100644
--- a/be/src/thirdparty/datasketches/HarmonicNumbers.hpp
+++ b/be/src/thirdparty/datasketches/HarmonicNumbers.hpp
@@ -25,7 +25,7 @@
 
 namespace datasketches {
 
-template<typename A = std::allocator<char>>
+template<typename A = std::allocator<uint8_t>>
 class HarmonicNumbers {
   public:
     /**
@@ -45,4 +45,4 @@ class HarmonicNumbers {
 
 #include "HarmonicNumbers-internal.hpp"
 
-#endif /* _HARMONICNUMBERS_HPP_ */
\ No newline at end of file
+#endif /* _HARMONICNUMBERS_HPP_ */
diff --git a/be/src/thirdparty/datasketches/Hll4Array-internal.hpp b/be/src/thirdparty/datasketches/Hll4Array-internal.hpp
index 8498bb8..f93014a 100644
--- a/be/src/thirdparty/datasketches/Hll4Array-internal.hpp
+++ b/be/src/thirdparty/datasketches/Hll4Array-internal.hpp
@@ -30,13 +30,12 @@
 namespace datasketches {
 
 template<typename A>
-Hll4Array<A>::Hll4Array(const int lgConfigK, const bool startFullSize) :
-    HllArray<A>(lgConfigK, target_hll_type::HLL_4, startFullSize) {
+Hll4Array<A>::Hll4Array(const int lgConfigK, const bool startFullSize, const A& allocator):
+HllArray<A>(lgConfigK, target_hll_type::HLL_4, startFullSize, allocator),
+auxHashMap(nullptr)
+{
   const int numBytes = this->hll4ArrBytes(lgConfigK);
-  typedef typename std::allocator_traits<A>::template rebind_alloc<uint8_t> uint8Alloc;
-  this->hllByteArr = uint8Alloc().allocate(numBytes);
-  std::fill(this->hllByteArr, this->hllByteArr + numBytes, 0);
-  auxHashMap = nullptr;
+  this->hllByteArr.resize(numBytes, 0);
 }
 
 template<typename A>
@@ -63,17 +62,19 @@ Hll4Array<A>::~Hll4Array() {
 template<typename A>
 std::function<void(HllSketchImpl<A>*)> Hll4Array<A>::get_deleter() const {
   return [](HllSketchImpl<A>* ptr) {
-    typedef typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>> hll4Alloc;
     Hll4Array<A>* hll = static_cast<Hll4Array<A>*>(ptr);
+    using Hll4Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>>;
+    Hll4Alloc hll4Alloc(hll->getAllocator());
     hll->~Hll4Array();
-    hll4Alloc().deallocate(hll, 1);
+    hll4Alloc.deallocate(hll, 1);
   };
 }
 
 template<typename A>
 Hll4Array<A>* Hll4Array<A>::copy() const {
-  typedef typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>> hll4Alloc;
-  return new (hll4Alloc().allocate(1)) Hll4Array<A>(*this);
+  using Hll4Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>>;
+  Hll4Alloc hll4Alloc(this->getAllocator());
+  return new (hll4Alloc.allocate(1)) Hll4Array<A>(*this);
 }
 
 template<typename A>
@@ -195,7 +196,7 @@ void Hll4Array<A>::internalHll4Update(const int slotNo, const int newVal) {
           // added to the exception table
           putSlot(slotNo, HllUtil<A>::AUX_TOKEN);
           if (auxHashMap == nullptr) {
-            auxHashMap = AuxHashMap<A>::newAuxHashMap(HllUtil<A>::LG_AUX_ARR_INTS[this->lgConfigK], this->lgConfigK);
+            auxHashMap = AuxHashMap<A>::newAuxHashMap(HllUtil<A>::LG_AUX_ARR_INTS[this->lgConfigK], this->lgConfigK, this->getAllocator());
           }
           auxHashMap->mustAdd(slotNo, newVal);
         }
@@ -285,7 +286,7 @@ void Hll4Array<A>::shiftToBiggerCurMin() {
       } else { //newShiftedVal >= AUX_TOKEN
         // the former exception remains an exception, so must be added to the newAuxMap
         if (newAuxMap == nullptr) {
-          newAuxMap = AuxHashMap<A>::newAuxHashMap(HllUtil<A>::LG_AUX_ARR_INTS[this->lgConfigK], this->lgConfigK);
+          newAuxMap = AuxHashMap<A>::newAuxHashMap(HllUtil<A>::LG_AUX_ARR_INTS[this->lgConfigK], this->lgConfigK, this->getAllocator());
         }
         newAuxMap->mustAdd(slotNum, oldActualVal);
       }
@@ -315,12 +316,12 @@ void Hll4Array<A>::shiftToBiggerCurMin() {
 
 template<typename A>
 typename HllArray<A>::const_iterator Hll4Array<A>::begin(bool all) const {
-  return typename HllArray<A>::const_iterator(this->hllByteArr, 1 << this->lgConfigK, 0, this->tgtHllType, auxHashMap, this->curMin, all);
+  return typename HllArray<A>::const_iterator(this->hllByteArr.data(), 1 << this->lgConfigK, 0, this->tgtHllType, auxHashMap, this->curMin, all);
 }
 
 template<typename A>
 typename HllArray<A>::const_iterator Hll4Array<A>::end() const {
-  return typename HllArray<A>::const_iterator(this->hllByteArr, 1 << this->lgConfigK, 1 << this->lgConfigK, this->tgtHllType, auxHashMap, this->curMin, false);
+  return typename HllArray<A>::const_iterator(this->hllByteArr.data(), 1 << this->lgConfigK, 1 << this->lgConfigK, this->tgtHllType, auxHashMap, this->curMin, false);
 }
 
 template<typename A>
diff --git a/be/src/thirdparty/datasketches/Hll4Array.hpp b/be/src/thirdparty/datasketches/Hll4Array.hpp
index ff56c86..38b2c94 100644
--- a/be/src/thirdparty/datasketches/Hll4Array.hpp
+++ b/be/src/thirdparty/datasketches/Hll4Array.hpp
@@ -31,7 +31,7 @@ class Hll4Iterator;
 template<typename A>
 class Hll4Array final : public HllArray<A> {
   public:
-    explicit Hll4Array(int lgConfigK, bool startFullSize);
+    explicit Hll4Array(int lgConfigK, bool startFullSize, const A& allocator);
     explicit Hll4Array(const Hll4Array<A>& that);
 
     virtual ~Hll4Array();
diff --git a/be/src/thirdparty/datasketches/Hll6Array-internal.hpp b/be/src/thirdparty/datasketches/Hll6Array-internal.hpp
index a318564..e9f6e9f 100644
--- a/be/src/thirdparty/datasketches/Hll6Array-internal.hpp
+++ b/be/src/thirdparty/datasketches/Hll6Array-internal.hpp
@@ -27,40 +27,29 @@
 namespace datasketches {
 
 template<typename A>
-Hll6Array<A>::Hll6Array(const int lgConfigK, const bool startFullSize) :
-    HllArray<A>(lgConfigK, target_hll_type::HLL_6, startFullSize) {
-  const int numBytes = this->hll6ArrBytes(lgConfigK);
-  typedef typename std::allocator_traits<A>::template rebind_alloc<uint8_t> uint8Alloc;
-  this->hllByteArr = uint8Alloc().allocate(numBytes);
-  std::fill(this->hllByteArr, this->hllByteArr + numBytes, 0);
-}
-
-template<typename A>
-Hll6Array<A>::Hll6Array(const Hll6Array<A>& that) :
-  HllArray<A>(that)
+Hll6Array<A>::Hll6Array(const int lgConfigK, const bool startFullSize, const A& allocator):
+HllArray<A>(lgConfigK, target_hll_type::HLL_6, startFullSize, allocator)
 {
-  // can determine hllByteArr size in parent class, no need to allocate here
-}
-
-template<typename A>
-Hll6Array<A>::~Hll6Array() {
-  // hllByteArr deleted in parent
+  const int numBytes = this->hll6ArrBytes(lgConfigK);
+  this->hllByteArr.resize(numBytes, 0);
 }
 
 template<typename A>
 std::function<void(HllSketchImpl<A>*)> Hll6Array<A>::get_deleter() const {
   return [](HllSketchImpl<A>* ptr) {
-    typedef typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>> hll6Alloc;
+    using Hll6Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>>;
     Hll6Array<A>* hll = static_cast<Hll6Array<A>*>(ptr);
+    Hll6Alloc hll6Alloc(hll->getAllocator());
     hll->~Hll6Array();
-    hll6Alloc().deallocate(hll, 1);
+    hll6Alloc.deallocate(hll, 1);
   };
 }
 
 template<typename A>
 Hll6Array<A>* Hll6Array<A>::copy() const {
-  typedef typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>> hll6Alloc;
-  return new (hll6Alloc().allocate(1)) Hll6Array<A>(*this);
+  using Hll6Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>>;
+  Hll6Alloc hll6Alloc(this->getAllocator());
+  return new (hll6Alloc.allocate(1)) Hll6Array<A>(*this);
 }
 
 template<typename A>
diff --git a/be/src/thirdparty/datasketches/Hll6Array.hpp b/be/src/thirdparty/datasketches/Hll6Array.hpp
index 5178de8..03370b2 100644
--- a/be/src/thirdparty/datasketches/Hll6Array.hpp
+++ b/be/src/thirdparty/datasketches/Hll6Array.hpp
@@ -30,10 +30,9 @@ class Hll6Iterator;
 template<typename A>
 class Hll6Array final : public HllArray<A> {
   public:
-    explicit Hll6Array(int lgConfigK, bool startFullSize);
-    explicit Hll6Array(const Hll6Array<A>& that);
+    Hll6Array(int lgConfigK, bool startFullSize, const A& allocator);
 
-    virtual ~Hll6Array();
+    virtual ~Hll6Array() = default;
     virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const;
 
     virtual Hll6Array* copy() const;
diff --git a/be/src/thirdparty/datasketches/Hll8Array-internal.hpp b/be/src/thirdparty/datasketches/Hll8Array-internal.hpp
index cb14a0f..f27a796 100644
--- a/be/src/thirdparty/datasketches/Hll8Array-internal.hpp
+++ b/be/src/thirdparty/datasketches/Hll8Array-internal.hpp
@@ -25,40 +25,29 @@
 namespace datasketches {
 
 template<typename A>
-Hll8Array<A>::Hll8Array(const int lgConfigK, const bool startFullSize) :
-    HllArray<A>(lgConfigK, target_hll_type::HLL_8, startFullSize) {
-  const int numBytes = this->hll8ArrBytes(lgConfigK);
-  typedef typename std::allocator_traits<A>::template rebind_alloc<uint8_t> uint8Alloc;
-  this->hllByteArr = uint8Alloc().allocate(numBytes);
-  std::fill(this->hllByteArr, this->hllByteArr + numBytes, 0);
-}
-
-template<typename A>
-Hll8Array<A>::Hll8Array(const Hll8Array<A>& that) :
-  HllArray<A>(that)
+Hll8Array<A>::Hll8Array(const int lgConfigK, const bool startFullSize, const A& allocator):
+HllArray<A>(lgConfigK, target_hll_type::HLL_8, startFullSize, allocator)
 {
-  // can determine hllByteArr size in parent class, no need to allocate here
-}
-
-template<typename A>
-Hll8Array<A>::~Hll8Array() {
-  // hllByteArr deleted in parent
+  const int numBytes = this->hll8ArrBytes(lgConfigK);
+  this->hllByteArr.resize(numBytes, 0);
 }
 
 template<typename A>
 std::function<void(HllSketchImpl<A>*)> Hll8Array<A>::get_deleter() const {
   return [](HllSketchImpl<A>* ptr) {
-    typedef typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>> hll8Alloc;
     Hll8Array<A>* hll = static_cast<Hll8Array<A>*>(ptr);
+    using Hll8Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>>;
+    Hll8Alloc hll8Alloc(hll->getAllocator());
     hll->~Hll8Array();
-    hll8Alloc().deallocate(hll, 1);
+    hll8Alloc.deallocate(hll, 1);
   };
 }
 
 template<typename A>
 Hll8Array<A>* Hll8Array<A>::copy() const {
-  typedef typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>> hll8Alloc;
-  return new (hll8Alloc().allocate(1)) Hll8Array<A>(*this);
+  using Hll8Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>>;
+  Hll8Alloc hll8Alloc(this->getAllocator());
+  return new (hll8Alloc.allocate(1)) Hll8Array<A>(*this);
 }
 
 template<typename A>
diff --git a/be/src/thirdparty/datasketches/Hll8Array.hpp b/be/src/thirdparty/datasketches/Hll8Array.hpp
index 2b0aefc..ea9a5bd 100644
--- a/be/src/thirdparty/datasketches/Hll8Array.hpp
+++ b/be/src/thirdparty/datasketches/Hll8Array.hpp
@@ -30,10 +30,9 @@ class Hll8Iterator;
 template<typename A>
 class Hll8Array final : public HllArray<A> {
   public:
-    explicit Hll8Array(int lgConfigK, bool startFullSize);
-    explicit Hll8Array(const Hll8Array& that);
+    Hll8Array(int lgConfigK, bool startFullSize, const A& allocator);
 
-    virtual ~Hll8Array();
+    virtual ~Hll8Array() = default;
     virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const;
 
     virtual Hll8Array<A>* copy() const;
diff --git a/be/src/thirdparty/datasketches/HllArray-internal.hpp b/be/src/thirdparty/datasketches/HllArray-internal.hpp
index 0a4bdce..4479417 100644
--- a/be/src/thirdparty/datasketches/HllArray-internal.hpp
+++ b/be/src/thirdparty/datasketches/HllArray-internal.hpp
@@ -35,48 +35,16 @@
 namespace datasketches {
 
 template<typename A>
-HllArray<A>::HllArray(const int lgConfigK, const target_hll_type tgtHllType, bool startFullSize)
-  : HllSketchImpl<A>(lgConfigK, tgtHllType, hll_mode::HLL, startFullSize) {
-  hipAccum = 0.0;
-  kxq0 = 1 << lgConfigK;
-  kxq1 = 0.0;
-  curMin = 0;
-  numAtCurMin = 1 << lgConfigK;
-  oooFlag = false;
-  hllByteArr = nullptr; // allocated in derived class
-}
-
-template<typename A>
-HllArray<A>::HllArray(const HllArray<A>& that):
-HllSketchImpl<A>(that.lgConfigK, that.tgtHllType, hll_mode::HLL, that.startFullSize),
-hipAccum(that.hipAccum),
-kxq0(that.kxq0),
-kxq1(that.kxq1),
-hllByteArr(nullptr),
-curMin(that.curMin),
-numAtCurMin(that.numAtCurMin),
-oooFlag(that.oooFlag)
-{
-  const int arrayLen = that.getHllByteArrBytes();
-  typedef typename std::allocator_traits<A>::template rebind_alloc<uint8_t> uint8Alloc;
-  hllByteArr = uint8Alloc().allocate(arrayLen);
-  std::copy(that.hllByteArr, that.hllByteArr + arrayLen, hllByteArr);
-}
-
-template<typename A>
-HllArray<A>::~HllArray() {
-  // need to determine number of bytes to deallocate
-  int hllArrBytes = 0;
-  if (this->tgtHllType == target_hll_type::HLL_4) {
-    hllArrBytes = hll4ArrBytes(this->lgConfigK);
-  } else if (this->tgtHllType == target_hll_type::HLL_6) {
-    hllArrBytes = hll6ArrBytes(this->lgConfigK);
-  } else { // tgtHllType == HLL_8
-    hllArrBytes = hll8ArrBytes(this->lgConfigK);
-  }
-  typedef typename std::allocator_traits<A>::template rebind_alloc<uint8_t> uint8Alloc;
-  uint8Alloc().deallocate(hllByteArr, hllArrBytes);
-}
+HllArray<A>::HllArray(const int lgConfigK, const target_hll_type tgtHllType, bool startFullSize, const A& allocator):
+HllSketchImpl<A>(lgConfigK, tgtHllType, hll_mode::HLL, startFullSize),
+hipAccum(0.0),
+kxq0(1 << lgConfigK),
+kxq1(0.0),
+hllByteArr(allocator),
+curMin(0),
+numAtCurMin(1 << lgConfigK),
+oooFlag(false)
+{}
 
 template<typename A>
 HllArray<A>* HllArray<A>::copyAs(const target_hll_type tgtHllType) const {
@@ -93,7 +61,7 @@ HllArray<A>* HllArray<A>::copyAs(const target_hll_type tgtHllType) const {
 }
 
 template<typename A>
-HllArray<A>* HllArray<A>::newHll(const void* bytes, size_t len) {
+HllArray<A>* HllArray<A>::newHll(const void* bytes, size_t len, const A& allocator) {
   if (len < HllUtil<A>::HLL_BYTE_ARR_START) {
     throw std::out_of_range("Input data length insufficient to hold HLL array");
   }
@@ -143,11 +111,11 @@ HllArray<A>* HllArray<A>::newHll(const void* bytes, size_t len) {
     int auxLgIntArrSize = (int) data[4];
     const size_t offset = HllUtil<A>::HLL_BYTE_ARR_START + arrayBytes;
     const uint8_t* auxDataStart = data + offset;
-    auxHashMap = AuxHashMap<A>::deserialize(auxDataStart, len - offset, lgK, auxCount, auxLgIntArrSize, comapctFlag);
+    auxHashMap = AuxHashMap<A>::deserialize(auxDataStart, len - offset, lgK, auxCount, auxLgIntArrSize, comapctFlag, allocator);
     aux_ptr = aux_hash_map_ptr(auxHashMap, auxHashMap->make_deleter());
   }
 
-  HllArray<A>* sketch = HllSketchImplFactory<A>::newHll(lgK, tgtHllType, startFullSizeFlag);
+  HllArray<A>* sketch = HllSketchImplFactory<A>::newHll(lgK, tgtHllType, startFullSizeFlag, allocator);
   sketch->putCurMin(curMin);
   sketch->putOutOfOrderFlag(oooFlag);
   if (!oooFlag) sketch->putHipAccum(hip);
@@ -155,7 +123,7 @@ HllArray<A>* HllArray<A>::newHll(const void* bytes, size_t len) {
   sketch->putKxQ1(kxq1);
   sketch->putNumAtCurMin(numAtCurMin);
 
-  std::memcpy(sketch->hllByteArr, data + HllUtil<A>::HLL_BYTE_ARR_START, arrayBytes);
+  std::memcpy(sketch->hllByteArr.data(), data + HllUtil<A>::HLL_BYTE_ARR_START, arrayBytes);
 
   if (auxHashMap != nullptr)
     ((Hll4Array<A>*)sketch)->putAuxHashMap(auxHashMap);
@@ -165,7 +133,7 @@ HllArray<A>* HllArray<A>::newHll(const void* bytes, size_t len) {
 }
 
 template<typename A>
-HllArray<A>* HllArray<A>::newHll(std::istream& is) {
+HllArray<A>* HllArray<A>::newHll(std::istream& is, const A& allocator) {
   uint8_t listHeader[8];
   is.read((char*)listHeader, 8 * sizeof(uint8_t));
 
@@ -192,7 +160,7 @@ HllArray<A>* HllArray<A>::newHll(std::istream& is) {
   const int lgK = (int) listHeader[HllUtil<A>::LG_K_BYTE];
   const int curMin = (int) listHeader[HllUtil<A>::HLL_CUR_MIN_BYTE];
 
-  HllArray* sketch = HllSketchImplFactory<A>::newHll(lgK, tgtHllType, startFullSizeFlag);
+  HllArray* sketch = HllSketchImplFactory<A>::newHll(lgK, tgtHllType, startFullSizeFlag, allocator);
   typedef std::unique_ptr<HllArray<A>, std::function<void(HllSketchImpl<A>*)>> hll_array_ptr;
   hll_array_ptr sketch_ptr(sketch, sketch->get_deleter());
   sketch->putCurMin(curMin);
@@ -211,11 +179,11 @@ HllArray<A>* HllArray<A>::newHll(std::istream& is) {
   is.read((char*)&auxCount, sizeof(auxCount));
   sketch->putNumAtCurMin(numAtCurMin);
   
-  is.read((char*)sketch->hllByteArr, sketch->getHllByteArrBytes());
+  is.read((char*)sketch->hllByteArr.data(), sketch->getHllByteArrBytes());
   
   if (auxCount > 0) { // necessarily TgtHllType == HLL_4
     int auxLgIntArrSize = listHeader[4];
-    AuxHashMap<A>* auxHashMap = AuxHashMap<A>::deserialize(is, lgK, auxCount, auxLgIntArrSize, comapctFlag);
+    AuxHashMap<A>* auxHashMap = AuxHashMap<A>::deserialize(is, lgK, auxCount, auxLgIntArrSize, comapctFlag, allocator);
     ((Hll4Array<A>*)sketch)->putAuxHashMap(auxHashMap);
   }
 
@@ -228,7 +196,7 @@ HllArray<A>* HllArray<A>::newHll(std::istream& is) {
 template<typename A>
 vector_u8<A> HllArray<A>::serialize(bool compact, unsigned header_size_bytes) const {
   const size_t sketchSizeBytes = (compact ? getCompactSerializationBytes() : getUpdatableSerializationBytes()) + header_size_bytes;
-  vector_u8<A> byteArr(sketchSizeBytes);
+  vector_u8<A> byteArr(sketchSizeBytes, 0, getAllocator());
   uint8_t* bytes = byteArr.data() + header_size_bytes;
   AuxHashMap<A>* auxHashMap = getAuxHashMap();
 
@@ -249,7 +217,7 @@ vector_u8<A> HllArray<A>::serialize(bool compact, unsigned header_size_bytes) co
   std::memcpy(bytes + HllUtil<A>::AUX_COUNT_INT, &auxCount, sizeof(int));
 
   const int hllByteArrBytes = getHllByteArrBytes();
-  std::memcpy(bytes + getMemDataStart(), hllByteArr, hllByteArrBytes);
+  std::memcpy(bytes + getMemDataStart(), hllByteArr.data(), hllByteArrBytes);
 
   // aux map if HLL_4
   if (this->tgtHllType == HLL_4) {
@@ -309,7 +277,7 @@ void HllArray<A>::serialize(std::ostream& os, const bool compact) const {
 
   const int auxCount = (auxHashMap == nullptr ? 0 : auxHashMap->getAuxCount());
   os.write((char*)&auxCount, sizeof(auxCount));
-  os.write((char*)hllByteArr, getHllByteArrBytes());
+  os.write((char*)hllByteArr.data(), getHllByteArrBytes());
 
   // aux map if HLL_4
   if (this->tgtHllType == HLL_4) {
@@ -639,12 +607,12 @@ double HllArray<A>::getHllRawEstimate(const int lgConfigK, const double kxqSum)
 
 template<typename A>
 typename HllArray<A>::const_iterator HllArray<A>::begin(bool all) const {
-  return const_iterator(hllByteArr, 1 << this->lgConfigK, 0, this->tgtHllType, nullptr, 0, all);
+  return const_iterator(hllByteArr.data(), 1 << this->lgConfigK, 0, this->tgtHllType, nullptr, 0, all);
 }
 
 template<typename A>
 typename HllArray<A>::const_iterator HllArray<A>::end() const {
-  return const_iterator(hllByteArr, 1 << this->lgConfigK, 1 << this->lgConfigK, this->tgtHllType, nullptr, 0, false);
+  return const_iterator(hllByteArr.data(), 1 << this->lgConfigK, 1 << this->lgConfigK, this->tgtHllType, nullptr, 0, false);
 }
 
 template<typename A>
@@ -701,6 +669,11 @@ uint8_t HllArray<A>::const_iterator::get_value(const uint8_t* array, size_t inde
   return array[index];
 }
 
+template<typename A>
+A HllArray<A>::getAllocator() const {
+  return hllByteArr.get_allocator();
+}
+
 }
 
 #endif // _HLLARRAY_INTERNAL_HPP_
diff --git a/be/src/thirdparty/datasketches/HllArray.hpp b/be/src/thirdparty/datasketches/HllArray.hpp
index 1cc64ea..e7be8c1 100644
--- a/be/src/thirdparty/datasketches/HllArray.hpp
+++ b/be/src/thirdparty/datasketches/HllArray.hpp
@@ -28,19 +28,18 @@ namespace datasketches {
 template<typename A>
 class AuxHashMap;
 
-template<typename A = std::allocator<char>>
+template<typename A>
 class HllArray : public HllSketchImpl<A> {
   public:
-    explicit HllArray(int lgConfigK, target_hll_type tgtHllType, bool startFullSize);
-    explicit HllArray(const HllArray<A>& that);
+    HllArray(int lgConfigK, target_hll_type tgtHllType, bool startFullSize, const A& allocator);
 
-    static HllArray* newHll(const void* bytes, size_t len);
-    static HllArray* newHll(std::istream& is);
+    static HllArray* newHll(const void* bytes, size_t len, const A& allocator);
+    static HllArray* newHll(std::istream& is, const A& allocator);
 
     virtual vector_u8<A> serialize(bool compact, unsigned header_size_bytes) const;
     virtual void serialize(std::ostream& os, bool compact) const;
 
-    virtual ~HllArray();
+    virtual ~HllArray() = default;
     virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const = 0;
 
     virtual HllArray* copy() const = 0;
@@ -95,6 +94,8 @@ class HllArray : public HllSketchImpl<A> {
     virtual const_iterator begin(bool all = false) const;
     virtual const_iterator end() const;
 
+    virtual A getAllocator() const;
+
   protected:
     void hipAndKxQIncrementalUpdate(uint8_t oldValue, uint8_t newValue);
     double getHllBitMapEstimate(int lgConfigK, int curMin, int numAtCurMin) const;
@@ -103,7 +104,7 @@ class HllArray : public HllSketchImpl<A> {
     double hipAccum;
     double kxq0;
     double kxq1;
-    uint8_t* hllByteArr; //init by sub-classes
+    vector_u8<A> hllByteArr; //init by sub-classes
     int curMin; //always zero for Hll6 and Hll8, only tracked by Hll4Array
     int numAtCurMin; //interpreted as num zeros when curMin == 0
     bool oooFlag; //Out-Of-Order Flag
@@ -115,7 +116,6 @@ template<typename A>
 class HllArray<A>::const_iterator: public std::iterator<std::input_iterator_tag, uint32_t> {
 public:
   const_iterator(const uint8_t* array, size_t array_slze, size_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset, bool all);
-  //const_iterator(const uint8_t* array, size_t array_slze, size_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset);
   const_iterator& operator++();
   bool operator!=(const const_iterator& other) const;
   uint32_t operator*() const;
diff --git a/be/src/thirdparty/datasketches/HllSketch-internal.hpp b/be/src/thirdparty/datasketches/HllSketch-internal.hpp
index dd16955..8f7d1f4 100644
--- a/be/src/thirdparty/datasketches/HllSketch-internal.hpp
+++ b/be/src/thirdparty/datasketches/HllSketch-internal.hpp
@@ -42,28 +42,26 @@ typedef union {
 } longDoubleUnion;
 
 template<typename A>
-hll_sketch_alloc<A>::hll_sketch_alloc(int lg_config_k, target_hll_type tgt_type, bool start_full_size) {
+hll_sketch_alloc<A>::hll_sketch_alloc(int lg_config_k, target_hll_type tgt_type, bool start_full_size, const A& allocator) {
   HllUtil<A>::checkLgK(lg_config_k);
   if (start_full_size) {
-    sketch_impl = HllSketchImplFactory<A>::newHll(lg_config_k, tgt_type, start_full_size);
+    sketch_impl = HllSketchImplFactory<A>::newHll(lg_config_k, tgt_type, start_full_size, allocator);
   } else {
     typedef typename std::allocator_traits<A>::template rebind_alloc<CouponList<A>> clAlloc;
-    sketch_impl = new (clAlloc().allocate(1)) CouponList<A>(lg_config_k, tgt_type, hll_mode::LIST);
+    sketch_impl = new (clAlloc(allocator).allocate(1)) CouponList<A>(lg_config_k, tgt_type, hll_mode::LIST, allocator);
   }
 }
 
 template<typename A>
-hll_sketch_alloc<A> hll_sketch_alloc<A>::deserialize(std::istream& is) {
-  HllSketchImpl<A>* impl = HllSketchImplFactory<A>::deserialize(is);
-  hll_sketch_alloc<A> sketch(impl);
-  return sketch;
+hll_sketch_alloc<A> hll_sketch_alloc<A>::deserialize(std::istream& is, const A& allocator) {
+  HllSketchImpl<A>* impl = HllSketchImplFactory<A>::deserialize(is, allocator);
+  return hll_sketch_alloc<A>(impl);
 }
 
 template<typename A>
-hll_sketch_alloc<A> hll_sketch_alloc<A>::deserialize(const void* bytes, size_t len) {
-  HllSketchImpl<A>* impl = HllSketchImplFactory<A>::deserialize(bytes, len);
-  hll_sketch_alloc<A> sketch(impl);
-  return sketch;
+hll_sketch_alloc<A> hll_sketch_alloc<A>::deserialize(const void* bytes, size_t len, const A& allocator) {
+  HllSketchImpl<A>* impl = HllSketchImplFactory<A>::deserialize(bytes, len, allocator);
+  return hll_sketch_alloc<A>(impl);
 }
 
 template<typename A>
diff --git a/be/src/thirdparty/datasketches/HllSketchImpl.hpp b/be/src/thirdparty/datasketches/HllSketchImpl.hpp
index 82180b4..9f53705 100644
--- a/be/src/thirdparty/datasketches/HllSketchImpl.hpp
+++ b/be/src/thirdparty/datasketches/HllSketchImpl.hpp
@@ -27,7 +27,7 @@
 
 namespace datasketches {
 
-template<typename A = std::allocator<char>>
+template<typename A>
 class HllSketchImpl {
   public:
     HllSketchImpl(int lgConfigK, target_hll_type tgtHllType, hll_mode mode, bool startFullSize);
@@ -66,6 +66,7 @@ class HllSketchImpl {
     virtual bool isEmpty() const = 0;
     virtual bool isOutOfOrderFlag() const = 0;
     virtual void putOutOfOrderFlag(bool oooFlag) = 0;
+    virtual A getAllocator() const = 0;
     bool isStartFullSize() const;
 
   protected:
diff --git a/be/src/thirdparty/datasketches/HllSketchImplFactory.hpp b/be/src/thirdparty/datasketches/HllSketchImplFactory.hpp
index eb8dd77..85f9618 100644
--- a/be/src/thirdparty/datasketches/HllSketchImplFactory.hpp
+++ b/be/src/thirdparty/datasketches/HllSketchImplFactory.hpp
@@ -31,15 +31,15 @@
 
 namespace datasketches {
 
-template<typename A = std::allocator<char>>
+template<typename A>
 class HllSketchImplFactory final {
 public:
-  static HllSketchImpl<A>* deserialize(std::istream& os);
-  static HllSketchImpl<A>* deserialize(const void* bytes, size_t len);
+  static HllSketchImpl<A>* deserialize(std::istream& os, const A& allocator);
+  static HllSketchImpl<A>* deserialize(const void* bytes, size_t len, const A& allocator);
 
   static CouponHashSet<A>* promoteListToSet(const CouponList<A>& list);
   static HllArray<A>* promoteListOrSetToHll(const CouponList<A>& list);
-  static HllArray<A>* newHll(int lgConfigK, target_hll_type tgtHllType, bool startFullSize = false);
+  static HllArray<A>* newHll(int lgConfigK, target_hll_type tgtHllType, bool startFullSize, const A& allocator);
   
   // resets the input impl, deleting the input pointer and returning a new pointer
   static HllSketchImpl<A>* reset(HllSketchImpl<A>* impl, bool startFullSize);
@@ -51,8 +51,8 @@ public:
 
 template<typename A>
 CouponHashSet<A>* HllSketchImplFactory<A>::promoteListToSet(const CouponList<A>& list) {
-  typedef typename std::allocator_traits<A>::template rebind_alloc<CouponHashSet<A>> chsAlloc;
-  CouponHashSet<A>* chSet = new (chsAlloc().allocate(1)) CouponHashSet<A>(list.getLgConfigK(), list.getTgtHllType());
+  using ChsAlloc = typename std::allocator_traits<A>::template rebind_alloc<CouponHashSet<A>>;
+  CouponHashSet<A>* chSet = new (ChsAlloc(list.getAllocator()).allocate(1)) CouponHashSet<A>(list.getLgConfigK(), list.getTgtHllType(), list.getAllocator());
   for (auto coupon: list) {
     chSet->couponUpdate(coupon);
   }
@@ -61,7 +61,7 @@ CouponHashSet<A>* HllSketchImplFactory<A>::promoteListToSet(const CouponList<A>&
 
 template<typename A>
 HllArray<A>* HllSketchImplFactory<A>::promoteListOrSetToHll(const CouponList<A>& src) {
-  HllArray<A>* tgtHllArr = HllSketchImplFactory<A>::newHll(src.getLgConfigK(), src.getTgtHllType());
+  HllArray<A>* tgtHllArr = HllSketchImplFactory<A>::newHll(src.getLgConfigK(), src.getTgtHllType(), false, src.getAllocator());
   tgtHllArr->putKxQ0(1 << src.getLgConfigK());
   for (auto coupon: src) {
     tgtHllArr->couponUpdate(coupon);
@@ -72,48 +72,48 @@ HllArray<A>* HllSketchImplFactory<A>::promoteListOrSetToHll(const CouponList<A>&
 }
 
 template<typename A>
-HllSketchImpl<A>* HllSketchImplFactory<A>::deserialize(std::istream& is) {
+HllSketchImpl<A>* HllSketchImplFactory<A>::deserialize(std::istream& is, const A& allocator) {
   // we'll hand off the sketch based on PreInts so we don't need
   // to move the stream pointer back and forth -- perhaps somewhat fragile?
   const int preInts = is.peek();
   if (preInts == HllUtil<A>::HLL_PREINTS) {
-    return HllArray<A>::newHll(is);
+    return HllArray<A>::newHll(is, allocator);
   } else if (preInts == HllUtil<A>::HASH_SET_PREINTS) {
-    return CouponHashSet<A>::newSet(is);
+    return CouponHashSet<A>::newSet(is, allocator);
   } else if (preInts == HllUtil<A>::LIST_PREINTS) {
-    return CouponList<A>::newList(is);
+    return CouponList<A>::newList(is, allocator);
   } else {
     throw std::invalid_argument("Attempt to deserialize unknown object type");
   }
 }
 
 template<typename A>
-HllSketchImpl<A>* HllSketchImplFactory<A>::deserialize(const void* bytes, size_t len) {
+HllSketchImpl<A>* HllSketchImplFactory<A>::deserialize(const void* bytes, size_t len, const A& allocator) {
   // read current mode directly
   const int preInts = static_cast<const uint8_t*>(bytes)[0];
   if (preInts == HllUtil<A>::HLL_PREINTS) {
-    return HllArray<A>::newHll(bytes, len);
+    return HllArray<A>::newHll(bytes, len, allocator);
   } else if (preInts == HllUtil<A>::HASH_SET_PREINTS) {
-    return CouponHashSet<A>::newSet(bytes, len);
+    return CouponHashSet<A>::newSet(bytes, len, allocator);
   } else if (preInts == HllUtil<A>::LIST_PREINTS) {
-    return CouponList<A>::newList(bytes, len);
+    return CouponList<A>::newList(bytes, len, allocator);
   } else {
     throw std::invalid_argument("Attempt to deserialize unknown object type");
   }
 }
 
 template<typename A>
-HllArray<A>* HllSketchImplFactory<A>::newHll(int lgConfigK, target_hll_type tgtHllType, bool startFullSize) {
+HllArray<A>* HllSketchImplFactory<A>::newHll(int lgConfigK, target_hll_type tgtHllType, bool startFullSize, const A& allocator) {
   switch (tgtHllType) {
     case HLL_8:
-      typedef typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>> hll8Alloc;
-      return new (hll8Alloc().allocate(1)) Hll8Array<A>(lgConfigK, startFullSize);
+      using Hll8Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>>;
+      return new (Hll8Alloc(allocator).allocate(1)) Hll8Array<A>(lgConfigK, startFullSize, allocator);
     case HLL_6:
-      typedef typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>> hll6Alloc;
-      return new (hll6Alloc().allocate(1)) Hll6Array<A>(lgConfigK, startFullSize);
+      using Hll6Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>>;
+      return new (Hll6Alloc(allocator).allocate(1)) Hll6Array<A>(lgConfigK, startFullSize, allocator);
     case HLL_4:
-      typedef typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>> hll4Alloc;
-      return new (hll4Alloc().allocate(1)) Hll4Array<A>(lgConfigK, startFullSize);
+      using Hll4Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>>;
+      return new (Hll4Alloc(allocator).allocate(1)) Hll4Array<A>(lgConfigK, startFullSize, allocator);
   }
   throw std::logic_error("Invalid target_hll_type");
 }
@@ -121,12 +121,12 @@ HllArray<A>* HllSketchImplFactory<A>::newHll(int lgConfigK, target_hll_type tgtH
 template<typename A>
 HllSketchImpl<A>* HllSketchImplFactory<A>::reset(HllSketchImpl<A>* impl, bool startFullSize) {
   if (startFullSize) {
-    HllArray<A>* hll = newHll(impl->getLgConfigK(), impl->getTgtHllType(), startFullSize);
+    HllArray<A>* hll = newHll(impl->getLgConfigK(), impl->getTgtHllType(), startFullSize, impl->getAllocator());
     impl->get_deleter()(impl);
     return hll;
   } else {
-    typedef typename std::allocator_traits<A>::template rebind_alloc<CouponList<A>> clAlloc;
-    CouponList<A>* cl = new (clAlloc().allocate(1)) CouponList<A>(impl->getLgConfigK(), impl->getTgtHllType(), hll_mode::LIST);
+    using ClAlloc = typename std::allocator_traits<A>::template rebind_alloc<CouponList<A>>;
+    CouponList<A>* cl = new (ClAlloc(impl->getAllocator()).allocate(1)) CouponList<A>(impl->getLgConfigK(), impl->getTgtHllType(), hll_mode::LIST, impl->getAllocator());
     impl->get_deleter()(impl);
     return cl;
   }
@@ -135,8 +135,9 @@ HllSketchImpl<A>* HllSketchImplFactory<A>::reset(HllSketchImpl<A>* impl, bool st
 template<typename A>
 Hll4Array<A>* HllSketchImplFactory<A>::convertToHll4(const HllArray<A>& srcHllArr) {
   const int lgConfigK = srcHllArr.getLgConfigK();
-  typedef typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>> hll4Alloc;
-  Hll4Array<A>* hll4Array = new (hll4Alloc().allocate(1)) Hll4Array<A>(lgConfigK, srcHllArr.isStartFullSize());
+  using Hll4Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>>;
+  Hll4Array<A>* hll4Array = new (Hll4Alloc(srcHllArr.getAllocator()).allocate(1))
+      Hll4Array<A>(lgConfigK, srcHllArr.isStartFullSize(), srcHllArr.getAllocator());
   hll4Array->putOutOfOrderFlag(srcHllArr.isOutOfOrderFlag());
   hll4Array->mergeHll(srcHllArr);
   hll4Array->putHipAccum(srcHllArr.getHipAccum());
@@ -146,8 +147,9 @@ Hll4Array<A>* HllSketchImplFactory<A>::convertToHll4(const HllArray<A>& srcHllAr
 template<typename A>
 Hll6Array<A>* HllSketchImplFactory<A>::convertToHll6(const HllArray<A>& srcHllArr) {
   const int lgConfigK = srcHllArr.getLgConfigK();
-  typedef typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>> hll6Alloc;
-  Hll6Array<A>* hll6Array = new (hll6Alloc().allocate(1)) Hll6Array<A>(lgConfigK, srcHllArr.isStartFullSize());
+  using Hll6Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>>;
+  Hll6Array<A>* hll6Array = new (Hll6Alloc(srcHllArr.getAllocator()).allocate(1))
+      Hll6Array<A>(lgConfigK, srcHllArr.isStartFullSize(), srcHllArr.getAllocator());
   hll6Array->putOutOfOrderFlag(srcHllArr.isOutOfOrderFlag());
   hll6Array->mergeHll(srcHllArr);
   hll6Array->putHipAccum(srcHllArr.getHipAccum());
@@ -157,8 +159,9 @@ Hll6Array<A>* HllSketchImplFactory<A>::convertToHll6(const HllArray<A>& srcHllAr
 template<typename A>
 Hll8Array<A>* HllSketchImplFactory<A>::convertToHll8(const HllArray<A>& srcHllArr) {
   const int lgConfigK = srcHllArr.getLgConfigK();
-  typedef typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>> hll8Alloc;
-  Hll8Array<A>* hll8Array = new (hll8Alloc().allocate(1)) Hll8Array<A>(lgConfigK, srcHllArr.isStartFullSize());
+  using Hll8Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>>;
+  Hll8Array<A>* hll8Array = new (Hll8Alloc(srcHllArr.getAllocator()).allocate(1))
+      Hll8Array<A>(lgConfigK, srcHllArr.isStartFullSize(), srcHllArr.getAllocator());
   hll8Array->putOutOfOrderFlag(srcHllArr.isOutOfOrderFlag());
   hll8Array->mergeHll(srcHllArr);
   hll8Array->putHipAccum(srcHllArr.getHipAccum());
diff --git a/be/src/thirdparty/datasketches/HllUnion-internal.hpp b/be/src/thirdparty/datasketches/HllUnion-internal.hpp
index 0d12fd3..716fab6 100644
--- a/be/src/thirdparty/datasketches/HllUnion-internal.hpp
+++ b/be/src/thirdparty/datasketches/HllUnion-internal.hpp
@@ -32,9 +32,9 @@
 namespace datasketches {
 
 template<typename A>
-hll_union_alloc<A>::hll_union_alloc(const int lg_max_k):
+hll_union_alloc<A>::hll_union_alloc(const int lg_max_k, const A& allocator):
   lg_max_k(HllUtil<A>::checkLgK(lg_max_k)),
-  gadget(lg_max_k, target_hll_type::HLL_8)
+  gadget(lg_max_k, target_hll_type::HLL_8, false, allocator)
 {}
 
 template<typename A>
@@ -150,16 +150,6 @@ double hll_union_alloc<A>::get_upper_bound(const int num_std_dev) const {
 }
 
 template<typename A>
-int hll_union_alloc<A>::get_compact_serialization_bytes() const {
-  return gadget.get_compact_serialization_bytes();
-}
-
-template<typename A>
-int hll_union_alloc<A>::get_updatable_serialization_bytes() const {
-  return gadget.get_updatable_serialization_bytes();
-}
-
-template<typename A>
 int hll_union_alloc<A>::get_lg_config_k() const {
   return gadget.get_lg_config_k();
 }
@@ -170,11 +160,6 @@ void hll_union_alloc<A>::reset() {
 }
 
 template<typename A>
-bool hll_union_alloc<A>::is_compact() const {
-  return gadget.is_compact();
-}
-
-template<typename A>
 bool hll_union_alloc<A>::is_empty() const {
   return gadget.is_empty();
 }
@@ -195,21 +180,11 @@ bool hll_union_alloc<A>::is_estimation_mode() const {
 }
 
 template<typename A>
-int hll_union_alloc<A>::get_serialization_version() const {
-  return HllUtil<A>::SER_VER;
-}
-
-template<typename A>
 target_hll_type hll_union_alloc<A>::get_target_type() const {
   return target_hll_type::HLL_8;
 }
 
 template<typename A>
-int hll_union_alloc<A>::get_max_serialization_bytes(const int lg_k) {
-  return hll_sketch_alloc<A>::get_max_updatable_serialization_bytes(lg_k, target_hll_type::HLL_8);
-}
-
-template<typename A>
 double hll_union_alloc<A>::get_rel_err(const bool upper_bound, const bool unioned,
                            const int lg_config_k, const int num_std_dev) {
   return HllUtil<A>::getRelErr(upper_bound, unioned, lg_config_k, num_std_dev);
@@ -226,7 +201,7 @@ HllSketchImpl<A>* hll_union_alloc<A>::copy_or_downsample(const HllSketchImpl<A>*
     return src->copyAs(HLL_8);
   }
   typedef typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>> hll8Alloc;
-  Hll8Array<A>* tgtHllArr = new (hll8Alloc().allocate(1)) Hll8Array<A>(tgt_lg_k, false);
+  Hll8Array<A>* tgtHllArr = new (hll8Alloc(src->getAllocator()).allocate(1)) Hll8Array<A>(tgt_lg_k, false, src->getAllocator());
   tgtHllArr->mergeHll(*src);
   //both of these are required for isomorphism
   tgtHllArr->putHipAccum(src->getHipAccum());
diff --git a/be/src/thirdparty/datasketches/HllUtil.hpp b/be/src/thirdparty/datasketches/HllUtil.hpp
index ec0ddf2..3a1ebe2 100644
--- a/be/src/thirdparty/datasketches/HllUtil.hpp
+++ b/be/src/thirdparty/datasketches/HllUtil.hpp
@@ -36,7 +36,7 @@ enum hll_mode { LIST = 0, SET, HLL };
 
 // template provides internal consistency and allows static float values
 // but we don't use the template parameter anywhere
-template<typename A = std::allocator<char> >
+template<typename A = std::allocator<uint8_t> >
 class HllUtil final {
 public:
   // preamble stuff
diff --git a/be/src/thirdparty/datasketches/MurmurHash3.h b/be/src/thirdparty/datasketches/MurmurHash3.h
index b438c7d..c1cbeab 100644
--- a/be/src/thirdparty/datasketches/MurmurHash3.h
+++ b/be/src/thirdparty/datasketches/MurmurHash3.h
@@ -3,6 +3,7 @@
 //  * Changed input seed in MurmurHash3_x64_128 to uint64_t
 //  * Define and use HashState reference to return result
 //  * Made entire hash function defined inline
+//  * Added compute_seed_hash
 //-----------------------------------------------------------------------------
 // MurmurHash3 was written by Austin Appleby, and is placed in the public
 // domain. The author hereby disclaims copyright to this source code.
@@ -170,4 +171,10 @@ FORCE_INLINE void MurmurHash3_x64_128(const void* key, int lenBytes, uint64_t se
 
 //-----------------------------------------------------------------------------
 
+FORCE_INLINE uint16_t compute_seed_hash(uint64_t seed) {
+  HashState hashes;
+  MurmurHash3_x64_128(&seed, sizeof(seed), 0, hashes);
+  return static_cast<uint16_t>(hashes.h1 & 0xffff);
+}
+
 #endif // _MURMURHASH3_H_
diff --git a/be/src/thirdparty/datasketches/README.md b/be/src/thirdparty/datasketches/README.md
index 1c0433d..adecb97 100644
--- a/be/src/thirdparty/datasketches/README.md
+++ b/be/src/thirdparty/datasketches/README.md
@@ -10,8 +10,8 @@ changed during this process as originally the following folders were affected:
 I copied the content of these folders into the same directory so that Impala
 can compile them without rewriting the include paths in the files themselves.
 
-The git branch of the snapshot I used as a source for the files:
-The hash: b2f749ed5ce6ba650f4259602b133c310c3a5ee4
+The git branch of the snapshot I used as a source for the files: 3.0.0
+The hash: 45885c0c8c0807bb9480886d60ca7042000a4c43
 
 Browse the source files here:
-https://github.com/apache/datasketches-cpp/tree/b2f749ed5ce6ba650f4259602b133c310c3a5ee4
+https://github.com/apache/datasketches-cpp/tree/3.0.0
\ No newline at end of file
diff --git a/be/src/thirdparty/datasketches/RelativeErrorTables.hpp b/be/src/thirdparty/datasketches/RelativeErrorTables.hpp
index da8bebf..5e0a3c7 100644
--- a/be/src/thirdparty/datasketches/RelativeErrorTables.hpp
+++ b/be/src/thirdparty/datasketches/RelativeErrorTables.hpp
@@ -24,7 +24,7 @@
 
 namespace datasketches {
 
-template<typename A = std::allocator<char>>
+template<typename A = std::allocator<uint8_t>>
 class RelativeErrorTables {
   public:
     /**
diff --git a/be/src/thirdparty/datasketches/bounds_on_ratios_in_sampled_sets.hpp b/be/src/thirdparty/datasketches/bounds_on_ratios_in_sampled_sets.hpp
new file mode 100644
index 0000000..e2c5433
--- /dev/null
+++ b/be/src/thirdparty/datasketches/bounds_on_ratios_in_sampled_sets.hpp
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef BOUNDS_ON_RATIOS_IN_SAMPLED_SETS_HPP_
+#define BOUNDS_ON_RATIOS_IN_SAMPLED_SETS_HPP_
+
+#include <cstdint>
+#include <string>
+
+#include "bounds_binomial_proportions.hpp"
+
+namespace datasketches {
+
+/**
+ * This class is used to compute the bounds on the estimate of the ratio <i>|B| / |A|</i>, where:
+ * <ul>
+ * <li><i>|A|</i> is the unknown size of a set <i>A</i> of unique identifiers.</li>
+ * <li><i>|B|</i> is the unknown size of a subset <i>B</i> of <i>A</i>.</li>
+ * <li><i>a</i> = <i>|S<sub>A</sub>|</i> is the observed size of a sample of <i>A</i>
+ * that was obtained by Bernoulli sampling with a known inclusion probability <i>f</i>.</li>
+ * <li><i>b</i> = <i>|S<sub>A</sub> &cap; B|</i> is the observed size of a subset
+ * of <i>S<sub>A</sub></i>.</li>
+ * </ul>
+ */
+class bounds_on_ratios_in_sampled_sets {
+public:
+  static constexpr double NUM_STD_DEVS = 2.0;
+
+  /**
+   * Return the approximate lower bound based on a 95% confidence interval
+   * @param a See class javadoc
+   * @param b See class javadoc
+   * @param f the inclusion probability used to produce the set with size <i>a</i> and should
+   * generally be less than 0.5. Above this value, the results not be reliable.
+   * When <i>f</i> = 1.0 this returns the estimate.
+   * @return the approximate upper bound
+   */
+  static double lower_bound_for_b_over_a(uint64_t a, uint64_t b, double f) {
+    check_inputs(a, b, f);
+    if (a == 0) return 0.0;
+    if (f == 1.0) return static_cast<double>(b) / static_cast<double>(a);
+    return bounds_binomial_proportions::approximate_lower_bound_on_p(a, b, NUM_STD_DEVS * hacky_adjuster(f));
+  }
+
+  /**
+   * Return the approximate upper bound based on a 95% confidence interval
+   * @param a See class javadoc
+   * @param b See class javadoc
+   * @param f the inclusion probability used to produce the set with size <i>a</i>.
+   * @return the approximate lower bound
+   */
+  static double upper_bound_for_b_over_a(uint64_t a, uint64_t b, double f) {
+    check_inputs(a, b, f);
+    if (a == 0) return 1.0;
+    if (f == 1.0) return static_cast<double>(b) / static_cast<double>(a);
+    return bounds_binomial_proportions::approximate_upper_bound_on_p(a, b, NUM_STD_DEVS * hacky_adjuster(f));
+  }
+
+  /**
+   * Return the estimate of b over a
+   * @param a See class javadoc
+   * @param b See class javadoc
+   * @return the estimate of b over a
+   */
+  static double get_estimate_of_b_over_a(uint64_t a, uint64_t b) {
+    check_inputs(a, b, 0.3);
+    if (a == 0) return 0.5;
+    return static_cast<double>(b) / static_cast<double>(a);
+  }
+
+  /**
+   * Return the estimate of A. See class javadoc.
+   * @param a See class javadoc
+   * @param f the inclusion probability used to produce the set with size <i>a</i>.
+   * @return the approximate lower bound
+   */
+  static double estimate_of_a(uint64_t a, uint64_t f) {
+    check_inputs(a, 1, f);
+    return a / f;
+  }
+
+  /**
+   * Return the estimate of B. See class javadoc.
+   * @param b See class javadoc
+   * @param f the inclusion probability used to produce the set with size <i>b</i>.
+   * @return the approximate lower bound
+   */
+  static double estimate_of_b(uint64_t b, double f) {
+    check_inputs(b + 1, b, f);
+    return b / f;
+  }
+
+private:
+  /**
+   * This hackyAdjuster is tightly coupled with the width of the confidence interval normally
+   * specified with number of standard deviations. To simplify this interface the number of
+   * standard deviations has been fixed to 2.0, which corresponds to a confidence interval of
+   * 95%.
+   * @param f the inclusion probability used to produce the set with size <i>a</i>.
+   * @return the hacky Adjuster
+   */
+  static double hacky_adjuster(double f) {
+    const double tmp = sqrt(1.0 - f);
+    return (f <= 0.5) ? tmp : tmp + (0.01 * (f - 0.5));
+  }
+
+  static void check_inputs(uint64_t a, uint64_t b, double f) {
+    if (a < b) {
+      throw std::invalid_argument("a must be >= b: a = " + std::to_string(a) + ", b = " + std::to_string(b));
+    }
+    if ((f > 1.0) || (f <= 0.0)) {
+      throw std::invalid_argument("Required: ((f <= 1.0) && (f > 0.0)): " + std::to_string(f));
+    }
+  }
+
+};
+
+} /* namespace datasketches */
+
+# endif
diff --git a/be/src/thirdparty/datasketches/bounds_on_ratios_in_theta_sketched_sets.hpp b/be/src/thirdparty/datasketches/bounds_on_ratios_in_theta_sketched_sets.hpp
new file mode 100644
index 0000000..1779ec1
--- /dev/null
+++ b/be/src/thirdparty/datasketches/bounds_on_ratios_in_theta_sketched_sets.hpp
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef BOUNDS_ON_RATIOS_IN_THETA_SKETCHED_SETS_HPP_
+#define BOUNDS_ON_RATIOS_IN_THETA_SKETCHED_SETS_HPP_
+
+#include <cstdint>
+#include <stdexcept>
+
+#include "bounds_on_ratios_in_sampled_sets.hpp"
+
+namespace datasketches {
+
+/**
+ * This is to compute the bounds on the estimate of the ratio <i>B / A</i>, where:
+ * <ul>
+ * <li><i>A</i> is a Theta Sketch of population <i>PopA</i>.</li>
+ * <li><i>B</i> is a Theta Sketch of population <i>PopB</i> that is a subset of <i>A</i>,
+ * obtained by an intersection of <i>A</i> with some other Theta Sketch <i>C</i>,
+ * which acts like a predicate or selection clause.</li>
+ * <li>The estimate of the ratio <i>PopB/PopA</i> is
+ * estimate_of_b_over_a(<i>A, B</i>).</li>
+ * <li>The Upper Bound estimate on the ratio PopB/PopA is
+ * upper_bound_for_b_over_a(<i>A, B</i>).</li>
+ * <li>The Lower Bound estimate on the ratio PopB/PopA is
+ * lower_bound_for_b_over_a(<i>A, B</i>).</li>
+ * </ul>
+ * Note: The theta of <i>A</i> cannot be greater than the theta of <i>B</i>.
+ * If <i>B</i> is formed as an intersection of <i>A</i> and some other set <i>C</i>,
+ * then the theta of <i>B</i> is guaranteed to be less than or equal to the theta of <i>B</i>.
+ */
+template<typename ExtractKey>
+class bounds_on_ratios_in_theta_sketched_sets {
+public:
+  /**
+   * Gets the approximate lower bound for B over A based on a 95% confidence interval
+   * @param sketchA the sketch A
+   * @param sketchB the sketch B
+   * @return the approximate lower bound for B over A
+   */
+  template<typename SketchA, typename SketchB>
+  static double lower_bound_for_b_over_a(const SketchA& sketch_a, const SketchB& sketch_b) {
+    const uint64_t theta64_a = sketch_a.get_theta64();
+    const uint64_t theta64_b = sketch_b.get_theta64();
+    check_thetas(theta64_a, theta64_b);
+
+    const uint64_t count_b = sketch_b.get_num_retained();
+    const uint64_t count_a = theta64_a == theta64_b
+        ? sketch_a.get_num_retained()
+        : count_less_than_theta64(sketch_a, theta64_b);
+
+    if (count_a == 0) return 0;
+    const double f = sketch_b.get_theta();
+    return bounds_on_ratios_in_sampled_sets::lower_bound_for_b_over_a(count_a, count_b, f);
+  }
+
+  /**
+   * Gets the approximate upper bound for B over A based on a 95% confidence interval
+   * @param sketchA the sketch A
+   * @param sketchB the sketch B
+   * @return the approximate upper bound for B over A
+   */
+  template<typename SketchA, typename SketchB>
+  static double upper_bound_for_b_over_a(const SketchA& sketch_a, const SketchB& sketch_b) {
+    const uint64_t theta64_a = sketch_a.get_theta64();
+    const uint64_t theta64_b = sketch_b.get_theta64();
+    check_thetas(theta64_a, theta64_b);
+
+    const uint64_t count_b = sketch_b.get_num_retained();
+    const uint64_t count_a = (theta64_a == theta64_b)
+        ? sketch_a.get_num_retained()
+        : count_less_than_theta64(sketch_a, theta64_b);
+
+    if (count_a == 0) return 1;
+    const double f = sketch_b.get_theta();
+    return bounds_on_ratios_in_sampled_sets::upper_bound_for_b_over_a(count_a, count_b, f);
+  }
+
+  /**
+   * Gets the estimate for B over A
+   * @param sketchA the sketch A
+   * @param sketchB the sketch B
+   * @return the estimate for B over A
+   */
+  template<typename SketchA, typename SketchB>
+  static double estimate_of_b_over_a(const SketchA& sketch_a, const SketchB& sketch_b) {
+    const uint64_t theta64_a = sketch_a.get_theta64();
+    const uint64_t theta64_b = sketch_b.get_theta64();
+    check_thetas(theta64_a, theta64_b);
+
+    const uint64_t count_b = sketch_b.get_num_retained();
+    const uint64_t count_a = (theta64_a == theta64_b)
+        ? sketch_a.get_num_retained()
+        : count_less_than_theta64(sketch_a, theta64_b);
+
+    if (count_a == 0) return 0.5;
+    return static_cast<double>(count_b) / static_cast<double>(count_a);
+  }
+
+private:
+
+  static inline void check_thetas(uint64_t theta_a, uint64_t theta_b) {
+    if (theta_b > theta_a) {
+      throw std::invalid_argument("theta_a must be <= theta_b");
+    }
+  }
+
+  template<typename Sketch>
+  static uint64_t count_less_than_theta64(const Sketch& sketch, uint64_t theta) {
+    uint64_t count = 0;
+    for (const auto& entry: sketch) if (ExtractKey()(entry) < theta) ++count;
+    return count;
+  }
+
+};
+
+} /* namespace datasketches */
+
+# endif
diff --git a/be/src/thirdparty/datasketches/cpc_common.hpp b/be/src/thirdparty/datasketches/cpc_common.hpp
index 9a766b8..cde110f 100644
--- a/be/src/thirdparty/datasketches/cpc_common.hpp
+++ b/be/src/thirdparty/datasketches/cpc_common.hpp
@@ -44,6 +44,8 @@ template<typename A> class u32_table;
 
 template<typename A>
 struct compressed_state {
+  explicit compressed_state(const A& allocator): table_data(allocator), table_data_words(0), table_num_entries(0),
+      window_data(allocator), window_data_words(0) {}
   vector_u32<A> table_data;
   uint32_t table_data_words;
   uint32_t table_num_entries; // can be different from the number of entries in the sketch in hybrid mode
@@ -53,6 +55,7 @@ struct compressed_state {
 
 template<typename A>
 struct uncompressed_state {
+  explicit uncompressed_state(const A& allocator): table(allocator), window(allocator) {}
   u32_table<A> table;
   vector_u8<A> window;
 };
diff --git a/be/src/thirdparty/datasketches/cpc_compressor.hpp b/be/src/thirdparty/datasketches/cpc_compressor.hpp
index 55fa3b8..73db797 100644
--- a/be/src/thirdparty/datasketches/cpc_compressor.hpp
+++ b/be/src/thirdparty/datasketches/cpc_compressor.hpp
@@ -129,14 +129,14 @@ private:
   void compress_surprising_values(const vector_u32<A>& pairs, uint8_t lg_k, compressed_state<A>& result) const;
   void compress_sliding_window(const uint8_t* window, uint8_t lg_k, uint32_t num_coupons, compressed_state<A>& target) const;
 
-  vector_u32<A> uncompress_surprising_values(const uint32_t* data, size_t data_words, size_t num_pairs, uint8_t lg_k) const;
+  vector_u32<A> uncompress_surprising_values(const uint32_t* data, size_t data_words, size_t num_pairs, uint8_t lg_k, const A& allocator) const;
   void uncompress_sliding_window(const uint32_t* data, size_t data_words, vector_u8<A>& window, uint8_t lg_k, uint32_t num_coupons) const;
 
   static size_t safe_length_for_compressed_pair_buf(uint64_t k, size_t num_pairs, size_t num_base_bits);
   static size_t safe_length_for_compressed_window_buf(uint64_t k);
   static uint8_t determine_pseudo_phase(uint8_t lg_k, uint64_t c);
 
-  static inline vector_u32<A> tricky_get_pairs_from_window(const uint8_t* window, uint32_t k, uint32_t num_pairs_to_get, uint32_t empty_space);
+  static inline vector_u32<A> tricky_get_pairs_from_window(const uint8_t* window, uint32_t k, uint32_t num_pairs_to_get, uint32_t empty_space, const A& allocator);
   static inline uint64_t golomb_choose_number_of_base_bits(uint64_t k, uint64_t count);
 };
 
diff --git a/be/src/thirdparty/datasketches/cpc_compressor_impl.hpp b/be/src/thirdparty/datasketches/cpc_compressor_impl.hpp
index b951b05..e3398c8 100644
--- a/be/src/thirdparty/datasketches/cpc_compressor_impl.hpp
+++ b/be/src/thirdparty/datasketches/cpc_compressor_impl.hpp
@@ -160,7 +160,7 @@ template<typename A>
 void cpc_compressor<A>::uncompress(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint64_t num_coupons) const {
   switch (cpc_sketch_alloc<A>::determine_flavor(lg_k, num_coupons)) {
     case cpc_sketch_alloc<A>::flavor::EMPTY:
-      target.table = u32_table<A>(2, 6 + lg_k);
+      target.table = u32_table<A>(2, 6 + lg_k, source.table_data.get_allocator());
       break;
     case cpc_sketch_alloc<A>::flavor::SPARSE:
       uncompress_sparse_flavor(source, target, lg_k);
@@ -191,8 +191,9 @@ template<typename A>
 void cpc_compressor<A>::uncompress_sparse_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k) const {
   if (source.window_data.size() > 0) throw std::logic_error("unexpected sliding window");
   if (source.table_data.size() == 0) throw std::logic_error("table is expected");
-  vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, source.table_num_entries, lg_k);
-  target.table = u32_table<A>::make_from_pairs(pairs.data(), source.table_num_entries, lg_k);
+  vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, source.table_num_entries,
+      lg_k, source.table_data.get_allocator());
+  target.table = u32_table<A>::make_from_pairs(pairs.data(), source.table_num_entries, lg_k, pairs.get_allocator());
 }
 
 // This is complicated because it effectively builds a Sparse version
@@ -206,7 +207,7 @@ void cpc_compressor<A>::compress_hybrid_flavor(const cpc_sketch_alloc<A>& source
   if (pairs_from_table.size() > 0) u32_table<A>::introspective_insertion_sort(pairs_from_table.data(), 0, pairs_from_table.size());
   const size_t num_pairs_from_window = source.get_num_coupons() - pairs_from_table.size(); // because the window offset is zero
 
-  vector_u32<A> all_pairs = tricky_get_pairs_from_window(source.sliding_window.data(), k, num_pairs_from_window, pairs_from_table.size());
+  vector_u32<A> all_pairs = tricky_get_pairs_from_window(source.sliding_window.data(), k, num_pairs_from_window, pairs_from_table.size(), source.get_allocator());
 
   u32_table<A>::merge(
       pairs_from_table.data(), 0, pairs_from_table.size(),
@@ -221,7 +222,8 @@ template<typename A>
 void cpc_compressor<A>::uncompress_hybrid_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k) const {
   if (source.window_data.size() > 0) throw std::logic_error("window is not expected");
   if (source.table_data.size() == 0) throw std::logic_error("table is expected");
-  vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, source.table_num_entries, lg_k);
+  vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, source.table_num_entries,
+      lg_k, source.table_data.get_allocator());
 
   // In the hybrid flavor, some of these pairs actually
   // belong in the window, so we will separate them out,
@@ -240,7 +242,7 @@ void cpc_compressor<A>::uncompress_hybrid_flavor(const compressed_state<A>& sour
       pairs[next_true_pair++] = row_col; // move true pair down
     }
   }
-  target.table = u32_table<A>::make_from_pairs(pairs.data(), next_true_pair, lg_k);
+  target.table = u32_table<A>::make_from_pairs(pairs.data(), next_true_pair, lg_k, pairs.get_allocator());
 }
 
 template<typename A>
@@ -264,21 +266,23 @@ void cpc_compressor<A>::compress_pinned_flavor(const cpc_sketch_alloc<A>& source
 }
 
 template<typename A>
-void cpc_compressor<A>::uncompress_pinned_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint32_t num_coupons) const {
+void cpc_compressor<A>::uncompress_pinned_flavor(const compressed_state<A>& source, uncompressed_state<A>& target,
+    uint8_t lg_k, uint32_t num_coupons) const {
   if (source.window_data.size() == 0) throw std::logic_error("window is expected");
   uncompress_sliding_window(source.window_data.data(), source.window_data_words, target.window, lg_k, num_coupons);
   const size_t num_pairs = source.table_num_entries;
   if (num_pairs == 0) {
-    target.table = u32_table<A>(2, 6 + lg_k);
+    target.table = u32_table<A>(2, 6 + lg_k, source.table_data.get_allocator());
   } else {
     if (source.table_data.size() == 0) throw std::logic_error("table is expected");
-    vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, num_pairs, lg_k);
+    vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, num_pairs,
+        lg_k, source.table_data.get_allocator());
     // undo the compressor's 8-column shift
     for (size_t i = 0; i < num_pairs; i++) {
       if ((pairs[i] & 63) >= 56) throw std::logic_error("(pairs[i] & 63) >= 56");
       pairs[i] += 8;
     }
-    target.table = u32_table<A>::make_from_pairs(pairs.data(), num_pairs, lg_k);
+    target.table = u32_table<A>::make_from_pairs(pairs.data(), num_pairs, lg_k, pairs.get_allocator());
   }
 }
 
@@ -314,15 +318,17 @@ void cpc_compressor<A>::compress_sliding_flavor(const cpc_sketch_alloc<A>& sourc
 }
 
 template<typename A>
-void cpc_compressor<A>::uncompress_sliding_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint32_t num_coupons) const {
+void cpc_compressor<A>::uncompress_sliding_flavor(const compressed_state<A>& source, uncompressed_state<A>& target,
+    uint8_t lg_k, uint32_t num_coupons) const {
   if (source.window_data.size() == 0) throw std::logic_error("window is expected");
   uncompress_sliding_window(source.window_data.data(), source.window_data_words, target.window, lg_k, num_coupons);
   const size_t num_pairs = source.table_num_entries;
   if (num_pairs == 0) {
-    target.table = u32_table<A>(2, 6 + lg_k);
+    target.table = u32_table<A>(2, 6 + lg_k, source.table_data.get_allocator());
   } else {
     if (source.table_data.size() == 0) throw std::logic_error("table is expected");
-    vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, num_pairs, lg_k);
+    vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, num_pairs,
+        lg_k, source.table_data.get_allocator());
 
     const uint8_t pseudo_phase = determine_pseudo_phase(lg_k, num_coupons);
     if (pseudo_phase >= 16) throw std::logic_error("pseudo phase >= 16");
@@ -342,7 +348,7 @@ void cpc_compressor<A>::uncompress_sliding_flavor(const compressed_state<A>& sou
       pairs[i] = (row << 6) | col;
     }
 
-    target.table = u32_table<A>::make_from_pairs(pairs.data(), num_pairs, lg_k);
+    target.table = u32_table<A>::make_from_pairs(pairs.data(), num_pairs, lg_k, pairs.get_allocator());
   }
 }
 
@@ -364,9 +370,10 @@ void cpc_compressor<A>::compress_surprising_values(const vector_u32<A>& pairs, u
 }
 
 template<typename A>
-vector_u32<A> cpc_compressor<A>::uncompress_surprising_values(const uint32_t* data, size_t data_words, size_t num_pairs, uint8_t lg_k) const {
+vector_u32<A> cpc_compressor<A>::uncompress_surprising_values(const uint32_t* data, size_t data_words, size_t num_pairs,
+    uint8_t lg_k, const A& allocator) const {
   const size_t k = 1 << lg_k;
-  vector_u32<A> pairs(num_pairs);
+  vector_u32<A> pairs(num_pairs, 0, allocator);
   const uint8_t num_base_bits = golomb_choose_number_of_base_bits(k + num_pairs, num_pairs);
   low_level_uncompress_pairs(pairs.data(), num_pairs, num_base_bits, data, data_words);
   return pairs;
@@ -388,7 +395,8 @@ void cpc_compressor<A>::compress_sliding_window(const uint8_t* window, uint8_t l
 }
 
 template<typename A>
-void cpc_compressor<A>::uncompress_sliding_window(const uint32_t* data, size_t data_words, vector_u8<A>& window, uint8_t lg_k, uint32_t num_coupons) const {
+void cpc_compressor<A>::uncompress_sliding_window(const uint32_t* data, size_t data_words, vector_u8<A>& window,
+    uint8_t lg_k, uint32_t num_coupons) const {
   const size_t k = 1 << lg_k;
   window.resize(k); // zeroing not needed here (unlike the Hybrid Flavor)
   const uint8_t pseudo_phase = determine_pseudo_phase(lg_k, num_coupons);
@@ -710,9 +718,10 @@ void write_unary(
 // The empty space that this leaves at the beginning of the output array
 // will be filled in later by the caller.
 template<typename A>
-vector_u32<A> cpc_compressor<A>::tricky_get_pairs_from_window(const uint8_t* window, uint32_t k, uint32_t num_pairs_to_get, uint32_t empty_space) {
+vector_u32<A> cpc_compressor<A>::tricky_get_pairs_from_window(const uint8_t* window, uint32_t k, uint32_t num_pairs_to_get,
+    uint32_t empty_space, const A& allocator) {
   const size_t output_length = empty_space + num_pairs_to_get;
-  vector_u32<A> pairs(output_length);
+  vector_u32<A> pairs(output_length, 0, allocator);
   size_t pair_index = empty_space;
   for (unsigned row_index = 0; row_index < k; row_index++) {
     uint8_t byte = window[row_index];
diff --git a/be/src/thirdparty/datasketches/cpc_sketch.hpp b/be/src/thirdparty/datasketches/cpc_sketch.hpp
index 9aba16f..a4bf8f6 100644
--- a/be/src/thirdparty/datasketches/cpc_sketch.hpp
+++ b/be/src/thirdparty/datasketches/cpc_sketch.hpp
@@ -49,7 +49,7 @@ template<typename A> class cpc_sketch_alloc;
 template<typename A> class cpc_union_alloc;
 
 // alias with default allocator for convenience
-typedef cpc_sketch_alloc<std::allocator<void>> cpc_sketch;
+using cpc_sketch = cpc_sketch_alloc<std::allocator<uint8_t>>;
 
 // allocation and initialization of global decompression (decoding) tables
 // call this before anything else if you want to control the initialization time
@@ -67,7 +67,10 @@ public:
    * @param lg_k base 2 logarithm of the number of bins in the sketch
    * @param seed for hash function
    */
-  explicit cpc_sketch_alloc(uint8_t lg_k = CPC_DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED);
+  explicit cpc_sketch_alloc(uint8_t lg_k = CPC_DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED, const A& allocator = A());
+
+  using allocator_type = A;
+  A get_allocator() const;
 
   /**
    * @return configured lg_k of this sketch
@@ -204,7 +207,7 @@ public:
 
   // This is a convenience alias for users
   // The type returned by the following serialize method
-  typedef vector_u8<A> vector_bytes;
+  using vector_bytes = vector_u8<A>;
 
   /**
    * This method serializes the sketch as a vector of bytes.
@@ -221,7 +224,7 @@ public:
    * @param seed the seed for the hash function that was used to create the sketch
    * @return an instance of a sketch
    */
-  static cpc_sketch_alloc<A> deserialize(std::istream& is, uint64_t seed = DEFAULT_SEED);
+  static cpc_sketch_alloc<A> deserialize(std::istream& is, uint64_t seed = DEFAULT_SEED, const A& allocator = A());
 
   /**
    * This method deserializes a sketch from a given array of bytes.
@@ -230,7 +233,7 @@ public:
    * @param seed the seed for the hash function that was used to create the sketch
    * @return an instance of the sketch
    */
-  static cpc_sketch_alloc<A> deserialize(const void* bytes, size_t size, uint64_t seed = DEFAULT_SEED);
+  static cpc_sketch_alloc<A> deserialize(const void* bytes, size_t size, uint64_t seed = DEFAULT_SEED, const A& allocator = A());
 
   // for internal use
   uint32_t get_num_coupons() const;
diff --git a/be/src/thirdparty/datasketches/cpc_sketch_impl.hpp b/be/src/thirdparty/datasketches/cpc_sketch_impl.hpp
index e6bc010..a314de8 100644
--- a/be/src/thirdparty/datasketches/cpc_sketch_impl.hpp
+++ b/be/src/thirdparty/datasketches/cpc_sketch_impl.hpp
@@ -41,13 +41,13 @@ void cpc_init() {
 }
 
 template<typename A>
-cpc_sketch_alloc<A>::cpc_sketch_alloc(uint8_t lg_k, uint64_t seed):
+cpc_sketch_alloc<A>::cpc_sketch_alloc(uint8_t lg_k, uint64_t seed, const A& allocator):
 lg_k(lg_k),
 seed(seed),
 was_merged(false),
 num_coupons(0),
-surprising_value_table(2, 6 + lg_k),
-sliding_window(),
+surprising_value_table(2, 6 + lg_k, allocator),
+sliding_window(allocator),
 window_offset(0),
 first_interesting_column(0),
 kxp(1 << lg_k),
@@ -59,6 +59,11 @@ hip_est_accum(0)
 }
 
 template<typename A>
+A cpc_sketch_alloc<A>::get_allocator() const {
+  return sliding_window.get_allocator();
+}
+
+template<typename A>
 uint8_t cpc_sketch_alloc<A>::get_lg_k() const {
   return lg_k;
 }
@@ -277,7 +282,7 @@ void cpc_sketch_alloc<A>::promote_sparse_to_windowed() {
 
   sliding_window.resize(k, 0); // zero the memory (because we will be OR'ing into it)
 
-  u32_table<A> new_table(2, 6 + lg_k);
+  u32_table<A> new_table(2, 6 + lg_k, sliding_window.get_allocator());
 
   const uint32_t* old_slots = surprising_value_table.get_slots();
   const size_t old_num_slots = 1 << surprising_value_table.get_lg_size();
@@ -401,7 +406,7 @@ string<A> cpc_sketch_alloc<A>::to_string() const {
 
 template<typename A>
 void cpc_sketch_alloc<A>::serialize(std::ostream& os) const {
-  compressed_state<A> compressed;
+  compressed_state<A> compressed(A(sliding_window.get_allocator()));
   compressed.table_data_words = 0;
   compressed.table_num_entries = 0;
   compressed.window_data_words = 0;
@@ -454,7 +459,7 @@ void cpc_sketch_alloc<A>::serialize(std::ostream& os) const {
 
 template<typename A>
 vector_u8<A> cpc_sketch_alloc<A>::serialize(unsigned header_size_bytes) const {
-  compressed_state<A> compressed;
+  compressed_state<A> compressed(sliding_window.get_allocator());
   compressed.table_data_words = 0;
   compressed.table_num_entries = 0;
   compressed.window_data_words = 0;
@@ -464,7 +469,7 @@ vector_u8<A> cpc_sketch_alloc<A>::serialize(unsigned header_size_bytes) const {
   const bool has_window = compressed.window_data.size() > 0;
   const uint8_t preamble_ints = get_preamble_ints(num_coupons, has_hip, has_table, has_window);
   const size_t size = header_size_bytes + (preamble_ints + compressed.table_data_words + compressed.window_data_words) * sizeof(uint32_t);
-  vector_u8<A> bytes(size);
+  vector_u8<A> bytes(size, 0, sliding_window.get_allocator());
   uint8_t* ptr = bytes.data() + header_size_bytes;
   ptr += copy_to_mem(&preamble_ints, ptr, sizeof(preamble_ints));
   const uint8_t serial_version = SERIAL_VERSION;
@@ -511,7 +516,7 @@ vector_u8<A> cpc_sketch_alloc<A>::serialize(unsigned header_size_bytes) const {
 }
 
 template<typename A>
-cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed) {
+cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed, const A& allocator) {
   uint8_t preamble_ints;
   is.read((char*)&preamble_ints, sizeof(preamble_ints));
   uint8_t serial_version;
@@ -529,7 +534,7 @@ cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(std::istream& is, uint64_t
   const bool has_hip = flags_byte & (1 << flags::HAS_HIP);
   const bool has_table = flags_byte & (1 << flags::HAS_TABLE);
   const bool has_window = flags_byte & (1 << flags::HAS_WINDOW);
-  compressed_state<A> compressed;
+  compressed_state<A> compressed(allocator);
   compressed.table_data_words = 0;
   compressed.table_num_entries = 0;
   compressed.window_data_words = 0;
@@ -583,7 +588,7 @@ cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(std::istream& is, uint64_t
     throw std::invalid_argument("Incompatible seed hashes: " + std::to_string(seed_hash) + ", "
         + std::to_string(compute_seed_hash(seed)));
   }
-  uncompressed_state<A> uncompressed;
+  uncompressed_state<A> uncompressed(allocator);
   get_compressor<A>().uncompress(compressed, uncompressed, lg_k, num_coupons);
   if (!is.good())
     throw std::runtime_error("error reading from std::istream"); 
@@ -592,7 +597,7 @@ cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(std::istream& is, uint64_t
 }
 
 template<typename A>
-cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed) {
+cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed, const A& allocator) {
   ensure_minimum_memory(size, 8);
   const char* ptr = static_cast<const char*>(bytes);
   const char* base = static_cast<const char*>(bytes);
@@ -614,7 +619,7 @@ cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(const void* bytes, size_t s
   const bool has_table = flags_byte & (1 << flags::HAS_TABLE);
   const bool has_window = flags_byte & (1 << flags::HAS_WINDOW);
   ensure_minimum_memory(size, preamble_ints << 2);
-  compressed_state<A> compressed;
+  compressed_state<A> compressed(allocator);
   compressed.table_data_words = 0;
   compressed.table_num_entries = 0;
   compressed.window_data_words = 0;
@@ -677,7 +682,7 @@ cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(const void* bytes, size_t s
     throw std::invalid_argument("Incompatible seed hashes: " + std::to_string(seed_hash) + ", "
         + std::to_string(compute_seed_hash(seed)));
   }
-  uncompressed_state<A> uncompressed;
+  uncompressed_state<A> uncompressed(allocator);
   get_compressor<A>().uncompress(compressed, uncompressed, lg_k, num_coupons);
   return cpc_sketch_alloc(lg_k, num_coupons, first_interesting_column, std::move(uncompressed.table),
       std::move(uncompressed.window), has_hip, kxp, hip_est_accum, seed);
@@ -766,7 +771,7 @@ vector_u64<A> cpc_sketch_alloc<A>::build_bit_matrix() const {
   // Fill the matrix with default rows in which the "early zone" is filled with ones.
   // This is essential for the routine's O(k) time cost (as opposed to O(C)).
   const uint64_t default_row = (static_cast<uint64_t>(1) << window_offset) - 1;
-  vector_u64<A> matrix(k, default_row);
+  vector_u64<A> matrix(k, default_row, sliding_window.get_allocator());
 
   if (num_coupons == 0) return matrix;
 
diff --git a/be/src/thirdparty/datasketches/cpc_union.hpp b/be/src/thirdparty/datasketches/cpc_union.hpp
index e56aa72..dd59abc 100644
--- a/be/src/thirdparty/datasketches/cpc_union.hpp
+++ b/be/src/thirdparty/datasketches/cpc_union.hpp
@@ -35,7 +35,7 @@ namespace datasketches {
  */
 
 // alias with default allocator for convenience
-typedef cpc_union_alloc<std::allocator<void>> cpc_union;
+using cpc_union = cpc_union_alloc<std::allocator<uint8_t>>;
 
 template<typename A>
 class cpc_union_alloc {
@@ -45,7 +45,7 @@ public:
    * @param lg_k base 2 logarithm of the number of bins in the sketch
    * @param seed for hash function
    */
-  explicit cpc_union_alloc(uint8_t lg_k = CPC_DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED);
+  explicit cpc_union_alloc(uint8_t lg_k = CPC_DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED, const A& allocator = A());
 
   cpc_union_alloc(const cpc_union_alloc<A>& other);
   cpc_union_alloc(cpc_union_alloc<A>&& other) noexcept;
diff --git a/be/src/thirdparty/datasketches/cpc_union_impl.hpp b/be/src/thirdparty/datasketches/cpc_union_impl.hpp
index 65d933c..5acfe5f 100644
--- a/be/src/thirdparty/datasketches/cpc_union_impl.hpp
+++ b/be/src/thirdparty/datasketches/cpc_union_impl.hpp
@@ -25,16 +25,16 @@
 namespace datasketches {
 
 template<typename A>
-cpc_union_alloc<A>::cpc_union_alloc(uint8_t lg_k, uint64_t seed):
+cpc_union_alloc<A>::cpc_union_alloc(uint8_t lg_k, uint64_t seed, const A& allocator):
 lg_k(lg_k),
 seed(seed),
 accumulator(nullptr),
-bit_matrix()
+bit_matrix(allocator)
 {
   if (lg_k < CPC_MIN_LG_K || lg_k > CPC_MAX_LG_K) {
     throw std::invalid_argument("lg_k must be >= " + std::to_string(CPC_MIN_LG_K) + " and <= " + std::to_string(CPC_MAX_LG_K) + ": " + std::to_string(lg_k));
   }
-  accumulator = new (AllocCpc().allocate(1)) cpc_sketch_alloc<A>(lg_k, seed);
+  accumulator = new (AllocCpc().allocate(1)) cpc_sketch_alloc<A>(lg_k, seed, allocator);
 }
 
 template<typename A>
@@ -200,13 +200,13 @@ cpc_sketch_alloc<A> cpc_union_alloc<A>::get_result_from_bit_matrix() const {
 
   const uint8_t offset = cpc_sketch_alloc<A>::determine_correct_offset(lg_k, num_coupons);
 
-  vector_u8<A> sliding_window(k);
+  vector_u8<A> sliding_window(k, 0, bit_matrix.get_allocator());
   // don't need to zero the window's memory
 
   // dynamically growing caused snowplow effect
   uint8_t table_lg_size = lg_k - 4; // K/16; in some cases this will end up being oversized
   if (table_lg_size < 2) table_lg_size = 2;
-  u32_table<A> table(table_lg_size, 6 + lg_k);
+  u32_table<A> table(table_lg_size, 6 + lg_k, bit_matrix.get_allocator());
 
   // the following should work even when the offset is zero
   const uint64_t mask_for_clearing_window = (static_cast<uint64_t>(0xff) << offset) ^ UINT64_MAX;
@@ -314,7 +314,7 @@ void cpc_union_alloc<A>::reduce_k(uint8_t new_lg_k) {
     vector_u64<A> old_matrix = std::move(bit_matrix);
     const uint8_t old_lg_k = lg_k;
     const size_t new_k = 1 << new_lg_k;
-    bit_matrix = vector_u64<A>(new_k, 0);
+    bit_matrix = vector_u64<A>(new_k, 0, old_matrix.get_allocator());
     lg_k = new_lg_k;
     or_matrix_into_matrix(old_matrix, old_lg_k);
     return;
diff --git a/be/src/thirdparty/datasketches/cpc_util.hpp b/be/src/thirdparty/datasketches/cpc_util.hpp
index b63f26f..1a33b3a 100644
--- a/be/src/thirdparty/datasketches/cpc_util.hpp
+++ b/be/src/thirdparty/datasketches/cpc_util.hpp
@@ -24,12 +24,6 @@
 
 namespace datasketches {
 
-static inline uint16_t compute_seed_hash(uint64_t seed) {
-  HashState hashes;
-  MurmurHash3_x64_128(&seed, sizeof(seed), 0, hashes);
-  return hashes.h1 & 0xffff;
-}
-
 static inline uint64_t divide_longs_rounding_up(uint64_t x, uint64_t y) {
   if (y == 0) throw std::invalid_argument("divide_longs_rounding_up: bad argument");
   const uint64_t quotient = x / y;
diff --git a/be/src/thirdparty/datasketches/hll.hpp b/be/src/thirdparty/datasketches/hll.hpp
index 3898dda..a65b945 100644
--- a/be/src/thirdparty/datasketches/hll.hpp
+++ b/be/src/thirdparty/datasketches/hll.hpp
@@ -108,7 +108,7 @@ class hll_union_alloc;
 template<typename A> using AllocU8 = typename std::allocator_traits<A>::template rebind_alloc<uint8_t>;
 template<typename A> using vector_u8 = std::vector<uint8_t, AllocU8<A>>;
 
-template<typename A = std::allocator<char> >
+template<typename A = std::allocator<uint8_t> >
 class hll_sketch_alloc final {
   public:
     /**
@@ -119,7 +119,7 @@ class hll_sketch_alloc final {
      *        keeping memory use constant (if HLL_6 or HLL_8) at the cost of
      *        starting out using much more memory
      */
-    explicit hll_sketch_alloc(int lg_config_k, target_hll_type tgt_type = HLL_4, bool start_full_size = false);
+    explicit hll_sketch_alloc(int lg_config_k, target_hll_type tgt_type = HLL_4, bool start_full_size = false, const A& allocator = A());
 
     /**
      * Copy constructor
@@ -140,14 +140,14 @@ class hll_sketch_alloc final {
      * Reconstructs a sketch from a serialized image on a stream.
      * @param is An input stream with a binary image of a sketch
      */
-    static hll_sketch_alloc deserialize(std::istream& is);
+    static hll_sketch_alloc deserialize(std::istream& is, const A& allocator = A());
 
     /**
      * Reconstructs a sketch from a serialized image in a byte array.
      * @param is bytes An input array with a binary image of a sketch
      * @param len Length of the input array, in bytes
      */
-    static hll_sketch_alloc deserialize(const void* bytes, size_t len);
+    static hll_sketch_alloc deserialize(const void* bytes, size_t len, const A& allocator = A());
 
     //! Class destructor
     virtual ~hll_sketch_alloc();
@@ -423,7 +423,7 @@ class hll_sketch_alloc final {
  * author Kevin Lang
  */
  
-template<typename A = std::allocator<char> >
+template<typename A = std::allocator<uint8_t> >
 class hll_union_alloc {
   public:
     /**
@@ -431,7 +431,7 @@ class hll_union_alloc {
      * @param lg_max_k The maximum size, in log2, of k. The value must
      * be between 7 and 21, inclusive.
      */
-    explicit hll_union_alloc(int lg_max_k);
+    explicit hll_union_alloc(int lg_max_k, const A& allocator = A());
 
     /**
      * Returns the current cardinality estimate
@@ -469,18 +469,6 @@ class hll_union_alloc {
     double get_upper_bound(int num_std_dev) const;
 
     /**
-     * Returns the size of the union serialized in compact form.
-     * @return Size of the union serialized in compact form, in bytes.
-     */
-    int get_compact_serialization_bytes() const;
-
-    /**
-     * Returns the size of the union serialized without compaction.
-     * @return Size of the union serialized without compaction, in bytes.
-     */
-    int get_updatable_serialization_bytes() const;
-
-    /**
      * Returns union's configured lg_k value.
      * @return Configured lg_k value.
      */
@@ -493,12 +481,6 @@ class hll_union_alloc {
     target_hll_type get_target_type() const;
 
     /**
-     * Indicates if the union is currently stored compacted.
-     * @return True if the union is stored in compact form.
-     */
-    bool is_compact() const;
-
-    /**
      * Indicates if the union is currently empty.
      * @return True if the union is empty.
      */
@@ -606,15 +588,6 @@ class hll_union_alloc {
     void update(const void* data, size_t length_bytes);
 
     /**
-     * Returns the maximum size in bytes that this union operator can grow to given a lg_k.
-     *
-     * @param lg_k The maximum Log2 of k for this union operator. This value must be
-     * between 4 and 21 inclusively.
-     * @return the maximum size in bytes that this union operator can grow to.
-     */
-    static int get_max_serialization_bytes(int lg_k);
-
-    /**
      * Gets the current (approximate) Relative Error (RE) asymptotic values given several
      * parameters. This is used primarily for testing.
      * @param upper_bound return the RE for the Upper Bound, otherwise for the Lower Bound.
@@ -645,7 +618,6 @@ class hll_union_alloc {
     void coupon_update(int coupon);
 
     hll_mode get_current_mode() const;
-    int get_serialization_version() const;
     bool is_out_of_order_flag() const;
     bool is_estimation_mode() const;
 
diff --git a/be/src/thirdparty/datasketches/icon_estimator.hpp b/be/src/thirdparty/datasketches/icon_estimator.hpp
index 27d76ca..4a9daea 100644
--- a/be/src/thirdparty/datasketches/icon_estimator.hpp
+++ b/be/src/thirdparty/datasketches/icon_estimator.hpp
@@ -231,7 +231,7 @@ static const double ICON_POLYNOMIAL_COEFFICIENTS[ICON_TABLE_SIZE] = {
 #endif
 };
 
-static double evaluate_polynomial(const double* coefficients, int start, int num, double x) {
+static inline double evaluate_polynomial(const double* coefficients, int start, int num, double x) {
   const int final = start + num - 1;
   double total = coefficients[final];
   for (int j = final - 1; j >= start; j--) {
@@ -241,11 +241,11 @@ static double evaluate_polynomial(const double* coefficients, int start, int num
   return total;
 }
 
-static double icon_exponential_approximation(double k, double c) {
+static inline double icon_exponential_approximation(double k, double c) {
   return (0.7940236163830469 * k * pow(2.0, c / k));
 }
 
-static double compute_icon_estimate(uint8_t lg_k, uint64_t c) {
+static inline double compute_icon_estimate(uint8_t lg_k, uint64_t c) {
   if (lg_k < ICON_MIN_LOG_K || lg_k > ICON_MAX_LOG_K) throw std::out_of_range("lg_k out of range");
   if (c < 2) return ((c == 0) ? 0.0 : 1.0);
   const size_t k = 1 << lg_k;
diff --git a/be/src/thirdparty/datasketches/kll_quantile_calculator.hpp b/be/src/thirdparty/datasketches/kll_quantile_calculator.hpp
index bc60f26..5114399 100644
--- a/be/src/thirdparty/datasketches/kll_quantile_calculator.hpp
+++ b/be/src/thirdparty/datasketches/kll_quantile_calculator.hpp
@@ -28,7 +28,7 @@ template <typename T, typename C, typename A>
 class kll_quantile_calculator {
   public:
     // assumes that all levels are sorted including level 0
-    kll_quantile_calculator(const T* items, const uint32_t* levels, uint8_t num_levels, uint64_t n);
+    kll_quantile_calculator(const T* items, const uint32_t* levels, uint8_t num_levels, uint64_t n, const A& allocator);
     T get_quantile(double fraction) const;
 
   private:
diff --git a/be/src/thirdparty/datasketches/kll_quantile_calculator_impl.hpp b/be/src/thirdparty/datasketches/kll_quantile_calculator_impl.hpp
index f580819..23efa4d 100644
--- a/be/src/thirdparty/datasketches/kll_quantile_calculator_impl.hpp
+++ b/be/src/thirdparty/datasketches/kll_quantile_calculator_impl.hpp
@@ -29,8 +29,8 @@
 namespace datasketches {
 
 template <typename T, typename C, typename A>
-kll_quantile_calculator<T, C, A>::kll_quantile_calculator(const T* items, const uint32_t* levels, uint8_t num_levels, uint64_t n):
-n_(n), levels_(num_levels + 1)
+kll_quantile_calculator<T, C, A>::kll_quantile_calculator(const T* items, const uint32_t* levels, uint8_t num_levels, uint64_t n, const A& allocator):
+n_(n), levels_(num_levels + 1, 0, allocator), entries_(allocator)
 {
   const uint32_t num_items = levels[num_levels] - levels[0];
   entries_.reserve(num_items);
@@ -116,7 +116,7 @@ uint32_t kll_quantile_calculator<T, C, A>::search_for_chunk_containing_pos(uint6
 template <typename T, typename C, typename A>
 void kll_quantile_calculator<T, C, A>::merge_sorted_blocks(Container& entries, const uint32_t* levels, uint8_t num_levels, uint32_t num_items) {
   if (num_levels == 1) return;
-  Container temporary;
+  Container temporary(entries.get_allocator());
   temporary.reserve(num_items);
   merge_sorted_blocks_direct(entries, temporary, levels, 0, num_levels);
 }
diff --git a/be/src/thirdparty/datasketches/kll_sketch.hpp b/be/src/thirdparty/datasketches/kll_sketch.hpp
index a4530c9..bbca76f 100644
--- a/be/src/thirdparty/datasketches/kll_sketch.hpp
+++ b/be/src/thirdparty/datasketches/kll_sketch.hpp
@@ -161,7 +161,7 @@ class kll_sketch {
     static const uint16_t MIN_K = DEFAULT_M;
     static const uint16_t MAX_K = (1 << 16) - 1;
 
-    explicit kll_sketch(uint16_t k = DEFAULT_K);
+    explicit kll_sketch(uint16_t k = DEFAULT_K, const A& allocator = A());
     kll_sketch(const kll_sketch& other);
     kll_sketch(kll_sketch&& other) noexcept;
     ~kll_sketch();
@@ -203,6 +203,12 @@ class kll_sketch {
     bool is_empty() const;
 
     /**
+     * Returns configured parameter k
+     * @return parameter k
+     */
+    uint16_t get_k() const;
+
+    /**
      * Returns the length of the input stream.
      * @return stream length
      */
@@ -401,7 +407,7 @@ class kll_sketch {
      * @param is input stream
      * @return an instance of a sketch
      */
-    static kll_sketch deserialize(std::istream& is);
+    static kll_sketch<T, C, S, A> deserialize(std::istream& is, const A& allocator = A());
 
     /**
      * This method deserializes a sketch from a given array of bytes.
@@ -409,7 +415,7 @@ class kll_sketch {
      * @param size the size of the array
      * @return an instance of a sketch
      */
-    static kll_sketch deserialize(const void* bytes, size_t size);
+    static kll_sketch<T, C, S, A> deserialize(const void* bytes, size_t size, const A& allocator = A());
 
     /*
      * Gets the normalized rank error given k and pmf.
@@ -461,6 +467,7 @@ class kll_sketch {
     static const uint8_t PREAMBLE_INTS_SHORT = 2; // for empty and single item
     static const uint8_t PREAMBLE_INTS_FULL = 5;
 
+    A allocator_;
     uint16_t k_;
     uint8_t m_; // minimum buffer "width"
     uint16_t min_k_; // for error estimation after merging with different k
diff --git a/be/src/thirdparty/datasketches/kll_sketch_impl.hpp b/be/src/thirdparty/datasketches/kll_sketch_impl.hpp
index f0c5ff3..0e0ef87 100644
--- a/be/src/thirdparty/datasketches/kll_sketch_impl.hpp
+++ b/be/src/thirdparty/datasketches/kll_sketch_impl.hpp
@@ -30,13 +30,14 @@
 namespace datasketches {
 
 template<typename T, typename C, typename S, typename A>
-kll_sketch<T, C, S, A>::kll_sketch(uint16_t k):
+kll_sketch<T, C, S, A>::kll_sketch(uint16_t k, const A& allocator):
+allocator_(allocator),
 k_(k),
 m_(DEFAULT_M),
 min_k_(k),
 n_(0),
 num_levels_(1),
-levels_(2),
+levels_(2, 0, allocator),
 items_(nullptr),
 items_size_(k_),
 min_value_(nullptr),
@@ -47,11 +48,12 @@ is_level_zero_sorted_(false)
     throw std::invalid_argument("K must be >= " + std::to_string(MIN_K) + " and <= " + std::to_string(MAX_K) + ": " + std::to_string(k));
   }
   levels_[0] = levels_[1] = k;
-  items_ = A().allocate(items_size_);
+  items_ = allocator_.allocate(items_size_);
 }
 
 template<typename T, typename C, typename S, typename A>
 kll_sketch<T, C, S, A>::kll_sketch(const kll_sketch& other):
+allocator_(other.allocator_),
 k_(other.k_),
 m_(other.m_),
 min_k_(other.min_k_),
@@ -64,14 +66,15 @@ min_value_(nullptr),
 max_value_(nullptr),
 is_level_zero_sorted_(other.is_level_zero_sorted_)
 {
-  items_ = A().allocate(items_size_);
+  items_ = allocator_.allocate(items_size_);
   std::copy(&other.items_[levels_[0]], &other.items_[levels_[num_levels_]], &items_[levels_[0]]);
-  if (other.min_value_ != nullptr) min_value_ = new (A().allocate(1)) T(*other.min_value_);
-  if (other.max_value_ != nullptr) max_value_ = new (A().allocate(1)) T(*other.max_value_);
+  if (other.min_value_ != nullptr) min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
+  if (other.max_value_ != nullptr) max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
 }
 
 template<typename T, typename C, typename S, typename A>
 kll_sketch<T, C, S, A>::kll_sketch(kll_sketch&& other) noexcept:
+allocator_(std::move(other.allocator_)),
 k_(other.k_),
 m_(other.m_),
 min_k_(other.min_k_),
@@ -91,7 +94,8 @@ is_level_zero_sorted_(other.is_level_zero_sorted_)
 
 template<typename T, typename C, typename S, typename A>
 kll_sketch<T, C, S, A>& kll_sketch<T, C, S, A>::operator=(const kll_sketch& other) {
-  kll_sketch copy(other);
+  kll_sketch<T, C, S, A> copy(other);
+  std::swap(allocator_, copy.allocator_);
   std::swap(k_, copy.k_);
   std::swap(m_, copy.m_);
   std::swap(min_k_, copy.min_k_);
@@ -108,6 +112,7 @@ kll_sketch<T, C, S, A>& kll_sketch<T, C, S, A>::operator=(const kll_sketch& othe
 
 template<typename T, typename C, typename S, typename A>
 kll_sketch<T, C, S, A>& kll_sketch<T, C, S, A>::operator=(kll_sketch&& other) {
+  std::swap(allocator_, other.allocator_);
   std::swap(k_, other.k_);
   std::swap(m_, other.m_);
   std::swap(min_k_, other.min_k_);
@@ -128,15 +133,15 @@ kll_sketch<T, C, S, A>::~kll_sketch() {
     const uint32_t begin = levels_[0];
     const uint32_t end = levels_[num_levels_];
     for (uint32_t i = begin; i < end; i++) items_[i].~T();
-    A().deallocate(items_, items_size_);
+    allocator_.deallocate(items_, items_size_);
   }
   if (min_value_ != nullptr) {
     min_value_->~T();
-    A().deallocate(min_value_, 1);
+    allocator_.deallocate(min_value_, 1);
   }
   if (max_value_ != nullptr) {
     max_value_->~T();
-    A().deallocate(max_value_, 1);
+    allocator_.deallocate(max_value_, 1);
   }
 }
 
@@ -159,8 +164,8 @@ void kll_sketch<T, C, S, A>::update(T&& value) {
 template<typename T, typename C, typename S, typename A>
 void kll_sketch<T, C, S, A>::update_min_max(const T& value) {
   if (is_empty()) {
-    min_value_ = new (A().allocate(1)) T(value);
-    max_value_ = new (A().allocate(1)) T(value);
+    min_value_ = new (allocator_.allocate(1)) T(value);
+    max_value_ = new (allocator_.allocate(1)) T(value);
   } else {
     if (C()(value, *min_value_)) *min_value_ = value;
     if (C()(*max_value_, value)) *max_value_ = value;
@@ -182,8 +187,8 @@ void kll_sketch<T, C, S, A>::merge(const kll_sketch& other) {
     throw std::invalid_argument("incompatible M: " + std::to_string(m_) + " and " + std::to_string(other.m_));
   }
   if (is_empty()) {
-    min_value_ = new (A().allocate(1)) T(*other.min_value_);
-    max_value_ = new (A().allocate(1)) T(*other.max_value_);
+    min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
+    max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
   } else {
     if (C()(*other.min_value_, *min_value_)) *min_value_ = *other.min_value_;
     if (C()(*max_value_, *other.max_value_)) *max_value_ = *other.max_value_;
@@ -206,8 +211,8 @@ void kll_sketch<T, C, S, A>::merge(kll_sketch&& other) {
     throw std::invalid_argument("incompatible M: " + std::to_string(m_) + " and " + std::to_string(other.m_));
   }
   if (is_empty()) {
-    min_value_ = new (A().allocate(1)) T(std::move(*other.min_value_));
-    max_value_ = new (A().allocate(1)) T(std::move(*other.max_value_));
+    min_value_ = new (allocator_.allocate(1)) T(std::move(*other.min_value_));
+    max_value_ = new (allocator_.allocate(1)) T(std::move(*other.max_value_));
   } else {
     if (C()(*other.min_value_, *min_value_)) *min_value_ = std::move(*other.min_value_);
     if (C()(*max_value_, *other.max_value_)) *max_value_ = std::move(*other.max_value_);
@@ -229,6 +234,11 @@ bool kll_sketch<T, C, S, A>::is_empty() const {
 }
 
 template<typename T, typename C, typename S, typename A>
+uint16_t kll_sketch<T, C, S, A>::get_k() const {
+  return k_;
+}
+
+template<typename T, typename C, typename S, typename A>
 uint64_t kll_sketch<T, C, S, A>::get_n() const {
   return n_;
 }
@@ -270,8 +280,7 @@ T kll_sketch<T, C, S, A>::get_quantile(double fraction) const {
 
 template<typename T, typename C, typename S, typename A>
 std::vector<T, A> kll_sketch<T, C, S, A>::get_quantiles(const double* fractions, uint32_t size) const {
-  std::vector<T, A> quantiles;
-  quantiles.reserve(size);
+  std::vector<T, A> quantiles(allocator_);
   if (is_empty()) return quantiles;
   std::unique_ptr<kll_quantile_calculator<T, C, A>, std::function<void(kll_quantile_calculator<T, C, A>*)>> quantile_calculator;
   quantiles.reserve(size);
@@ -295,11 +304,11 @@ std::vector<T, A> kll_sketch<T, C, S, A>::get_quantiles(const double* fractions,
 
 template<typename T, typename C, typename S, typename A>
 std::vector<T, A> kll_sketch<T, C, S, A>::get_quantiles(size_t num) const {
-  if (is_empty()) return std::vector<T, A>();
+  if (is_empty()) return std::vector<T, A>(allocator_);
   if (num == 0) {
     throw std::invalid_argument("num must be > 0");
   }
-  std::vector<double> fractions(num);
+  vector_d<A> fractions(num, 0, allocator_);
   fractions[0] = 0.0;
   for (size_t i = 1; i < num; i++) {
     fractions[i] = static_cast<double>(i) / (num - 1);
@@ -411,7 +420,7 @@ template<typename T, typename C, typename S, typename A>
 vector_u8<A> kll_sketch<T, C, S, A>::serialize(unsigned header_size_bytes) const {
   const bool is_single_item = n_ == 1;
   const size_t size = header_size_bytes + get_serialized_size_bytes();
-  vector_u8<A> bytes(size);
+  vector_u8<A> bytes(size, 0, allocator_);
   uint8_t* ptr = bytes.data() + header_size_bytes;
   const uint8_t* end_ptr = ptr + size;
   const uint8_t preamble_ints(is_empty() || is_single_item ? PREAMBLE_INTS_SHORT : PREAMBLE_INTS_FULL);
@@ -449,7 +458,7 @@ vector_u8<A> kll_sketch<T, C, S, A>::serialize(unsigned header_size_bytes) const
 }
 
 template<typename T, typename C, typename S, typename A>
-kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is) {
+kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is, const A& allocator) {
   uint8_t preamble_ints;
   is.read((char*)&preamble_ints, sizeof(preamble_ints));
   uint8_t serial_version;
@@ -472,7 +481,7 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is) {
 
   if (!is.good()) throw std::runtime_error("error reading from std::istream");
   const bool is_empty(flags_byte & (1 << flags::IS_EMPTY));
-  if (is_empty) return kll_sketch(k);
+  if (is_empty) return kll_sketch(k, allocator);
 
   uint64_t n;
   uint16_t min_k;
@@ -488,7 +497,7 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is) {
     is.read((char*)&num_levels, sizeof(num_levels));
     is.read((char*)&unused, sizeof(unused));
   }
-  vector_u32<A> levels(num_levels + 1);
+  vector_u32<A> levels(num_levels + 1, 0, allocator);
   const uint32_t capacity(kll_helper::compute_total_capacity(k, m, num_levels));
   if (is_single_item) {
     levels[0] = capacity - 1;
@@ -497,41 +506,43 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is) {
     is.read((char*)levels.data(), sizeof(levels[0]) * num_levels);
   }
   levels[num_levels] = capacity;
-  auto item_buffer_deleter = [](T* ptr) { A().deallocate(ptr, 1); };
-  std::unique_ptr<T, decltype(item_buffer_deleter)> min_value_buffer(A().allocate(1), item_buffer_deleter);
-  std::unique_ptr<T, decltype(item_buffer_deleter)> max_value_buffer(A().allocate(1), item_buffer_deleter);
-  std::unique_ptr<T, item_deleter> min_value;
-  std::unique_ptr<T, item_deleter> max_value;
+  A alloc(allocator);
+  auto item_buffer_deleter = [&alloc](T* ptr) { alloc.deallocate(ptr, 1); };
+  std::unique_ptr<T, decltype(item_buffer_deleter)> min_value_buffer(alloc.allocate(1), item_buffer_deleter);
+  std::unique_ptr<T, decltype(item_buffer_deleter)> max_value_buffer(alloc.allocate(1), item_buffer_deleter);
+  std::unique_ptr<T, item_deleter> min_value(nullptr, item_deleter(allocator));
+  std::unique_ptr<T, item_deleter> max_value(nullptr, item_deleter(allocator));
   if (!is_single_item) {
     S().deserialize(is, min_value_buffer.get(), 1);
     // serde call did not throw, repackage with destrtuctor
-    min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter());
+    min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
     S().deserialize(is, max_value_buffer.get(), 1);
     // serde call did not throw, repackage with destrtuctor
-    max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter());
+    max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
   }
-  auto items_buffer_deleter = [capacity](T* ptr) { A().deallocate(ptr, capacity); };
-  std::unique_ptr<T, decltype(items_buffer_deleter)> items_buffer(A().allocate(capacity), items_buffer_deleter);
+  auto items_buffer_deleter = [capacity, &alloc](T* ptr) { alloc.deallocate(ptr, capacity); };
+  std::unique_ptr<T, decltype(items_buffer_deleter)> items_buffer(alloc.allocate(capacity), items_buffer_deleter);
   const auto num_items = levels[num_levels] - levels[0];
   S().deserialize(is, &items_buffer.get()[levels[0]], num_items);
   // serde call did not throw, repackage with destrtuctors
-  std::unique_ptr<T, items_deleter> items(items_buffer.release(), items_deleter(levels[0], capacity));
+  std::unique_ptr<T, items_deleter> items(items_buffer.release(), items_deleter(levels[0], capacity, allocator));
   const bool is_level_zero_sorted = (flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED)) > 0;
   if (is_single_item) {
     new (min_value_buffer.get()) T(items.get()[levels[0]]);
     // copy did not throw, repackage with destrtuctor
-    min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter());
+    min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
     new (max_value_buffer.get()) T(items.get()[levels[0]]);
     // copy did not throw, repackage with destrtuctor
-    max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter());
+    max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
   }
-  if (!is.good()) throw std::runtime_error("error reading from std::istream");
+  if (!is.good())
+    throw std::runtime_error("error reading from std::istream");
   return kll_sketch(k, min_k, n, num_levels, std::move(levels), std::move(items), capacity,
       std::move(min_value), std::move(max_value), is_level_zero_sorted);
 }
 
 template<typename T, typename C, typename S, typename A>
-kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, size_t size) {
+kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, size_t size, const A& allocator) {
   ensure_minimum_memory(size, 8);
   const char* ptr = static_cast<const char*>(bytes);
   uint8_t preamble_ints;
@@ -555,7 +566,7 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, si
   ensure_minimum_memory(size, 1 << preamble_ints);
 
   const bool is_empty(flags_byte & (1 << flags::IS_EMPTY));
-  if (is_empty) return kll_sketch<T, C, S, A>(k);
+  if (is_empty) return kll_sketch<T, C, S, A>(k, allocator);
 
   uint64_t n;
   uint16_t min_k;
@@ -572,7 +583,7 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, si
     ptr += copy_from_mem(ptr, &num_levels, sizeof(num_levels));
     ptr++; // skip unused byte
   }
-  vector_u32<A> levels(num_levels + 1);
+  vector_u32<A> levels(num_levels + 1, 0, allocator);
   const uint32_t capacity(kll_helper::compute_total_capacity(k, m, num_levels));
   if (is_single_item) {
     levels[0] = capacity - 1;
@@ -581,35 +592,36 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, si
     ptr += copy_from_mem(ptr, levels.data(), sizeof(levels[0]) * num_levels);
   }
   levels[num_levels] = capacity;
-  auto item_buffer_deleter = [](T* ptr) { A().deallocate(ptr, 1); };
-  std::unique_ptr<T, decltype(item_buffer_deleter)> min_value_buffer(A().allocate(1), item_buffer_deleter);
-  std::unique_ptr<T, decltype(item_buffer_deleter)> max_value_buffer(A().allocate(1), item_buffer_deleter);
-  std::unique_ptr<T, item_deleter> min_value;
-  std::unique_ptr<T, item_deleter> max_value;
+  A alloc(allocator);
+  auto item_buffer_deleter = [&alloc](T* ptr) { alloc.deallocate(ptr, 1); };
+  std::unique_ptr<T, decltype(item_buffer_deleter)> min_value_buffer(alloc.allocate(1), item_buffer_deleter);
+  std::unique_ptr<T, decltype(item_buffer_deleter)> max_value_buffer(alloc.allocate(1), item_buffer_deleter);
+  std::unique_ptr<T, item_deleter> min_value(nullptr, item_deleter(allocator));
+  std::unique_ptr<T, item_deleter> max_value(nullptr, item_deleter(allocator));
   if (!is_single_item) {
     ptr += S().deserialize(ptr, end_ptr - ptr, min_value_buffer.get(), 1);
     // serde call did not throw, repackage with destrtuctor
-    min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter());
+    min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
     ptr += S().deserialize(ptr, end_ptr - ptr, max_value_buffer.get(), 1);
     // serde call did not throw, repackage with destrtuctor
-    max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter());
+    max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
   }
-  auto items_buffer_deleter = [capacity](T* ptr) { A().deallocate(ptr, capacity); };
-  std::unique_ptr<T, decltype(items_buffer_deleter)> items_buffer(A().allocate(capacity), items_buffer_deleter);
+  auto items_buffer_deleter = [capacity, &alloc](T* ptr) { alloc.deallocate(ptr, capacity); };
+  std::unique_ptr<T, decltype(items_buffer_deleter)> items_buffer(alloc.allocate(capacity), items_buffer_deleter);
   const auto num_items = levels[num_levels] - levels[0];
   ptr += S().deserialize(ptr, end_ptr - ptr, &items_buffer.get()[levels[0]], num_items);
   // serde call did not throw, repackage with destrtuctors
-  std::unique_ptr<T, items_deleter> items(items_buffer.release(), items_deleter(levels[0], capacity));
+  std::unique_ptr<T, items_deleter> items(items_buffer.release(), items_deleter(levels[0], capacity, allocator));
   const size_t delta = ptr - static_cast<const char*>(bytes);
   if (delta != size) throw std::logic_error("deserialized size mismatch: " + std::to_string(delta) + " != " + std::to_string(size));
   const bool is_level_zero_sorted = (flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED)) > 0;
   if (is_single_item) {
     new (min_value_buffer.get()) T(items.get()[levels[0]]);
     // copy did not throw, repackage with destrtuctor
-    min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter());
+    min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
     new (max_value_buffer.get()) T(items.get()[levels[0]]);
     // copy did not throw, repackage with destrtuctor
-    max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter());
+    max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
   }
   return kll_sketch(k, min_k, n, num_levels, std::move(levels), std::move(items), capacity,
       std::move(min_value), std::move(max_value), is_level_zero_sorted);
@@ -634,6 +646,7 @@ template<typename T, typename C, typename S, typename A>
 kll_sketch<T, C, S, A>::kll_sketch(uint16_t k, uint16_t min_k, uint64_t n, uint8_t num_levels, vector_u32<A>&& levels,
     std::unique_ptr<T, items_deleter> items, uint32_t items_size, std::unique_ptr<T, item_deleter> min_value,
     std::unique_ptr<T, item_deleter> max_value, bool is_level_zero_sorted):
+allocator_(levels.get_allocator()),
 k_(k),
 m_(DEFAULT_M),
 min_k_(min_k),
@@ -735,9 +748,9 @@ void kll_sketch<T, C, S, A>::add_empty_top_level_to_completely_full_sketch() {
   const uint32_t new_total_cap = cur_total_cap + delta_cap;
 
   // move (and shift) the current data into the new buffer
-  T* new_buf = A().allocate(new_total_cap);
+  T* new_buf = allocator_.allocate(new_total_cap);
   kll_helper::move_construct<T>(items_, 0, cur_total_cap, new_buf, delta_cap, true);
-  A().deallocate(items_, items_size_);
+  allocator_.deallocate(items_, items_size_);
   items_ = new_buf;
   items_size_ = new_total_cap;
 
@@ -763,19 +776,20 @@ void kll_sketch<T, C, S, A>::sort_level_zero() {
 template<typename T, typename C, typename S, typename A>
 std::unique_ptr<kll_quantile_calculator<T, C, A>, std::function<void(kll_quantile_calculator<T, C, A>*)>> kll_sketch<T, C, S, A>::get_quantile_calculator() {
   sort_level_zero();
-  typedef typename std::allocator_traits<A>::template rebind_alloc<kll_quantile_calculator<T, C, A>> AllocCalc;
+  using AllocCalc = typename std::allocator_traits<A>::template rebind_alloc<kll_quantile_calculator<T, C, A>>;
+  AllocCalc alloc(allocator_);
   std::unique_ptr<kll_quantile_calculator<T, C, A>, std::function<void(kll_quantile_calculator<T, C, A>*)>> quantile_calculator(
-    new (AllocCalc().allocate(1)) kll_quantile_calculator<T, C, A>(items_, levels_.data(), num_levels_, n_),
-    [](kll_quantile_calculator<T, C, A>* ptr){ ptr->~kll_quantile_calculator<T, C, A>(); AllocCalc().deallocate(ptr, 1); }
+    new (alloc.allocate(1)) kll_quantile_calculator<T, C, A>(items_, levels_.data(), num_levels_, n_, allocator_),
+    [&alloc](kll_quantile_calculator<T, C, A>* ptr){ ptr->~kll_quantile_calculator<T, C, A>(); alloc.deallocate(ptr, 1); }
   );
   return quantile_calculator;
 }
 
 template<typename T, typename C, typename S, typename A>
 vector_d<A> kll_sketch<T, C, S, A>::get_PMF_or_CDF(const T* split_points, uint32_t size, bool is_CDF) const {
-  if (is_empty()) return vector_d<A>();
+  if (is_empty()) return vector_d<A>(allocator_);
   kll_helper::validate_values<T, C>(split_points, size);
-  vector_d<A> buckets(size + 1, 0);
+  vector_d<A> buckets(size + 1, 0, allocator_);
   uint8_t level = 0;
   uint64_t weight = 1;
   while (level < num_levels_) {
@@ -845,12 +859,13 @@ template<typename T, typename C, typename S, typename A>
 template<typename O>
 void kll_sketch<T, C, S, A>::merge_higher_levels(O&& other, uint64_t final_n) {
   const uint32_t tmp_num_items = get_num_retained() + other.get_num_retained_above_level_zero();
-  auto tmp_items_deleter = [tmp_num_items](T* ptr) { A().deallocate(ptr, tmp_num_items); }; // no destructor needed
-  const std::unique_ptr<T, decltype(tmp_items_deleter)> workbuf(A().allocate(tmp_num_items), tmp_items_deleter);
+  A alloc(allocator_);
+  auto tmp_items_deleter = [tmp_num_items, &alloc](T* ptr) { alloc.deallocate(ptr, tmp_num_items); }; // no destructor needed
+  const std::unique_ptr<T, decltype(tmp_items_deleter)> workbuf(allocator_.allocate(tmp_num_items), tmp_items_deleter);
   const uint8_t ub = kll_helper::ub_on_num_levels(final_n);
   const size_t work_levels_size = ub + 2; // ub+1 does not work
-  vector_u32<A> worklevels(work_levels_size);
-  vector_u32<A> outlevels(work_levels_size);
+  vector_u32<A> worklevels(work_levels_size, 0, allocator_);
+  vector_u32<A> outlevels(work_levels_size, 0, allocator_);
 
   const uint8_t provisional_num_levels = std::max(num_levels_, other.num_levels_);
 
@@ -864,9 +879,9 @@ void kll_sketch<T, C, S, A>::merge_higher_levels(O&& other, uint64_t final_n) {
 
   // now we need to transfer the results back into "this" sketch
   if (result.final_capacity != items_size_) {
-    A().deallocate(items_, items_size_);
+    allocator_.deallocate(items_, items_size_);
     items_size_ = result.final_capacity;
-    items_ = A().allocate(items_size_);
+    items_ = allocator_.allocate(items_size_);
   }
   const uint32_t free_space_at_bottom = result.final_capacity - result.final_num_items;
   kll_helper::move_construct<T>(workbuf.get(), outlevels[0], outlevels[0] + result.final_num_items, items_, free_space_at_bottom, true);
@@ -1101,29 +1116,32 @@ const std::pair<const T&, const uint64_t> kll_sketch<T, C, S, A>::const_iterator
 template<typename T, typename C, typename S, typename A>
 class kll_sketch<T, C, S, A>::item_deleter {
   public:
-  void operator() (T* ptr) const {
+  item_deleter(const A& allocator): allocator_(allocator) {}
+  void operator() (T* ptr) {
     if (ptr != nullptr) {
       ptr->~T();
-      A().deallocate(ptr, 1);
+      allocator_.deallocate(ptr, 1);
     }
   }
+  private:
+  A allocator_;
 };
 
 template<typename T, typename C, typename S, typename A>
 class kll_sketch<T, C, S, A>::items_deleter {
   public:
-  items_deleter(uint32_t start, uint32_t num): start(start), num(num) {}
-  void operator() (T* ptr) const {
+  items_deleter(uint32_t start, uint32_t num, const A& allocator):
+    allocator_(allocator), start_(start), num_(num) {}
+  void operator() (T* ptr) {
     if (ptr != nullptr) {
-      for (uint32_t i = start; i < num; ++i) {
-        ptr[i].~T();
-      }
-      A().deallocate(ptr, num);
+      for (uint32_t i = start_; i < num_; ++i) ptr[i].~T();
+      allocator_.deallocate(ptr, num_);
     }
   }
   private:
-  uint32_t start;
-  uint32_t num;
+  A allocator_;
+  uint32_t start_;
+  uint32_t num_;
 };
 
 } /* namespace datasketches */
diff --git a/be/src/thirdparty/datasketches/memory_operations.hpp b/be/src/thirdparty/datasketches/memory_operations.hpp
index 80dc3a3..986b2b0 100644
--- a/be/src/thirdparty/datasketches/memory_operations.hpp
+++ b/be/src/thirdparty/datasketches/memory_operations.hpp
@@ -52,6 +52,18 @@ static inline size_t copy_to_mem(const void* src, void* dst, size_t size) {
   return size;
 }
 
+template<typename T>
+static inline size_t copy_to_mem(const T& item, void* dst) {
+  memcpy(dst, &item, sizeof(T));
+  return sizeof(T);
+}
+
+template<typename T>
+static inline size_t copy_from_mem(const void* src, T& item) {
+  memcpy(&item, src, sizeof(T));
+  return sizeof(T);
+}
+
 } // namespace
 
 #endif // _MEMORY_OPERATIONS_HPP_
diff --git a/be/src/thirdparty/datasketches/theta_a_not_b.hpp b/be/src/thirdparty/datasketches/theta_a_not_b.hpp
index db66ac7..4beef60 100644
--- a/be/src/thirdparty/datasketches/theta_a_not_b.hpp
+++ b/be/src/thirdparty/datasketches/theta_a_not_b.hpp
@@ -20,51 +20,34 @@
 #ifndef THETA_A_NOT_B_HPP_
 #define THETA_A_NOT_B_HPP_
 
-#include <memory>
-#include <functional>
-#include <climits>
-
 #include "theta_sketch.hpp"
-#include "common_defs.hpp"
+#include "theta_set_difference_base.hpp"
 
 namespace datasketches {
 
-/*
- * author Alexander Saydakov
- * author Lee Rhodes
- * author Kevin Lang
- */
-
-template<typename A>
+template<typename Allocator = std::allocator<uint64_t>>
 class theta_a_not_b_alloc {
 public:
-  /**
-   * Creates an instance of the a-not-b operation (set difference) with a given has seed.
-   * @param seed hash seed
-   */
-  explicit theta_a_not_b_alloc(uint64_t seed = DEFAULT_SEED);
+  using Entry = uint64_t;
+  using ExtractKey = trivial_extract_key;
+  using CompactSketch = compact_theta_sketch_alloc<Allocator>;
+  using State = theta_set_difference_base<Entry, ExtractKey, CompactSketch, Allocator>;
+
+  explicit theta_a_not_b_alloc(uint64_t seed = DEFAULT_SEED, const Allocator& allocator = Allocator());
 
   /**
    * Computes the a-not-b set operation given two sketches.
    * @return the result of a-not-b
    */
-  compact_theta_sketch_alloc<A> compute(const theta_sketch_alloc<A>& a, const theta_sketch_alloc<A>& b, bool ordered = true) const;
+  template<typename FwdSketch, typename Sketch>
+  CompactSketch compute(FwdSketch&& a, const Sketch& b, bool ordered = true) const;
 
 private:
-  typedef typename std::allocator_traits<A>::template rebind_alloc<uint64_t> AllocU64;
-  uint16_t seed_hash_;
-
-  class less_than {
-  public:
-    explicit less_than(uint64_t value): value(value) {}
-    bool operator()(uint64_t value) const { return value < this->value; }
-  private:
-    uint64_t value;
-  };
+  State state_;
 };
 
 // alias with default allocator for convenience
-typedef theta_a_not_b_alloc<std::allocator<void>> theta_a_not_b;
+using theta_a_not_b = theta_a_not_b_alloc<std::allocator<uint64_t>>;
 
 } /* namespace datasketches */
 
diff --git a/be/src/thirdparty/datasketches/theta_a_not_b_impl.hpp b/be/src/thirdparty/datasketches/theta_a_not_b_impl.hpp
index 4343ee3..4c17bbf 100644
--- a/be/src/thirdparty/datasketches/theta_a_not_b_impl.hpp
+++ b/be/src/thirdparty/datasketches/theta_a_not_b_impl.hpp
@@ -26,56 +26,15 @@
 
 namespace datasketches {
 
-/*
- * author Alexander Saydakov
- * author Lee Rhodes
- * author Kevin Lang
- */
-
 template<typename A>
-theta_a_not_b_alloc<A>::theta_a_not_b_alloc(uint64_t seed):
-seed_hash_(theta_sketch_alloc<A>::get_seed_hash(seed))
+theta_a_not_b_alloc<A>::theta_a_not_b_alloc(uint64_t seed, const A& allocator):
+state_(seed, allocator)
 {}
 
 template<typename A>
-compact_theta_sketch_alloc<A> theta_a_not_b_alloc<A>::compute(const theta_sketch_alloc<A>& a, const theta_sketch_alloc<A>& b, bool ordered) const {
-  if (a.is_empty() || a.get_num_retained() == 0 || b.is_empty()) return compact_theta_sketch_alloc<A>(a, ordered);
-  if (a.get_seed_hash() != seed_hash_) throw std::invalid_argument("A seed hash mismatch");
-  if (b.get_seed_hash() != seed_hash_) throw std::invalid_argument("B seed hash mismatch");
-
-  const uint64_t theta = std::min(a.get_theta64(), b.get_theta64());
-  vector_u64<A> keys;
-  bool is_empty = a.is_empty();
-
-  if (b.get_num_retained() == 0) {
-    std::copy_if(a.begin(), a.end(), std::back_inserter(keys), less_than(theta));
-  } else {
-    if (a.is_ordered() && b.is_ordered()) { // sort-based
-      std::set_difference(a.begin(), a.end(), b.begin(), b.end(), conditional_back_inserter(keys, less_than(theta)));
-    } else { // hash-based
-      const uint8_t lg_size = lg_size_from_count(b.get_num_retained(), update_theta_sketch_alloc<A>::REBUILD_THRESHOLD);
-      vector_u64<A> b_hash_table(1 << lg_size, 0);
-      for (auto key: b) {
-        if (key < theta) {
-          update_theta_sketch_alloc<A>::hash_search_or_insert(key, b_hash_table.data(), lg_size);
-        } else if (b.is_ordered()) {
-          break; // early stop
-        }
-      }
-
-      // scan A lookup B
-      for (auto key: a) {
-        if (key < theta) {
-          if (!update_theta_sketch_alloc<A>::hash_search(key, b_hash_table.data(), lg_size)) keys.push_back(key);
-        } else if (a.is_ordered()) {
-          break; // early stop
-        }
-      }
-    }
-  }
-  if (keys.empty() && theta == theta_sketch_alloc<A>::MAX_THETA) is_empty = true;
-  if (ordered && !a.is_ordered()) std::sort(keys.begin(), keys.end());
-  return compact_theta_sketch_alloc<A>(is_empty, theta, std::move(keys), seed_hash_, a.is_ordered() || ordered);
+template<typename FwdSketch, typename Sketch>
+auto theta_a_not_b_alloc<A>::compute(FwdSketch&& a, const Sketch& b, bool ordered) const -> CompactSketch {
+  return state_.compute(std::forward<FwdSketch>(a), b, ordered);
 }
 
 } /* namespace datasketches */
diff --git a/be/src/thirdparty/datasketches/CubicInterpolation.hpp b/be/src/thirdparty/datasketches/theta_comparators.hpp
similarity index 57%
copy from be/src/thirdparty/datasketches/CubicInterpolation.hpp
copy to be/src/thirdparty/datasketches/theta_comparators.hpp
index b9cdfe7..e8a39b7 100644
--- a/be/src/thirdparty/datasketches/CubicInterpolation.hpp
+++ b/be/src/thirdparty/datasketches/theta_comparators.hpp
@@ -17,27 +17,32 @@
  * under the License.
  */
 
-#ifndef _CUBICINTERPOLATION_HPP_
-#define _CUBICINTERPOLATION_HPP_
-
-#include <memory>
+#ifndef THETA_COMPARATORS_HPP_
+#define THETA_COMPARATORS_HPP_
 
 namespace datasketches {
 
-template<typename A = std::allocator<char>>
-class CubicInterpolation {
-  public:
-    static double usingXAndYTables(const double xArr[], const double yArr[],
-                                   int len, double x);
-
-    static double usingXAndYTables(double x);
-
-    static double usingXArrAndYStride(const double xArr[], const int xArrLen,
-                                      double yStride, double x);
+template<typename ExtractKey>
+struct compare_by_key {
+  template<typename Entry1, typename Entry2>
+  bool operator()(Entry1&& a, Entry2&& b) const {
+    return ExtractKey()(std::forward<Entry1>(a)) < ExtractKey()(std::forward<Entry2>(b));
+  }
 };
 
-}
+// less than
+
+template<typename Key, typename Entry, typename ExtractKey>
+class key_less_than {
+public:
+  explicit key_less_than(const Key& key): key(key) {}
+  bool operator()(const Entry& entry) const {
+    return ExtractKey()(entry) < this->key;
+  }
+private:
+  Key key;
+};
 
-#include "CubicInterpolation-internal.hpp"
+} /* namespace datasketches */
 
-#endif /* _CUBICINTERPOLATION_HPP_ */
\ No newline at end of file
+#endif
diff --git a/be/src/thirdparty/datasketches/CompositeInterpolationXTable.hpp b/be/src/thirdparty/datasketches/theta_constants.hpp
similarity index 66%
copy from be/src/thirdparty/datasketches/CompositeInterpolationXTable.hpp
copy to be/src/thirdparty/datasketches/theta_constants.hpp
index 8baecbe..d5d6fd9 100644
--- a/be/src/thirdparty/datasketches/CompositeInterpolationXTable.hpp
+++ b/be/src/thirdparty/datasketches/theta_constants.hpp
@@ -17,24 +17,20 @@
  * under the License.
  */
 
-#ifndef _COMPOSITEINTERPOLATIONXTABLE_HPP_
-#define _COMPOSITEINTERPOLATIONXTABLE_HPP_
+#ifndef THETA_CONSTANTS_HPP_
+#define THETA_CONSTANTS_HPP_
 
-#include <memory>
+#include <climits>
 
 namespace datasketches {
 
-template<typename A = std::allocator<char>>
-class CompositeInterpolationXTable {
-  public:
-    static int get_y_stride(int logK);
-
-    static const double* get_x_arr(int logK);
-    static int get_x_arr_length();
-};
-
+namespace theta_constants {
+  enum resize_factor { X1, X2, X4, X8 };
+  static const uint64_t MAX_THETA = LLONG_MAX; // signed max for compatibility with Java
+  static const uint8_t MIN_LG_K = 5;
+  static const uint8_t MAX_LG_K = 26;
 }
 
-#include "CompositeInterpolationXTable-internal.hpp"
+} /* namespace datasketches */
 
-#endif /* _COMPOSITEINTERPOLATIONXTABLE_HPP_ */
\ No newline at end of file
+#endif
diff --git a/be/src/thirdparty/datasketches/theta_helpers.hpp b/be/src/thirdparty/datasketches/theta_helpers.hpp
new file mode 100644
index 0000000..6852590
--- /dev/null
+++ b/be/src/thirdparty/datasketches/theta_helpers.hpp
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THETA_HELPERS_HPP_
+#define THETA_HELPERS_HPP_
+
+#include <string>
+#include <stdexcept>
+
+namespace datasketches {
+
+template<typename T>
+static void check_value(T actual, T expected, const char* description) {
+  if (actual != expected) {
+    throw std::invalid_argument(std::string(description) + " mismatch: expected " + std::to_string(expected) + ", actual " + std::to_string(actual));
+  }
+}
+
+template<bool dummy>
+class checker {
+public:
+  static void check_serial_version(uint8_t actual, uint8_t expected) {
+    check_value(actual, expected, "serial version");
+  }
+  static void check_sketch_family(uint8_t actual, uint8_t expected) {
+    check_value(actual, expected, "sketch family");
+  }
+  static void check_sketch_type(uint8_t actual, uint8_t expected) {
+    check_value(actual, expected, "sketch type");
+  }
+  static void check_seed_hash(uint16_t actual, uint16_t expected) {
+    check_value(actual, expected, "seed hash");
+  }
+};
+
+} /* namespace datasketches */
+
+#endif
diff --git a/be/src/thirdparty/datasketches/theta_intersection.hpp b/be/src/thirdparty/datasketches/theta_intersection.hpp
index 5945c52..98a8bf1 100644
--- a/be/src/thirdparty/datasketches/theta_intersection.hpp
+++ b/be/src/thirdparty/datasketches/theta_intersection.hpp
@@ -20,29 +20,28 @@
 #ifndef THETA_INTERSECTION_HPP_
 #define THETA_INTERSECTION_HPP_
 
-#include <memory>
-#include <functional>
-#include <climits>
-
 #include "theta_sketch.hpp"
-#include "common_defs.hpp"
+#include "theta_intersection_base.hpp"
 
 namespace datasketches {
 
-/*
- * author Alexander Saydakov
- * author Lee Rhodes
- * author Kevin Lang
- */
-
-template<typename A>
+template<typename Allocator = std::allocator<uint64_t>>
 class theta_intersection_alloc {
 public:
-  /**
-   * Creates an instance of the intersection with a given hash seed.
-   * @param seed hash seed
-   */
-  explicit theta_intersection_alloc(uint64_t seed = DEFAULT_SEED);
+  using Entry = uint64_t;
+  using ExtractKey = trivial_extract_key;
+  using Sketch = theta_sketch_alloc<Allocator>;
+  using CompactSketch = compact_theta_sketch_alloc<Allocator>;
+
+  struct pass_through_policy {
+    uint64_t operator()(uint64_t internal_entry, uint64_t incoming_entry) const {
+      unused(incoming_entry);
+      return internal_entry;
+    }
+  };
+  using State = theta_intersection_base<Entry, ExtractKey, pass_through_policy, Sketch, CompactSketch, Allocator>;
+
+  explicit theta_intersection_alloc(uint64_t seed = DEFAULT_SEED, const Allocator& allocator = Allocator());
 
   /**
    * Updates the intersection with a given sketch.
@@ -50,7 +49,8 @@ public:
    * can reduce the current set to leave the overlapping subset only.
    * @param sketch represents input set for the intersection
    */
-  void update(const theta_sketch_alloc<A>& sketch);
+  template<typename FwdSketch>
+  void update(FwdSketch&& sketch);
 
   /**
    * Produces a copy of the current state of the intersection.
@@ -59,7 +59,7 @@ public:
    * @param ordered optional flag to specify if ordered sketch should be produced
    * @return the result of the intersection
    */
-  compact_theta_sketch_alloc<A> get_result(bool ordered = true) const;
+  CompactSketch get_result(bool ordered = true) const;
 
   /**
    * Returns true if the state of the intersection is defined (not infinite "universe").
@@ -68,21 +68,14 @@ public:
   bool has_result() const;
 
 private:
-  typedef typename std::allocator_traits<A>::template rebind_alloc<uint64_t> AllocU64;
-  bool is_valid_;
-  bool is_empty_;
-  uint64_t theta_;
-  uint8_t lg_size_;
-  vector_u64<A> keys_;
-  uint32_t num_keys_;
-  uint16_t seed_hash_;
+  State state_;
 };
 
 // alias with default allocator for convenience
-typedef theta_intersection_alloc<std::allocator<void>> theta_intersection;
+using theta_intersection = theta_intersection_alloc<std::allocator<uint64_t>>;
 
 } /* namespace datasketches */
 
 #include "theta_intersection_impl.hpp"
 
-# endif
+#endif
diff --git a/be/src/thirdparty/datasketches/theta_intersection_base.hpp b/be/src/thirdparty/datasketches/theta_intersection_base.hpp
new file mode 100644
index 0000000..c034590
--- /dev/null
+++ b/be/src/thirdparty/datasketches/theta_intersection_base.hpp
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THETA_INTERSECTION_BASE_HPP_
+#define THETA_INTERSECTION_BASE_HPP_
+
+namespace datasketches {
+
+template<
+  typename Entry,
+  typename ExtractKey,
+  typename Policy,
+  typename Sketch,
+  typename CompactSketch,
+  typename Allocator
+>
+class theta_intersection_base {
+public:
+  using hash_table = theta_update_sketch_base<Entry, ExtractKey, Allocator>;
+  using resize_factor = typename hash_table::resize_factor;
+  using comparator = compare_by_key<ExtractKey>;
+  theta_intersection_base(uint64_t seed, const Policy& policy, const Allocator& allocator);
+
+  template<typename FwdSketch>
+  void update(FwdSketch&& sketch);
+
+  CompactSketch get_result(bool ordered = true) const;
+
+  bool has_result() const;
+
+  const Policy& get_policy() const;
+
+private:
+  Policy policy_;
+  bool is_valid_;
+  hash_table table_;
+};
+
+} /* namespace datasketches */
+
+#include "theta_intersection_base_impl.hpp"
+
+#endif
diff --git a/be/src/thirdparty/datasketches/theta_intersection_base_impl.hpp b/be/src/thirdparty/datasketches/theta_intersection_base_impl.hpp
new file mode 100644
index 0000000..286f0ca
--- /dev/null
+++ b/be/src/thirdparty/datasketches/theta_intersection_base_impl.hpp
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <iostream>
+#include <sstream>
+#include <algorithm>
+
+#include "conditional_forward.hpp"
+
+namespace datasketches {
+
+template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
+theta_intersection_base<EN, EK, P, S, CS, A>::theta_intersection_base(uint64_t seed, const P& policy, const A& allocator):
+policy_(policy),
+is_valid_(false),
+table_(0, 0, resize_factor::X1, theta_constants::MAX_THETA, seed, allocator, false)
+{}
+
+template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
+template<typename SS>
+void theta_intersection_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
+  if (table_.is_empty_) return;
+  if (!sketch.is_empty() && sketch.get_seed_hash() != compute_seed_hash(table_.seed_)) throw std::invalid_argument("seed hash mismatch");
+  table_.is_empty_ |= sketch.is_empty();
+  table_.theta_ = std::min(table_.theta_, sketch.get_theta64());
+  if (is_valid_ && table_.num_entries_ == 0) return;
+  if (sketch.get_num_retained() == 0) {
+    is_valid_ = true;
+    table_ = hash_table(0, 0, resize_factor::X1, table_.theta_, table_.seed_, table_.allocator_, table_.is_empty_);
+    return;
+  }
+  if (!is_valid_) { // first update, copy or move incoming sketch
+    is_valid_ = true;
+    const uint8_t lg_size = lg_size_from_count(sketch.get_num_retained(), theta_update_sketch_base<EN, EK, A>::REBUILD_THRESHOLD);
+    table_ = hash_table(lg_size, lg_size, resize_factor::X1, table_.theta_, table_.seed_, table_.allocator_, table_.is_empty_);
+    for (auto& entry: sketch) {
+      auto result = table_.find(EK()(entry));
+      if (result.second) {
+        throw std::invalid_argument("duplicate key, possibly corrupted input sketch");
+      }
+      table_.insert(result.first, conditional_forward<SS>(entry));
+    }
+    if (table_.num_entries_ != sketch.get_num_retained()) throw std::invalid_argument("num entries mismatch, possibly corrupted input sketch");
+  } else { // intersection
+    const uint32_t max_matches = std::min(table_.num_entries_, sketch.get_num_retained());
+    std::vector<EN, A> matched_entries(table_.allocator_);
+    matched_entries.reserve(max_matches);
+    uint32_t match_count = 0;
+    uint32_t count = 0;
+    for (auto& entry: sketch) {
+      if (EK()(entry) < table_.theta_) {
+        auto result = table_.find(EK()(entry));
+        if (result.second) {
+          if (match_count == max_matches) throw std::invalid_argument("max matches exceeded, possibly corrupted input sketch");
+          policy_(*result.first, conditional_forward<SS>(entry));
+          matched_entries.push_back(std::move(*result.first));
+          ++match_count;
+        }
+      } else if (sketch.is_ordered()) {
+        break; // early stop
+      }
+      ++count;
+    }
+    if (count > sketch.get_num_retained()) {
+      throw std::invalid_argument(" more keys than expected, possibly corrupted input sketch");
+    } else if (!sketch.is_ordered() && count < sketch.get_num_retained()) {
+      throw std::invalid_argument(" fewer keys than expected, possibly corrupted input sketch");
+    }
+    if (match_count == 0) {
+      table_ = hash_table(0, 0, resize_factor::X1, table_.theta_, table_.seed_, table_.allocator_, table_.is_empty_);
+      if (table_.theta_ == theta_constants::MAX_THETA) table_.is_empty_ = true;
+    } else {
+      const uint8_t lg_size = lg_size_from_count(match_count, theta_update_sketch_base<EN, EK, A>::REBUILD_THRESHOLD);
+      table_ = hash_table(lg_size, lg_size, resize_factor::X1, table_.theta_, table_.seed_, table_.allocator_, table_.is_empty_);
+      for (uint32_t i = 0; i < match_count; i++) {
+        auto result = table_.find(EK()(matched_entries[i]));
+        table_.insert(result.first, std::move(matched_entries[i]));
+      }
+    }
+  }
+}
+
+template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
+CS theta_intersection_base<EN, EK, P, S, CS, A>::get_result(bool ordered) const {
+  if (!is_valid_) throw std::invalid_argument("calling get_result() before calling update() is undefined");
+  std::vector<EN, A> entries(table_.allocator_);
+  if (table_.num_entries_ > 0) {
+    entries.reserve(table_.num_entries_);
+    std::copy_if(table_.begin(), table_.end(), std::back_inserter(entries), key_not_zero<EN, EK>());
+    if (ordered) std::sort(entries.begin(), entries.end(), comparator());
+  }
+  return CS(table_.is_empty_, ordered, compute_seed_hash(table_.seed_), table_.theta_, std::move(entries));
+}
+
+template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
+bool theta_intersection_base<EN, EK, P, S, CS, A>::has_result() const {
+  return is_valid_;
+}
+
+template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
+const P& theta_intersection_base<EN, EK, P, S, CS, A>::get_policy() const {
+  return policy_;
+}
+
+} /* namespace datasketches */
diff --git a/be/src/thirdparty/datasketches/theta_intersection_impl.hpp b/be/src/thirdparty/datasketches/theta_intersection_impl.hpp
index d090b3a..a0c4291 100644
--- a/be/src/thirdparty/datasketches/theta_intersection_impl.hpp
+++ b/be/src/thirdparty/datasketches/theta_intersection_impl.hpp
@@ -20,109 +20,27 @@
 #ifndef THETA_INTERSECTION_IMPL_HPP_
 #define THETA_INTERSECTION_IMPL_HPP_
 
-#include <algorithm>
-
 namespace datasketches {
 
-/*
- * author Alexander Saydakov
- * author Lee Rhodes
- * author Kevin Lang
- */
-
 template<typename A>
-theta_intersection_alloc<A>::theta_intersection_alloc(uint64_t seed):
-is_valid_(false),
-is_empty_(false),
-theta_(theta_sketch_alloc<A>::MAX_THETA),
-lg_size_(0),
-keys_(),
-num_keys_(0),
-seed_hash_(theta_sketch_alloc<A>::get_seed_hash(seed))
+theta_intersection_alloc<A>::theta_intersection_alloc(uint64_t seed, const A& allocator):
+state_(seed, pass_through_policy(), allocator)
 {}
 
 template<typename A>
-void theta_intersection_alloc<A>::update(const theta_sketch_alloc<A>& sketch) {
-  if (is_empty_) return;
-  if (!sketch.is_empty() && sketch.get_seed_hash() != seed_hash_) throw std::invalid_argument("seed hash mismatch");
-  is_empty_ |= sketch.is_empty();
-  theta_ = std::min(theta_, sketch.get_theta64());
-  if (is_valid_ && num_keys_ == 0) return;
-  if (sketch.get_num_retained() == 0) {
-    is_valid_ = true;
-    if (keys_.size() > 0) {
-      keys_.resize(0);
-      lg_size_ = 0;
-      num_keys_ = 0;
-    }
-    return;
-  }
-  if (!is_valid_) { // first update, clone incoming sketch
-    is_valid_ = true;
-    lg_size_ = lg_size_from_count(sketch.get_num_retained(), update_theta_sketch_alloc<A>::REBUILD_THRESHOLD);
-    keys_.resize(1 << lg_size_, 0);
-    for (auto key: sketch) {
-      if (!update_theta_sketch_alloc<A>::hash_search_or_insert(key, keys_.data(), lg_size_)) {
-        throw std::invalid_argument("duplicate key, possibly corrupted input sketch");
-      }
-      ++num_keys_;
-    }
-    if (num_keys_ != sketch.get_num_retained()) throw std::invalid_argument("num keys mismatch, possibly corrupted input sketch");
-  } else { // intersection
-    const uint32_t max_matches = std::min(num_keys_, sketch.get_num_retained());
-    vector_u64<A> matched_keys(max_matches);
-    uint32_t match_count = 0;
-    uint32_t count = 0;
-    for (auto key: sketch) {
-      if (key < theta_) {
-        if (update_theta_sketch_alloc<A>::hash_search(key, keys_.data(), lg_size_)) {
-          if (match_count == max_matches) throw std::invalid_argument("max matches exceeded, possibly corrupted input sketch");
-          matched_keys[match_count++] = key;
-        }
-      } else if (sketch.is_ordered()) {
-        break; // early stop
-      }
-      ++count;
-    }
-    if (count > sketch.get_num_retained()) {
-      throw std::invalid_argument(" more keys then expected, possibly corrupted input sketch");
-    } else if (!sketch.is_ordered() && count < sketch.get_num_retained()) {
-      throw std::invalid_argument(" fewer keys then expected, possibly corrupted input sketch");
-    }
-    if (match_count == 0) {
-      keys_.resize(0);
-      lg_size_ = 0;
-      num_keys_ = 0;
-      if (theta_ == theta_sketch_alloc<A>::MAX_THETA) is_empty_ = true;
-    } else {
-      const uint8_t lg_size = lg_size_from_count(match_count, update_theta_sketch_alloc<A>::REBUILD_THRESHOLD);
-      if (lg_size != lg_size_) {
-        lg_size_ = lg_size;
-        keys_.resize(1 << lg_size_);
-      }
-      std::fill(keys_.begin(), keys_.end(), 0);
-      for (uint32_t i = 0; i < match_count; i++) {
-        update_theta_sketch_alloc<A>::hash_search_or_insert(matched_keys[i], keys_.data(), lg_size_);
-      }
-      num_keys_ = match_count;
-    }
-  }
+template<typename SS>
+void theta_intersection_alloc<A>::update(SS&& sketch) {
+  state_.update(std::forward<SS>(sketch));
 }
 
 template<typename A>
-compact_theta_sketch_alloc<A> theta_intersection_alloc<A>::get_result(bool ordered) const {
-  if (!is_valid_) throw std::invalid_argument("calling get_result() before calling update() is undefined");
-  vector_u64<A> keys(num_keys_);
-  if (num_keys_ > 0) {
-    std::copy_if(keys_.begin(), keys_.end(), keys.begin(), [](uint64_t key) { return key != 0; });
-    if (ordered) std::sort(keys.begin(), keys.end());
-  }
-  return compact_theta_sketch_alloc<A>(is_empty_, theta_, std::move(keys), seed_hash_, ordered);
+auto theta_intersection_alloc<A>::get_result(bool ordered) const -> CompactSketch {
+  return state_.get_result(ordered);
 }
 
 template<typename A>
 bool theta_intersection_alloc<A>::has_result() const {
-  return is_valid_;
+  return state_.has_result();
 }
 
 } /* namespace datasketches */
diff --git a/be/src/thirdparty/datasketches/CompositeInterpolationXTable.hpp b/be/src/thirdparty/datasketches/theta_jaccard_similarity.hpp
similarity index 59%
copy from be/src/thirdparty/datasketches/CompositeInterpolationXTable.hpp
copy to be/src/thirdparty/datasketches/theta_jaccard_similarity.hpp
index 8baecbe..417ed54 100644
--- a/be/src/thirdparty/datasketches/CompositeInterpolationXTable.hpp
+++ b/be/src/thirdparty/datasketches/theta_jaccard_similarity.hpp
@@ -17,24 +17,21 @@
  * under the License.
  */
 
-#ifndef _COMPOSITEINTERPOLATIONXTABLE_HPP_
-#define _COMPOSITEINTERPOLATIONXTABLE_HPP_
+#ifndef THETA_JACCARD_SIMILARITY_HPP_
+#define THETA_JACCARD_SIMILARITY_HPP_
 
-#include <memory>
+#include "theta_jaccard_similarity_base.hpp"
+#include "theta_union.hpp"
+#include "theta_intersection.hpp"
 
 namespace datasketches {
 
-template<typename A = std::allocator<char>>
-class CompositeInterpolationXTable {
-  public:
-    static int get_y_stride(int logK);
+template<typename Allocator = std::allocator<uint64_t>>
+using theta_jaccard_similarity_alloc = jaccard_similarity_base<theta_union_alloc<Allocator>, theta_intersection_alloc<Allocator>, trivial_extract_key>;
 
-    static const double* get_x_arr(int logK);
-    static int get_x_arr_length();
-};
+// alias with default allocator for convenience
+using theta_jaccard_similarity = theta_jaccard_similarity_alloc<std::allocator<uint64_t>>;
 
-}
+} /* namespace datasketches */
 
-#include "CompositeInterpolationXTable-internal.hpp"
-
-#endif /* _COMPOSITEINTERPOLATIONXTABLE_HPP_ */
\ No newline at end of file
+# endif
diff --git a/be/src/thirdparty/datasketches/theta_jaccard_similarity_base.hpp b/be/src/thirdparty/datasketches/theta_jaccard_similarity_base.hpp
new file mode 100644
index 0000000..cb18601
--- /dev/null
+++ b/be/src/thirdparty/datasketches/theta_jaccard_similarity_base.hpp
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THETA_JACCARD_SIMILARITY_BASE_HPP_
+#define THETA_JACCARD_SIMILARITY_BASE_HPP_
+
+#include <memory>
+#include <array>
+
+#include "theta_constants.hpp"
+#include "bounds_on_ratios_in_theta_sketched_sets.hpp"
+#include "ceiling_power_of_2.hpp"
+#include "common_defs.hpp"
+
+namespace datasketches {
+
+template<typename Union, typename Intersection, typename ExtractKey>
+class jaccard_similarity_base {
+public:
+
+  /**
+   * Computes the Jaccard similarity index with upper and lower bounds. The Jaccard similarity index
+   * <i>J(A,B) = (A ^ B)/(A U B)</i> is used to measure how similar the two sketches are to each
+   * other. If J = 1.0, the sketches are considered equal. If J = 0, the two sketches are
+   * disjoint. A Jaccard of .95 means the overlap between the two
+   * sets is 95% of the union of the two sets.
+   *
+   * <p>Note: For very large pairs of sketches, where the configured nominal entries of the sketches
+   * are 2^25 or 2^26, this method may produce unpredictable results.
+   *
+   * @param sketch_a given sketch A
+   * @param sketch_b given sketch B
+   * @return a double array {LowerBound, Estimate, UpperBound} of the Jaccard index.
+   * The Upper and Lower bounds are for a confidence interval of 95.4% or +/- 2 standard deviations.
+   */
+  template<typename SketchA, typename SketchB>
+  static std::array<double, 3> jaccard(const SketchA& sketch_a, const SketchB& sketch_b) {
+    if (reinterpret_cast<const void*>(&sketch_a) == reinterpret_cast<const void*>(&sketch_b)) return {1, 1, 1};
+    if (sketch_a.is_empty() && sketch_b.is_empty()) return {1, 1, 1};
+    if (sketch_a.is_empty() || sketch_b.is_empty()) return {0, 0, 0};
+
+    auto union_ab = compute_union(sketch_a, sketch_b);
+    if (identical_sets(sketch_a, sketch_b, union_ab)) return {1, 1, 1};
+
+    // intersection
+    Intersection i;
+    i.update(sketch_a);
+    i.update(sketch_b);
+    i.update(union_ab); // ensures that intersection is a subset of the union
+    auto inter_abu = i.get_result(false);
+
+    return {
+      bounds_on_ratios_in_theta_sketched_sets<ExtractKey>::lower_bound_for_b_over_a(union_ab, inter_abu),
+      bounds_on_ratios_in_theta_sketched_sets<ExtractKey>::estimate_of_b_over_a(union_ab, inter_abu),
+      bounds_on_ratios_in_theta_sketched_sets<ExtractKey>::upper_bound_for_b_over_a(union_ab, inter_abu)
+    };
+  }
+
+  /**
+   * Returns true if the two given sketches are equivalent.
+   * @param sketch_a the given sketch A
+   * @param sketch_b the given sketch B
+   * @return true if the two given sketches are exactly equal
+   */
+  template<typename SketchA, typename SketchB>
+  static bool exactly_equal(const SketchA& sketch_a, const SketchB& sketch_b) {
+    if (reinterpret_cast<const void*>(&sketch_a) == reinterpret_cast<const void*>(&sketch_b)) return true;
+    if (sketch_a.is_empty() && sketch_b.is_empty()) return true;
+    if (sketch_a.is_empty() || sketch_b.is_empty()) return false;
+
+    auto union_ab = compute_union(sketch_a, sketch_b);
+    if (identical_sets(sketch_a, sketch_b, union_ab)) return true;
+    return false;
+  }
+
+  /**
+   * Tests similarity of an actual Sketch against an expected Sketch.
+   * Computes the lower bound of the Jaccard index <i>J<sub>LB</sub></i> of the actual and
+   * expected sketches.
+   * if <i>J<sub>LB</sub> &ge; threshold</i>, then the sketches are considered to be
+   * similar with a confidence of 97.7%.
+   *
+   * @param actual the sketch to be tested
+   * @param expected the reference sketch that is considered to be correct
+   * @param threshold a real value between zero and one
+   * @return true if the similarity of the two sketches is greater than the given threshold
+   * with at least 97.7% confidence
+   */
+  template<typename SketchA, typename SketchB>
+  static bool similarity_test(const SketchA& actual, const SketchB& expected, double threshold) {
+    auto jc = jaccard(actual, expected);
+    return jc[0] >= threshold;
+  }
+
+  /**
+   * Tests dissimilarity of an actual Sketch against an expected Sketch.
+   * Computes the upper bound of the Jaccard index <i>J<sub>UB</sub></i> of the actual and
+   * expected sketches.
+   * if <i>J<sub>UB</sub> &le; threshold</i>, then the sketches are considered to be
+   * dissimilar with a confidence of 97.7%.
+   *
+   * @param actual the sketch to be tested
+   * @param expected the reference sketch that is considered to be correct
+   * @param threshold a real value between zero and one
+   * @return true if the dissimilarity of the two sketches is greater than the given threshold
+   * with at least 97.7% confidence
+   */
+  template<typename SketchA, typename SketchB>
+  static bool dissimilarity_test(const SketchA& actual, const SketchB& expected, double threshold) {
+    auto jc = jaccard(actual, expected);
+    return jc[2] <= threshold;
+  }
+
+private:
+
+  template<typename SketchA, typename SketchB>
+  static typename Union::CompactSketch compute_union(const SketchA& sketch_a, const SketchB& sketch_b) {
+    const unsigned count_a = sketch_a.get_num_retained();
+    const unsigned count_b = sketch_b.get_num_retained();
+    const unsigned lg_k = std::min(std::max(log2(ceiling_power_of_2(count_a + count_b)), theta_constants::MIN_LG_K), theta_constants::MAX_LG_K);
+    auto u = typename Union::builder().set_lg_k(lg_k).build();
+    u.update(sketch_a);
+    u.update(sketch_b);
+    return u.get_result(false);
+  }
+
+  template<typename SketchA, typename SketchB, typename UnionAB>
+  static bool identical_sets(const SketchA& sketch_a, const SketchB& sketch_b, const UnionAB& union_ab) {
+    if (union_ab.get_num_retained() == sketch_a.get_num_retained() &&
+        union_ab.get_num_retained() == sketch_b.get_num_retained() &&
+        union_ab.get_theta64() == sketch_a.get_theta64() &&
+        union_ab.get_theta64() == sketch_b.get_theta64()) return true;
+    return false;
+  }
+
+};
+
+} /* namespace datasketches */
+
+# endif
diff --git a/be/src/thirdparty/datasketches/theta_set_difference_base.hpp b/be/src/thirdparty/datasketches/theta_set_difference_base.hpp
new file mode 100644
index 0000000..5cc601f
--- /dev/null
+++ b/be/src/thirdparty/datasketches/theta_set_difference_base.hpp
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THETA_SET_DIFFERENCE_BASE_HPP_
+#define THETA_SET_DIFFERENCE_BASE_HPP_
+
+#include "theta_comparators.hpp"
+#include "theta_update_sketch_base.hpp"
+
+namespace datasketches {
+
+template<
+  typename Entry,
+  typename ExtractKey,
+  typename CompactSketch,
+  typename Allocator
+>
+class theta_set_difference_base {
+public:
+  using comparator = compare_by_key<ExtractKey>;
+  using AllocU64 = typename std::allocator_traits<Allocator>::template rebind_alloc<uint64_t>;
+  using hash_table = theta_update_sketch_base<uint64_t, trivial_extract_key, AllocU64>;
+
+  theta_set_difference_base(uint64_t seed, const Allocator& allocator = Allocator());
+
+  template<typename FwdSketch, typename Sketch>
+  CompactSketch compute(FwdSketch&& a, const Sketch& b, bool ordered) const;
+
+private:
+  Allocator allocator_;
+  uint16_t seed_hash_;
+};
+
+} /* namespace datasketches */
+
+#include "theta_set_difference_base_impl.hpp"
+
+#endif
diff --git a/be/src/thirdparty/datasketches/theta_set_difference_base_impl.hpp b/be/src/thirdparty/datasketches/theta_set_difference_base_impl.hpp
new file mode 100644
index 0000000..4ab98a8
--- /dev/null
+++ b/be/src/thirdparty/datasketches/theta_set_difference_base_impl.hpp
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THETA_A_SET_DIFFERENCE_BASE_IMPL_HPP_
+#define THETA_A_SET_DIFFERENCE_BASE_IMPL_HPP_
+
+#include <algorithm>
+
+#include "conditional_back_inserter.hpp"
+#include "conditional_forward.hpp"
+
+namespace datasketches {
+
+template<typename EN, typename EK, typename CS, typename A>
+theta_set_difference_base<EN, EK, CS, A>::theta_set_difference_base(uint64_t seed, const A& allocator):
+allocator_(allocator),
+seed_hash_(compute_seed_hash(seed))
+{}
+
+template<typename EN, typename EK, typename CS, typename A>
+template<typename FwdSketch, typename Sketch>
+CS theta_set_difference_base<EN, EK, CS, A>::compute(FwdSketch&& a, const Sketch& b, bool ordered) const {
+  if (a.is_empty() || a.get_num_retained() == 0 || b.is_empty()) return CS(a, ordered);
+  if (a.get_seed_hash() != seed_hash_) throw std::invalid_argument("A seed hash mismatch");
+  if (b.get_seed_hash() != seed_hash_) throw std::invalid_argument("B seed hash mismatch");
+
+  const uint64_t theta = std::min(a.get_theta64(), b.get_theta64());
+  std::vector<EN, A> entries(allocator_);
+  bool is_empty = a.is_empty();
+
+  if (b.get_num_retained() == 0) {
+    std::copy_if(forward_begin(std::forward<FwdSketch>(a)), forward_end(std::forward<FwdSketch>(a)), std::back_inserter(entries),
+        key_less_than<uint64_t, EN, EK>(theta));
+  } else {
+    if (a.is_ordered() && b.is_ordered()) { // sort-based
+      std::set_difference(forward_begin(std::forward<FwdSketch>(a)), forward_end(std::forward<FwdSketch>(a)), b.begin(), b.end(),
+          conditional_back_inserter(entries, key_less_than<uint64_t, EN, EK>(theta)), comparator());
+    } else { // hash-based
+      const uint8_t lg_size = lg_size_from_count(b.get_num_retained(), hash_table::REBUILD_THRESHOLD);
+      hash_table table(lg_size, lg_size, hash_table::resize_factor::X1, 0, 0, allocator_); // theta and seed are not used here
+      for (const auto& entry: b) {
+        const uint64_t hash = EK()(entry);
+        if (hash < theta) {
+          table.insert(table.find(hash).first, hash);
+        } else if (b.is_ordered()) {
+          break; // early stop
+        }
+      }
+
+      // scan A lookup B
+      for (auto& entry: a) {
+        const uint64_t hash = EK()(entry);
+        if (hash < theta) {
+          auto result = table.find(hash);
+          if (!result.second) entries.push_back(conditional_forward<FwdSketch>(entry));
+        } else if (a.is_ordered()) {
+          break; // early stop
+        }
+      }
+    }
+  }
+  if (entries.empty() && theta == theta_constants::MAX_THETA) is_empty = true;
+  if (ordered && !a.is_ordered()) std::sort(entries.begin(), entries.end(), comparator());
+  return CS(is_empty, a.is_ordered() || ordered, seed_hash_, theta, std::move(entries));
+}
+
+} /* namespace datasketches */
+
+#endif
diff --git a/be/src/thirdparty/datasketches/theta_sketch.hpp b/be/src/thirdparty/datasketches/theta_sketch.hpp
index b809f71..2e24168 100644
--- a/be/src/thirdparty/datasketches/theta_sketch.hpp
+++ b/be/src/thirdparty/datasketches/theta_sketch.hpp
@@ -20,45 +20,29 @@
 #ifndef THETA_SKETCH_HPP_
 #define THETA_SKETCH_HPP_
 
-#include <memory>
-#include <functional>
-#include <climits>
-#include <vector>
-
-#include "common_defs.hpp"
+#include "theta_update_sketch_base.hpp"
 
 namespace datasketches {
 
-/*
- * author Alexander Saydakov
- * author Lee Rhodes
- * author Kevin Lang
- */
-
-// forward-declarations
-template<typename A> class theta_sketch_alloc;
-template<typename A> class update_theta_sketch_alloc;
-template<typename A> class compact_theta_sketch_alloc;
-template<typename A> class theta_union_alloc;
-template<typename A> class theta_intersection_alloc;
-template<typename A> class theta_a_not_b_alloc;
-
-// for serialization as raw bytes
-template<typename A> using AllocU8 = typename std::allocator_traits<A>::template rebind_alloc<uint8_t>;
-template<typename A> using vector_u8 = std::vector<uint8_t, AllocU8<A>>;
-
-template<typename A>
+template<typename Allocator = std::allocator<uint64_t>>
 class theta_sketch_alloc {
 public:
-  static const uint64_t MAX_THETA = LLONG_MAX; // signed max for compatibility with Java
-  static const uint8_t SERIAL_VERSION = 3;
+  using Entry = uint64_t;
+  using ExtractKey = trivial_extract_key;
+  using iterator = theta_iterator<Entry, ExtractKey>;
+  using const_iterator = theta_const_iterator<Entry, ExtractKey>;
 
   virtual ~theta_sketch_alloc() = default;
 
   /**
+   * @return allocator
+   */
+  virtual Allocator get_allocator() const = 0;
+
+  /**
    * @return true if this sketch represents an empty set (not the same as no retained entries!)
    */
-  bool is_empty() const;
+  virtual bool is_empty() const = 0;
 
   /**
    * @return estimate of the distinct count of the input stream
@@ -96,13 +80,16 @@ public:
   /**
    * @return theta as a positive integer between 0 and LLONG_MAX
    */
-  uint64_t get_theta64() const;
+  virtual uint64_t get_theta64() const = 0;
 
   /**
    * @return the number of retained entries in the sketch
    */
   virtual uint32_t get_num_retained() const = 0;
 
+  /**
+   * @return hash of the seed that was used to hash the input
+   */
   virtual uint16_t get_seed_hash() const = 0;
 
   /**
@@ -111,109 +98,82 @@ public:
   virtual bool is_ordered() const = 0;
 
   /**
-   * Writes a human-readable summary of this sketch to a given stream
+   * Provides a human-readable summary of this sketch as a string
    * @param print_items if true include the list of items retained by the sketch
+   * @return sketch summary as a string
    */
-  virtual string<A> to_string(bool print_items = false) const = 0;
-
-  /**
-   * This method serializes the sketch into a given stream in a binary form
-   * @param os output stream
-   */
-  virtual void serialize(std::ostream& os) const = 0;
-
-  // This is a convenience alias for users
-  // The type returned by the following serialize method
-  typedef vector_u8<A> vector_bytes;
+  virtual string<Allocator> to_string(bool print_items = false) const;
 
   /**
-   * This method serializes the sketch as a vector of bytes.
-   * An optional header can be reserved in front of the sketch.
-   * It is an uninitialized space of a given size.
-   * This header is used in Datasketches PostgreSQL extension.
-   * @param header_size_bytes space to reserve in front of the sketch
-   */
-  virtual vector_bytes serialize(unsigned header_size_bytes = 0) const = 0;
-
-  // This is a convenience alias for users
-  // The type returned by the following deserialize methods
-  // It is not possible to return instances of an abstract type, so this has to be a pointer
-  typedef std::unique_ptr<theta_sketch_alloc<A>, std::function<void(theta_sketch_alloc<A>*)>> unique_ptr;
-
-  /**
-   * This method deserializes a sketch from a given stream.
-   * @param is input stream
-   * @param seed the seed for the hash function that was used to create the sketch
-   * @return an instance of a sketch as a unique_ptr
+   * Iterator over hash values in this sketch.
+   * @return begin iterator
    */
-  static unique_ptr deserialize(std::istream& is, uint64_t seed = DEFAULT_SEED);
+  virtual iterator begin() = 0;
 
   /**
-   * This method deserializes a sketch from a given array of bytes.
-   * @param bytes pointer to the array of bytes
-   * @param size the size of the array
-   * @param seed the seed for the hash function that was used to create the sketch
-   * @return an instance of the sketch
+   * Iterator pointing past the valid range.
+   * Not to be incremented or dereferenced.
+   * @return end iterator
    */
-  static unique_ptr deserialize(const void* bytes, size_t size, uint64_t seed = DEFAULT_SEED);
-
-  class const_iterator;
+  virtual iterator end() = 0;
 
   /**
-   * Iterator over hash values in this sketch.
+   * Const iterator over hash values in this sketch.
    * @return begin iterator
    */
   virtual const_iterator begin() const = 0;
 
   /**
-   * Iterator pointing past the valid range.
+   * Const iterator pointing past the valid range.
    * Not to be incremented or dereferenced.
    * @return end iterator
    */
   virtual const_iterator end() const = 0;
 
 protected:
-  enum flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDERED };
-
-  bool is_empty_;
-  uint64_t theta_;
-
-  theta_sketch_alloc(bool is_empty, uint64_t theta);
-
-  static uint16_t get_seed_hash(uint64_t seed);
-
-  static void check_sketch_type(uint8_t actual, uint8_t expected);
-  static void check_serial_version(uint8_t actual, uint8_t expected);
-  static void check_seed_hash(uint16_t actual, uint16_t expected);
-
-  friend theta_intersection_alloc<A>;
-  friend theta_a_not_b_alloc<A>;
+  using ostrstream = std::basic_ostringstream<char, std::char_traits<char>, AllocChar<Allocator>>;
+  virtual void print_specifics(ostrstream& os) const = 0;
 };
 
-// update sketch
-
-template<typename A> using AllocU64 = typename std::allocator_traits<A>::template rebind_alloc<uint64_t>;
-template<typename A> using vector_u64 = std::vector<uint64_t, AllocU64<A>>;
+// forward declaration
+template<typename A> class compact_theta_sketch_alloc;
 
-template<typename A>
-class update_theta_sketch_alloc: public theta_sketch_alloc<A> {
+template<typename Allocator = std::allocator<uint64_t>>
+class update_theta_sketch_alloc: public theta_sketch_alloc<Allocator> {
 public:
-  class builder;
-  enum resize_factor { X1, X2, X4, X8 };
-  static const uint8_t SKETCH_TYPE = 2;
+  using Base = theta_sketch_alloc<Allocator>;
+  using Entry = typename Base::Entry;
+  using ExtractKey = typename Base::ExtractKey;
+  using iterator = typename Base::iterator;
+  using const_iterator = typename Base::const_iterator;
+  using theta_table = theta_update_sketch_base<Entry, ExtractKey, Allocator>;
+  using resize_factor = typename theta_table::resize_factor;
 
   // No constructor here. Use builder instead.
+  class builder;
 
+  update_theta_sketch_alloc(const update_theta_sketch_alloc&) = default;
+  update_theta_sketch_alloc(update_theta_sketch_alloc&&) noexcept = default;
   virtual ~update_theta_sketch_alloc() = default;
+  update_theta_sketch_alloc& operator=(const update_theta_sketch_alloc&) = default;
+  update_theta_sketch_alloc& operator=(update_theta_sketch_alloc&&) = default;
 
-  virtual uint32_t get_num_retained() const;
-  virtual uint16_t get_seed_hash() const;
+  virtual Allocator get_allocator() const;
+  virtual bool is_empty() const;
   virtual bool is_ordered() const;
-  virtual string<A> to_string(bool print_items = false) const;
-  virtual void serialize(std::ostream& os) const;
-  typedef vector_u8<A> vector_bytes; // alias for users
-  // header space is reserved, but not initialized
-  virtual vector_bytes serialize(unsigned header_size_bytes = 0) const;
+  virtual uint16_t get_seed_hash() const;
+  virtual uint64_t get_theta64() const;
+  virtual uint32_t get_num_retained() const;
+
+  /**
+   * @return configured nominal number of entries in the sketch
+   */
+  uint8_t get_lg_k() const;
+
+  /**
+   * @return configured resize factor of the sketch
+   */
+  resize_factor get_rf() const;
 
   /**
    * Update this sketch with a given string.
@@ -302,7 +262,7 @@ public:
    * @param data pointer to the data
    * @param length of the data in bytes
    */
-  void update(const void* data, unsigned length);
+  void update(const void* data, size_t length);
 
   /**
    * Remove retained entries in excess of the nominal size k (if any)
@@ -314,105 +274,85 @@ public:
    * @param ordered optional flag to specify if ordered sketch should be produced
    * @return compact sketch
    */
-  compact_theta_sketch_alloc<A> compact(bool ordered = true) const;
-
-  virtual typename theta_sketch_alloc<A>::const_iterator begin() const;
-  virtual typename theta_sketch_alloc<A>::const_iterator end() const;
-
-  /**
-   * This method deserializes a sketch from a given stream.
-   * @param is input stream
-   * @param seed the seed for the hash function that was used to create the sketch
-   * @return an instance of a sketch
-   */
-  static update_theta_sketch_alloc<A> deserialize(std::istream& is, uint64_t seed = DEFAULT_SEED);
+  compact_theta_sketch_alloc<Allocator> compact(bool ordered = true) const;
 
-  /**
-   * This method deserializes a sketch from a given array of bytes.
-   * @param bytes pointer to the array of bytes
-   * @param size the size of the array
-   * @param seed the seed for the hash function that was used to create the sketch
-   * @return an instance of the sketch
-   */
-  static update_theta_sketch_alloc<A> deserialize(const void* bytes, size_t size, uint64_t seed = DEFAULT_SEED);
+  virtual iterator begin();
+  virtual iterator end();
+  virtual const_iterator begin() const;
+  virtual const_iterator end() const;
 
 private:
-  // resize threshold = 0.5 tuned for speed
-  static constexpr double RESIZE_THRESHOLD = 0.5;
-  // hash table rebuild threshold = 15/16
-  static constexpr double REBUILD_THRESHOLD = 15.0 / 16.0;
-
-  static constexpr uint8_t STRIDE_HASH_BITS = 7;
-  static constexpr uint32_t STRIDE_MASK = (1 << STRIDE_HASH_BITS) - 1;
-
-  uint8_t lg_cur_size_;
-  uint8_t lg_nom_size_;
-  vector_u64<A> keys_;
-  uint32_t num_keys_;
-  resize_factor rf_;
-  float p_;
-  uint64_t seed_;
-  uint32_t capacity_;
+  theta_table table_;
 
   // for builder
-  update_theta_sketch_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed);
-
-  // for deserialize
-  update_theta_sketch_alloc(bool is_empty, uint64_t theta, uint8_t lg_cur_size, uint8_t lg_nom_size, vector_u64<A>&& keys, uint32_t num_keys, resize_factor rf, float p, uint64_t seed);
+  update_theta_sketch_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta,
+      uint64_t seed, const Allocator& allocator);
 
-  void resize();
-  void rebuild();
-
-  friend theta_union_alloc<A>;
-  void internal_update(uint64_t hash);
-
-  friend theta_intersection_alloc<A>;
-  friend theta_a_not_b_alloc<A>;
-  static inline uint32_t get_capacity(uint8_t lg_cur_size, uint8_t lg_nom_size);
-  static inline uint32_t get_stride(uint64_t hash, uint8_t lg_size);
-  static bool hash_search_or_insert(uint64_t hash, uint64_t* table, uint8_t lg_size);
-  static bool hash_search(uint64_t hash, const uint64_t* table, uint8_t lg_size);
-
-  friend theta_sketch_alloc<A>;
-  static update_theta_sketch_alloc<A> internal_deserialize(std::istream& is, resize_factor rf, uint8_t lg_cur_size, uint8_t lg_nom_size, uint8_t flags_byte, uint64_t seed);
-  static update_theta_sketch_alloc<A> internal_deserialize(const void* bytes, size_t size, resize_factor rf, uint8_t lg_cur_size, uint8_t lg_nom_size, uint8_t flags_byte, uint64_t seed);
+  using ostrstream = typename Base::ostrstream;
+  virtual void print_specifics(ostrstream& os) const;
 };
 
 // compact sketch
 
-template<typename A>
-class compact_theta_sketch_alloc: public theta_sketch_alloc<A> {
+template<typename Allocator = std::allocator<uint64_t>>
+class compact_theta_sketch_alloc: public theta_sketch_alloc<Allocator> {
 public:
+  using Base = theta_sketch_alloc<Allocator>;
+  using iterator = typename Base::iterator;
+  using const_iterator = typename Base::const_iterator;
+  using AllocBytes = typename std::allocator_traits<Allocator>::template rebind_alloc<uint8_t>;
+  using vector_bytes = std::vector<uint8_t, AllocBytes>;
+
+  static const uint8_t SERIAL_VERSION = 3;
   static const uint8_t SKETCH_TYPE = 3;
 
-  // No constructor here.
   // Instances of this type can be obtained:
-  // - by compacting an update_theta_sketch
+  // - by compacting an update_theta_sketch_alloc
   // - as a result of a set operation
   // - by deserializing a previously serialized compact sketch
 
-  compact_theta_sketch_alloc(const theta_sketch_alloc<A>& other, bool ordered);
+  compact_theta_sketch_alloc(const Base& other, bool ordered);
+  compact_theta_sketch_alloc(const compact_theta_sketch_alloc&) = default;
+  compact_theta_sketch_alloc(compact_theta_sketch_alloc&&) noexcept = default;
   virtual ~compact_theta_sketch_alloc() = default;
+  compact_theta_sketch_alloc& operator=(const compact_theta_sketch_alloc&) = default;
+  compact_theta_sketch_alloc& operator=(compact_theta_sketch_alloc&&) = default;
 
+  virtual Allocator get_allocator() const;
+  virtual bool is_empty() const;
+  virtual bool is_ordered() const;
+  virtual uint64_t get_theta64() const;
   virtual uint32_t get_num_retained() const;
   virtual uint16_t get_seed_hash() const;
-  virtual bool is_ordered() const;
-  virtual string<A> to_string(bool print_items = false) const;
-  virtual void serialize(std::ostream& os) const;
-  typedef vector_u8<A> vector_bytes; // alias for users
-  // header space is reserved, but not initialized
-  virtual vector_bytes serialize(unsigned header_size_bytes = 0) const;
 
-  virtual typename theta_sketch_alloc<A>::const_iterator begin() const;
-  virtual typename theta_sketch_alloc<A>::const_iterator end() const;
+  /**
+   * This method serializes the sketch into a given stream in a binary form
+   * @param os output stream
+   */
+  void serialize(std::ostream& os) const;
+
+  /**
+   * This method serializes the sketch as a vector of bytes.
+   * An optional header can be reserved in front of the sketch.
+   * It is an uninitialized space of a given size.
+   * This header is used in Datasketches PostgreSQL extension.
+   * @param header_size_bytes space to reserve in front of the sketch
+   */
+  vector_bytes serialize(unsigned header_size_bytes = 0) const;
+
+  virtual iterator begin();
+  virtual iterator end();
+  virtual const_iterator begin() const;
+  virtual const_iterator end() const;
 
   /**
    * This method deserializes a sketch from a given stream.
    * @param is input stream
    * @param seed the seed for the hash function that was used to create the sketch
-   * @return an instance of a sketch
+   * @return an instance of the sketch
    */
-  static compact_theta_sketch_alloc<A> deserialize(std::istream& is, uint64_t seed = DEFAULT_SEED);
+  static compact_theta_sketch_alloc deserialize(std::istream& is,
+      uint64_t seed = DEFAULT_SEED, const Allocator& allocator = Allocator());
 
   /**
    * This method deserializes a sketch from a given array of bytes.
@@ -421,110 +361,36 @@ public:
    * @param seed the seed for the hash function that was used to create the sketch
    * @return an instance of the sketch
    */
-  static compact_theta_sketch_alloc<A> deserialize(const void* bytes, size_t size, uint64_t seed = DEFAULT_SEED);
+  static compact_theta_sketch_alloc deserialize(const void* bytes, size_t size,
+      uint64_t seed = DEFAULT_SEED, const Allocator& allocator = Allocator());
+
+  // for internal use
+  compact_theta_sketch_alloc(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta, std::vector<uint64_t, Allocator>&& entries);
 
 private:
-  typedef typename std::allocator_traits<A>::template rebind_alloc<uint64_t> AllocU64;
+  enum flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDERED };
 
-  vector_u64<A> keys_;
-  uint16_t seed_hash_;
+  bool is_empty_;
   bool is_ordered_;
+  uint16_t seed_hash_;
+  uint64_t theta_;
+  std::vector<uint64_t, Allocator> entries_;
 
-  friend theta_sketch_alloc<A>;
-  friend update_theta_sketch_alloc<A>;
-  friend theta_union_alloc<A>;
-  friend theta_intersection_alloc<A>;
-  friend theta_a_not_b_alloc<A>;
-  compact_theta_sketch_alloc(bool is_empty, uint64_t theta, vector_u64<A>&& keys, uint16_t seed_hash, bool is_ordered);
-  static compact_theta_sketch_alloc<A> internal_deserialize(std::istream& is, uint8_t preamble_longs, uint8_t flags_byte, uint16_t seed_hash);
-  static compact_theta_sketch_alloc<A> internal_deserialize(const void* bytes, size_t size, uint8_t preamble_longs, uint8_t flags_byte, uint16_t seed_hash);
-};
-
-// builder
-
-template<typename A>
-class update_theta_sketch_alloc<A>::builder {
-public:
-  static const uint8_t MIN_LG_K = 5;
-  static const uint8_t DEFAULT_LG_K = 12;
-  static const resize_factor DEFAULT_RESIZE_FACTOR = X8;
-
-  /**
-   * Creates and instance of the builder with default parameters.
-   */
-  builder();
-
-  /**
-   * Set log2(k), where k is a nominal number of entries in the sketch
-   * @param lg_k base 2 logarithm of nominal number of entries
-   * @return this builder
-   */
-  builder& set_lg_k(uint8_t lg_k);
-
-  /**
-   * Set resize factor for the internal hash table (defaults to 8)
-   * @param rf resize factor
-   * @return this builder
-   */
-  builder& set_resize_factor(resize_factor rf);
-
-  /**
-   * Set sampling probability (initial theta). The default is 1, so the sketch retains
-   * all entries until it reaches the limit, at which point it goes into the estimation mode
-   * and reduces the effective sampling probability (theta) as necessary.
-   * @param p sampling probability
-   * @return this builder
-   */
-  builder& set_p(float p);
-
-  /**
-   * Set the seed for the hash function. Should be used carefully if needed.
-   * Sketches produced with different seed are not compatible
-   * and cannot be mixed in set operations.
-   * @param seed hash seed
-   * @return this builder
-   */
-  builder& set_seed(uint64_t seed);
-
-  /**
-   * This is to create an instance of the sketch with predefined parameters.
-   * @return and instance of the sketch
-   */
-  update_theta_sketch_alloc<A> build() const;
-
-private:
-  uint8_t lg_k_;
-  resize_factor rf_;
-  float p_;
-  uint64_t seed_;
-
-  static uint8_t starting_sub_multiple(uint8_t lg_tgt, uint8_t lg_min, uint8_t lg_rf);
+  using ostrstream = typename Base::ostrstream;
+  virtual void print_specifics(ostrstream& os) const;
 };
 
-// iterator
-template<typename A>
-class theta_sketch_alloc<A>::const_iterator: public std::iterator<std::input_iterator_tag, uint64_t> {
+template<typename Allocator>
+class update_theta_sketch_alloc<Allocator>::builder: public theta_base_builder<builder, Allocator> {
 public:
-  const_iterator& operator++();
-  const_iterator operator++(int);
-  bool operator==(const const_iterator& other) const;
-  bool operator!=(const const_iterator& other) const;
-  uint64_t operator*() const;
-
-private:
-  const uint64_t* keys_;
-  uint32_t size_;
-  uint32_t index_;
-  const_iterator(const uint64_t* keys, uint32_t size, uint32_t index);
-  friend class update_theta_sketch_alloc<A>;
-  friend class compact_theta_sketch_alloc<A>;
+    builder(const Allocator& allocator = Allocator());
+    update_theta_sketch_alloc build() const;
 };
 
-
 // aliases with default allocator for convenience
-typedef theta_sketch_alloc<std::allocator<void>> theta_sketch;
-typedef update_theta_sketch_alloc<std::allocator<void>> update_theta_sketch;
-typedef compact_theta_sketch_alloc<std::allocator<void>> compact_theta_sketch;
+using theta_sketch = theta_sketch_alloc<std::allocator<uint64_t>>;
+using update_theta_sketch = update_theta_sketch_alloc<std::allocator<uint64_t>>;
+using compact_theta_sketch = compact_theta_sketch_alloc<std::allocator<uint64_t>>;
 
 } /* namespace datasketches */
 
diff --git a/be/src/thirdparty/datasketches/theta_sketch_impl.hpp b/be/src/thirdparty/datasketches/theta_sketch_impl.hpp
index 579a675..1335e59 100644
--- a/be/src/thirdparty/datasketches/theta_sketch_impl.hpp
+++ b/be/src/thirdparty/datasketches/theta_sketch_impl.hpp
@@ -20,35 +20,23 @@
 #ifndef THETA_SKETCH_IMPL_HPP_
 #define THETA_SKETCH_IMPL_HPP_
 
-#include <algorithm>
-#include <cmath>
-#include <memory>
-#include <functional>
-#include <istream>
-#include <ostream>
 #include <sstream>
+#include <vector>
 
-#include "MurmurHash3.h"
 #include "serde.hpp"
 #include "binomial_bounds.hpp"
-#include "memory_operations.hpp"
+#include "theta_helpers.hpp"
 
 namespace datasketches {
 
-/*
- * author Alexander Saydakov
- * author Lee Rhodes
- * author Kevin Lang
- */
-
 template<typename A>
-theta_sketch_alloc<A>::theta_sketch_alloc(bool is_empty, uint64_t theta):
-is_empty_(is_empty), theta_(theta)
-{}
+bool theta_sketch_alloc<A>::is_estimation_mode() const {
+  return get_theta64() < theta_constants::MAX_THETA && !is_empty();
+}
 
 template<typename A>
-bool theta_sketch_alloc<A>::is_empty() const {
-  return is_empty_;
+double theta_sketch_alloc<A>::get_theta() const {
+  return static_cast<double>(get_theta64()) / theta_constants::MAX_THETA;
 }
 
 template<typename A>
@@ -69,182 +57,47 @@ double theta_sketch_alloc<A>::get_upper_bound(uint8_t num_std_devs) const {
 }
 
 template<typename A>
-bool theta_sketch_alloc<A>::is_estimation_mode() const {
-  return theta_ < MAX_THETA && !is_empty_;
-}
-
-template<typename A>
-double theta_sketch_alloc<A>::get_theta() const {
-  return (double) theta_ / MAX_THETA;
-}
-
-template<typename A>
-uint64_t theta_sketch_alloc<A>::get_theta64() const {
-  return theta_;
-}
-
-template<typename A>
-typename theta_sketch_alloc<A>::unique_ptr theta_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed) {
-  uint8_t preamble_longs;
-  is.read((char*)&preamble_longs, sizeof(preamble_longs));
-  uint8_t serial_version;
-  is.read((char*)&serial_version, sizeof(serial_version));
-  uint8_t type;
-  is.read((char*)&type, sizeof(type));
-  uint8_t lg_nom_size;
-  is.read((char*)&lg_nom_size, sizeof(lg_nom_size));
-  uint8_t lg_cur_size;
-  is.read((char*)&lg_cur_size, sizeof(lg_cur_size));
-  uint8_t flags_byte;
-  is.read((char*)&flags_byte, sizeof(flags_byte));
-  uint16_t seed_hash;
-  is.read((char*)&seed_hash, sizeof(seed_hash));
-
-  check_serial_version(serial_version, SERIAL_VERSION);
-
-  if (type == update_theta_sketch_alloc<A>::SKETCH_TYPE) {
-    check_seed_hash(seed_hash, get_seed_hash(seed));
-    typename update_theta_sketch_alloc<A>::resize_factor rf = static_cast<typename update_theta_sketch_alloc<A>::resize_factor>(preamble_longs >> 6);
-    typedef typename std::allocator_traits<A>::template rebind_alloc<update_theta_sketch_alloc<A>> AU;
-    return unique_ptr(
-      static_cast<theta_sketch_alloc<A>*>(new (AU().allocate(1)) update_theta_sketch_alloc<A>(update_theta_sketch_alloc<A>::internal_deserialize(is, rf, lg_cur_size, lg_nom_size, flags_byte, seed))),
-      [](theta_sketch_alloc<A>* ptr) {
-        ptr->~theta_sketch_alloc();
-        AU().deallocate(static_cast<update_theta_sketch_alloc<A>*>(ptr), 1);
-      }
-    );
-  } else if (type == compact_theta_sketch_alloc<A>::SKETCH_TYPE) {
-    const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
-    if (!is_empty) check_seed_hash(seed_hash, get_seed_hash(seed));
-    typedef typename std::allocator_traits<A>::template rebind_alloc<compact_theta_sketch_alloc<A>> AC;
-    return unique_ptr(
-      static_cast<theta_sketch_alloc<A>*>(new (AC().allocate(1)) compact_theta_sketch_alloc<A>(compact_theta_sketch_alloc<A>::internal_deserialize(is, preamble_longs, flags_byte, seed_hash))),
-      [](theta_sketch_alloc<A>* ptr) {
-        ptr->~theta_sketch_alloc();
-        AC().deallocate(static_cast<compact_theta_sketch_alloc<A>*>(ptr), 1);
-      }
-    );
-  }
-  throw std::invalid_argument("unsupported sketch type " + std::to_string((int) type));
-}
-
-template<typename A>
-typename theta_sketch_alloc<A>::unique_ptr theta_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed) {
-  ensure_minimum_memory(size, static_cast<size_t>(8));
-  const char* ptr = static_cast<const char*>(bytes);
-  uint8_t preamble_longs;
-  ptr += copy_from_mem(ptr, &preamble_longs, sizeof(preamble_longs));
-  uint8_t serial_version;
-  ptr += copy_from_mem(ptr, &serial_version, sizeof(serial_version));
-  uint8_t type;
-  ptr += copy_from_mem(ptr, &type, sizeof(type));
-  uint8_t lg_nom_size;
-  ptr += copy_from_mem(ptr, &lg_nom_size, sizeof(lg_nom_size));
-  uint8_t lg_cur_size;
-  ptr += copy_from_mem(ptr, &lg_cur_size, sizeof(lg_cur_size));
-  uint8_t flags_byte;
-  ptr += copy_from_mem(ptr, &flags_byte, sizeof(flags_byte));
-  uint16_t seed_hash;
-  ptr += copy_from_mem(ptr, &seed_hash, sizeof(seed_hash));
-
-  check_serial_version(serial_version, SERIAL_VERSION);
-
-  if (type == update_theta_sketch_alloc<A>::SKETCH_TYPE) {
-    check_seed_hash(seed_hash, get_seed_hash(seed));
-    typename update_theta_sketch_alloc<A>::resize_factor rf = static_cast<typename update_theta_sketch_alloc<A>::resize_factor>(preamble_longs >> 6);
-    typedef typename std::allocator_traits<A>::template rebind_alloc<update_theta_sketch_alloc<A>> AU;
-    return unique_ptr(
-      static_cast<theta_sketch_alloc<A>*>(new (AU().allocate(1)) update_theta_sketch_alloc<A>(
-        update_theta_sketch_alloc<A>::internal_deserialize(ptr, size - (ptr - static_cast<const char*>(bytes)), rf, lg_cur_size, lg_nom_size, flags_byte, seed))
-      ),
-      [](theta_sketch_alloc<A>* ptr) {
-        ptr->~theta_sketch_alloc();
-        AU().deallocate(static_cast<update_theta_sketch_alloc<A>*>(ptr), 1);
-      }
-    );
-  } else if (type == compact_theta_sketch_alloc<A>::SKETCH_TYPE) {
-    const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
-    if (!is_empty) check_seed_hash(seed_hash, get_seed_hash(seed));
-    typedef typename std::allocator_traits<A>::template rebind_alloc<compact_theta_sketch_alloc<A>> AC;
-    return unique_ptr(
-      static_cast<theta_sketch_alloc<A>*>(new (AC().allocate(1)) compact_theta_sketch_alloc<A>(
-        compact_theta_sketch_alloc<A>::internal_deserialize(ptr, size - (ptr - static_cast<const char*>(bytes)), preamble_longs, flags_byte, seed_hash))
-      ),
-      [](theta_sketch_alloc<A>* ptr) {
-        ptr->~theta_sketch_alloc();
-        AC().deallocate(static_cast<compact_theta_sketch_alloc<A>*>(ptr), 1);
-      }
-    );
-  }
-  throw std::invalid_argument("unsupported sketch type " + std::to_string((int) type));
-}
-
-template<typename A>
-uint16_t theta_sketch_alloc<A>::get_seed_hash(uint64_t seed) {
-  HashState hashes;
-  MurmurHash3_x64_128(&seed, sizeof(seed), 0, hashes);
-  return hashes.h1;
-}
-
-template<typename A>
-void theta_sketch_alloc<A>::check_sketch_type(uint8_t actual, uint8_t expected) {
-  if (actual != expected) {
-    throw std::invalid_argument("Sketch type mismatch: expected " + std::to_string((int)expected) + ", actual " + std::to_string((int)actual));
-  }
-}
-
-template<typename A>
-void theta_sketch_alloc<A>::check_serial_version(uint8_t actual, uint8_t expected) {
-  if (actual != expected) {
-    throw std::invalid_argument("Sketch serial version mismatch: expected " + std::to_string((int)expected) + ", actual " + std::to_string((int)actual));
-  }
-}
-
-template<typename A>
-void theta_sketch_alloc<A>::check_seed_hash(uint16_t actual, uint16_t expected) {
-  if (actual != expected) {
-    throw std::invalid_argument("Sketch seed hash mismatch: expected " + std::to_string(expected) + ", actual " + std::to_string(actual));
+string<A> theta_sketch_alloc<A>::to_string(bool detail) const {
+  ostrstream os;
+  os << "### Theta sketch summary:" << std::endl;
+  os << "   num retained entries : " << get_num_retained() << std::endl;
+  os << "   seed hash            : " << get_seed_hash() << std::endl;
+  os << "   empty?               : " << (is_empty() ? "true" : "false") << std::endl;
+  os << "   ordered?             : " << (is_ordered() ? "true" : "false") << std::endl;
+  os << "   estimation mode?     : " << (is_estimation_mode() ? "true" : "false") << std::endl;
+  os << "   theta (fraction)     : " << get_theta() << std::endl;
+  os << "   theta (raw 64-bit)   : " << get_theta64() << std::endl;
+  os << "   estimate             : " << this->get_estimate() << std::endl;
+  os << "   lower bound 95% conf : " << this->get_lower_bound(2) << std::endl;
+  os << "   upper bound 95% conf : " << this->get_upper_bound(2) << std::endl;
+  print_specifics(os);
+  os << "### End sketch summary" << std::endl;
+  if (detail) {
+    os << "### Retained entries" << std::endl;
+    for (const auto& hash: *this) {
+      os << hash << std::endl;
+    }
+    os << "### End retained entries" << std::endl;
   }
+  return os.str();
 }
 
 // update sketch
 
 template<typename A>
-update_theta_sketch_alloc<A>::update_theta_sketch_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed):
-theta_sketch_alloc<A>(true, theta_sketch_alloc<A>::MAX_THETA),
-lg_cur_size_(lg_cur_size),
-lg_nom_size_(lg_nom_size),
-keys_(1 << lg_cur_size_, 0),
-num_keys_(0),
-rf_(rf),
-p_(p),
-seed_(seed),
-capacity_(get_capacity(lg_cur_size, lg_nom_size))
-{
-  if (p < 1) this->theta_ *= p;
-}
-
-template<typename A>
-update_theta_sketch_alloc<A>::update_theta_sketch_alloc(bool is_empty, uint64_t theta, uint8_t lg_cur_size, uint8_t lg_nom_size, vector_u64<A>&& keys, uint32_t num_keys, resize_factor rf, float p, uint64_t seed):
-theta_sketch_alloc<A>(is_empty, theta),
-lg_cur_size_(lg_cur_size),
-lg_nom_size_(lg_nom_size),
-keys_(std::move(keys)),
-num_keys_(num_keys),
-rf_(rf),
-p_(p),
-seed_(seed),
-capacity_(get_capacity(lg_cur_size, lg_nom_size))
+update_theta_sketch_alloc<A>::update_theta_sketch_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf,
+    uint64_t theta, uint64_t seed, const A& allocator):
+table_(lg_cur_size, lg_nom_size, rf, theta, seed, allocator)
 {}
 
 template<typename A>
-uint32_t update_theta_sketch_alloc<A>::get_num_retained() const {
-  return num_keys_;
+A update_theta_sketch_alloc<A>::get_allocator() const {
+  return table_.allocator_;
 }
 
 template<typename A>
-uint16_t update_theta_sketch_alloc<A>::get_seed_hash() const {
-  return theta_sketch_alloc<A>::get_seed_hash(seed_);
+bool update_theta_sketch_alloc<A>::is_empty() const {
+  return table_.is_empty_;
 }
 
 template<typename A>
@@ -253,169 +106,28 @@ bool update_theta_sketch_alloc<A>::is_ordered() const {
 }
 
 template<typename A>
-string<A> update_theta_sketch_alloc<A>::to_string(bool print_items) const {
-  std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
-  os << "### Update Theta sketch summary:" << std::endl;
-  os << "   lg nominal size      : " << (int) lg_nom_size_ << std::endl;
-  os << "   lg current size      : " << (int) lg_cur_size_ << std::endl;
-  os << "   num retained keys    : " << num_keys_ << std::endl;
-  os << "   resize factor        : " << (1 << rf_) << std::endl;
-  os << "   sampling probability : " << p_ << std::endl;
-  os << "   seed hash            : " << this->get_seed_hash() << std::endl;
-  os << "   empty?               : " << (this->is_empty() ? "true" : "false") << std::endl;
-  os << "   ordered?             : " << (this->is_ordered() ? "true" : "false") << std::endl;
-  os << "   estimation mode?     : " << (this->is_estimation_mode() ? "true" : "false") << std::endl;
-  os << "   theta (fraction)     : " << this->get_theta() << std::endl;
-  os << "   theta (raw 64-bit)   : " << this->theta_ << std::endl;
-  os << "   estimate             : " << this->get_estimate() << std::endl;
-  os << "   lower bound 95% conf : " << this->get_lower_bound(2) << std::endl;
-  os << "   upper bound 95% conf : " << this->get_upper_bound(2) << std::endl;
-  os << "### End sketch summary" << std::endl;
-  if (print_items) {
-    os << "### Retained keys" << std::endl;
-    for (auto key: *this) os << "   " << key << std::endl;
-    os << "### End retained keys" << std::endl;
-  }
-  return os.str();
-}
-
-template<typename A>
-void update_theta_sketch_alloc<A>::serialize(std::ostream& os) const {
-  const uint8_t preamble_longs_and_rf = 3 | (rf_ << 6);
-  os.write((char*)&preamble_longs_and_rf, sizeof(preamble_longs_and_rf));
-  const uint8_t serial_version = theta_sketch_alloc<A>::SERIAL_VERSION;
-  os.write((char*)&serial_version, sizeof(serial_version));
-  const uint8_t type = SKETCH_TYPE;
-  os.write((char*)&type, sizeof(type));
-  os.write((char*)&lg_nom_size_, sizeof(lg_nom_size_));
-  os.write((char*)&lg_cur_size_, sizeof(lg_cur_size_));
-  const uint8_t flags_byte(
-    (this->is_empty() ? 1 << theta_sketch_alloc<A>::flags::IS_EMPTY : 0)
-  );
-  os.write((char*)&flags_byte, sizeof(flags_byte));
-  const uint16_t seed_hash = get_seed_hash();
-  os.write((char*)&seed_hash, sizeof(seed_hash));
-  os.write((char*)&num_keys_, sizeof(num_keys_));
-  os.write((char*)&p_, sizeof(p_));
-  os.write((char*)&(this->theta_), sizeof(uint64_t));
-  os.write((char*)keys_.data(), sizeof(uint64_t) * keys_.size());
-}
-
-template<typename A>
-vector_u8<A> update_theta_sketch_alloc<A>::serialize(unsigned header_size_bytes) const {
-  const uint8_t preamble_longs = 3;
-  const size_t size = header_size_bytes + sizeof(uint64_t) * preamble_longs + sizeof(uint64_t) * keys_.size();
-  vector_u8<A> bytes(size);
-  uint8_t* ptr = bytes.data() + header_size_bytes;
-
-  const uint8_t preamble_longs_and_rf = preamble_longs | (rf_ << 6);
-  ptr += copy_to_mem(&preamble_longs_and_rf, ptr, sizeof(preamble_longs_and_rf));
-  const uint8_t serial_version = theta_sketch_alloc<A>::SERIAL_VERSION;
-  ptr += copy_to_mem(&serial_version, ptr, sizeof(serial_version));
-  const uint8_t type = SKETCH_TYPE;
-  ptr += copy_to_mem(&type, ptr, sizeof(type));
-  ptr += copy_to_mem(&lg_nom_size_, ptr, sizeof(lg_nom_size_));
-  ptr += copy_to_mem(&lg_cur_size_, ptr, sizeof(lg_cur_size_));
-  const uint8_t flags_byte(
-    (this->is_empty() ? 1 << theta_sketch_alloc<A>::flags::IS_EMPTY : 0)
-  );
-  ptr += copy_to_mem(&flags_byte, ptr, sizeof(flags_byte));
-  const uint16_t seed_hash = get_seed_hash();
-  ptr += copy_to_mem(&seed_hash, ptr, sizeof(seed_hash));
-  ptr += copy_to_mem(&num_keys_, ptr, sizeof(num_keys_));
-  ptr += copy_to_mem(&p_, ptr, sizeof(p_));
-  ptr += copy_to_mem(&(this->theta_), ptr, sizeof(uint64_t));
-  ptr += copy_to_mem(keys_.data(), ptr, sizeof(uint64_t) * keys_.size());
-
-  return bytes;
-}
-
-template<typename A>
-update_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed) {
-  uint8_t preamble_longs;
-  is.read((char*)&preamble_longs, sizeof(preamble_longs));
-  resize_factor rf = static_cast<resize_factor>(preamble_longs >> 6);
-  preamble_longs &= 0x3f; // remove resize factor
-  uint8_t serial_version;
-  is.read((char*)&serial_version, sizeof(serial_version));
-  uint8_t type;
-  is.read((char*)&type, sizeof(type));
-  uint8_t lg_nom_size;
-  is.read((char*)&lg_nom_size, sizeof(lg_nom_size));
-  uint8_t lg_cur_size;
-  is.read((char*)&lg_cur_size, sizeof(lg_cur_size));
-  uint8_t flags_byte;
-  is.read((char*)&flags_byte, sizeof(flags_byte));
-  uint16_t seed_hash;
-  is.read((char*)&seed_hash, sizeof(seed_hash));
-  theta_sketch_alloc<A>::check_sketch_type(type, SKETCH_TYPE);
-  theta_sketch_alloc<A>::check_serial_version(serial_version, theta_sketch_alloc<A>::SERIAL_VERSION);
-  theta_sketch_alloc<A>::check_seed_hash(seed_hash, theta_sketch_alloc<A>::get_seed_hash(seed));
-  return internal_deserialize(is, rf, lg_cur_size, lg_nom_size, flags_byte, seed);
+uint64_t update_theta_sketch_alloc<A>::get_theta64() const {
+  return table_.theta_;
 }
 
 template<typename A>
-update_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::internal_deserialize(std::istream& is, resize_factor rf, uint8_t lg_cur_size, uint8_t lg_nom_size, uint8_t flags_byte, uint64_t seed) {
-  uint32_t num_keys;
-  is.read((char*)&num_keys, sizeof(num_keys));
-  float p;
-  is.read((char*)&p, sizeof(p));
-  uint64_t theta;
-  is.read((char*)&theta, sizeof(theta));
-  vector_u64<A> keys(1 << lg_cur_size);
-  is.read((char*)keys.data(), sizeof(uint64_t) * keys.size());
-  const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
-  if (!is.good()) throw std::runtime_error("error reading from std::istream"); 
-  return update_theta_sketch_alloc<A>(is_empty, theta, lg_cur_size, lg_nom_size, std::move(keys), num_keys, rf, p, seed);
+uint32_t update_theta_sketch_alloc<A>::get_num_retained() const {
+  return table_.num_entries_;
 }
 
 template<typename A>
-update_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed) {
-  ensure_minimum_memory(size, 8);
-  const char* ptr = static_cast<const char*>(bytes);
-  uint8_t preamble_longs;
-  ptr += copy_from_mem(ptr, &preamble_longs, sizeof(preamble_longs));
-  resize_factor rf = static_cast<resize_factor>(preamble_longs >> 6);
-  preamble_longs &= 0x3f; // remove resize factor
-  uint8_t serial_version;
-  ptr += copy_from_mem(ptr, &serial_version, sizeof(serial_version));
-  uint8_t type;
-  ptr += copy_from_mem(ptr, &type, sizeof(type));
-  uint8_t lg_nom_size;
-  ptr += copy_from_mem(ptr, &lg_nom_size, sizeof(lg_nom_size));
-  uint8_t lg_cur_size;
-  ptr += copy_from_mem(ptr, &lg_cur_size, sizeof(lg_cur_size));
-  uint8_t flags_byte;
-  ptr += copy_from_mem(ptr, &flags_byte, sizeof(flags_byte));
-  uint16_t seed_hash;
-  ptr += copy_from_mem(ptr, &seed_hash, sizeof(seed_hash));
-  theta_sketch_alloc<A>::check_sketch_type(type, SKETCH_TYPE);
-  theta_sketch_alloc<A>::check_serial_version(serial_version, theta_sketch_alloc<A>::SERIAL_VERSION);
-  theta_sketch_alloc<A>::check_seed_hash(seed_hash, theta_sketch_alloc<A>::get_seed_hash(seed));
-  return internal_deserialize(ptr, size - (ptr - static_cast<const char*>(bytes)), rf, lg_cur_size, lg_nom_size, flags_byte, seed);
+uint16_t update_theta_sketch_alloc<A>::get_seed_hash() const {
+  return compute_seed_hash(table_.seed_);
 }
 
 template<typename A>
-update_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::internal_deserialize(const void* bytes, size_t size, resize_factor rf, uint8_t lg_cur_size, uint8_t lg_nom_size, uint8_t flags_byte, uint64_t seed) {
-  const uint32_t table_size = 1 << lg_cur_size;
-  ensure_minimum_memory(size, 16 + sizeof(uint64_t) * table_size);
-  const char* ptr = static_cast<const char*>(bytes);
-  uint32_t num_keys;
-  ptr += copy_from_mem(ptr, &num_keys, sizeof(num_keys));
-  float p;
-  ptr += copy_from_mem(ptr, &p, sizeof(p));
-  uint64_t theta;
-  ptr += copy_from_mem(ptr, &theta, sizeof(theta));
-  vector_u64<A> keys(table_size);
-  ptr += copy_from_mem(ptr, keys.data(), sizeof(uint64_t) * table_size);
-  const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
-  return update_theta_sketch_alloc<A>(is_empty, theta, lg_cur_size, lg_nom_size, std::move(keys), num_keys, rf, p, seed);
+uint8_t update_theta_sketch_alloc<A>::get_lg_k() const {
+  return table_.lg_nom_size_;
 }
 
 template<typename A>
-void update_theta_sketch_alloc<A>::update(const std::string& value) {
-  if (value.empty()) return;
-  update(value.c_str(), value.length());
+auto update_theta_sketch_alloc<A>::get_rf() const -> resize_factor {
+  return table_.rf_;
 }
 
 template<typename A>
@@ -460,19 +172,7 @@ void update_theta_sketch_alloc<A>::update(int8_t value) {
 
 template<typename A>
 void update_theta_sketch_alloc<A>::update(double value) {
-  union {
-    int64_t long_value;
-    double double_value;
-  } long_double_union;
-
-  if (value == 0.0) {
-    long_double_union.double_value = 0.0; // canonicalize -0.0 to 0.0
-  } else if (std::isnan(value)) {
-    long_double_union.long_value = 0x7ff8000000000000L; // canonicalize NaN using value from Java's Double.doubleToLongBits()
-  } else {
-    long_double_union.double_value = value;
-  }
-  update(&long_double_union, sizeof(long_double_union));
+  update(canonical_double(value));
 }
 
 template<typename A>
@@ -481,157 +181,116 @@ void update_theta_sketch_alloc<A>::update(float value) {
 }
 
 template<typename A>
-void update_theta_sketch_alloc<A>::update(const void* data, unsigned length) {
-  HashState hashes;
-  MurmurHash3_x64_128(data, length, seed_, hashes);
-  const uint64_t hash = hashes.h1 >> 1; // Java implementation does logical shift >>> to make values positive
-  internal_update(hash);
-}
-
-template<typename A>
-compact_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::compact(bool ordered) const {
-  return compact_theta_sketch_alloc<A>(*this, ordered);
+void update_theta_sketch_alloc<A>::update(const std::string& value) {
+  if (value.empty()) return;
+  update(value.c_str(), value.length());
 }
 
 template<typename A>
-void update_theta_sketch_alloc<A>::internal_update(uint64_t hash) {
-  this->is_empty_ = false;
-  if (hash >= this->theta_ || hash == 0) return; // hash == 0 is reserved to mark empty slots in the table
-  if (hash_search_or_insert(hash, keys_.data(), lg_cur_size_)) {
-    num_keys_++;
-    if (num_keys_ > capacity_) {
-      if (lg_cur_size_ <= lg_nom_size_) {
-        resize();
-      } else {
-        rebuild();
-      }
-    }
+void update_theta_sketch_alloc<A>::update(const void* data, size_t length) {
+  const uint64_t hash = table_.hash_and_screen(data, length);
+  if (hash == 0) return;
+  auto result = table_.find(hash);
+  if (!result.second) {
+    table_.insert(result.first, hash);
   }
 }
 
 template<typename A>
 void update_theta_sketch_alloc<A>::trim() {
-  if (num_keys_ > static_cast<uint32_t>(1 << lg_nom_size_)) rebuild();
+  table_.trim();
 }
 
 template<typename A>
-void update_theta_sketch_alloc<A>::resize() {
-  const uint8_t lg_tgt_size = lg_nom_size_ + 1;
-  const uint8_t factor = std::max(1, std::min(static_cast<int>(rf_), lg_tgt_size - lg_cur_size_));
-  const uint8_t lg_new_size = lg_cur_size_ + factor;
-  const uint32_t new_size = 1 << lg_new_size;
-  vector_u64<A> new_keys(new_size, 0);
-  for (uint32_t i = 0; i < keys_.size(); i++) {
-    if (keys_[i] != 0) {
-      hash_search_or_insert(keys_[i], new_keys.data(), lg_new_size); // TODO hash_insert
-    }
-  }
-  keys_ = std::move(new_keys);
-  lg_cur_size_ += factor;
-  capacity_ = get_capacity(lg_cur_size_, lg_nom_size_);
-}
-
-template<typename A>
-void update_theta_sketch_alloc<A>::rebuild() {
-  const uint32_t pivot = (1 << lg_nom_size_) + keys_.size() - num_keys_;
-  std::nth_element(keys_.begin(), keys_.begin() + pivot, keys_.end());
-  this->theta_ = keys_[pivot];
-  vector_u64<A> new_keys(keys_.size(), 0);
-  num_keys_ = 0;
-  for (uint32_t i = 0; i < keys_.size(); i++) {
-    if (keys_[i] != 0 && keys_[i] < this->theta_) {
-      hash_search_or_insert(keys_[i], new_keys.data(), lg_cur_size_); // TODO hash_insert
-      num_keys_++;
-    }
-  }
-  keys_ = std::move(new_keys);
+auto update_theta_sketch_alloc<A>::begin() -> iterator {
+  return iterator(table_.entries_, 1 << table_.lg_cur_size_, 0);
 }
 
 template<typename A>
-uint32_t update_theta_sketch_alloc<A>::get_capacity(uint8_t lg_cur_size, uint8_t lg_nom_size) {
-  const double fraction = (lg_cur_size <= lg_nom_size) ? RESIZE_THRESHOLD : REBUILD_THRESHOLD;
-  return std::floor(fraction * (1 << lg_cur_size));
+auto update_theta_sketch_alloc<A>::end() -> iterator {
+  return iterator(nullptr, 0, 1 << table_.lg_cur_size_);
 }
 
 template<typename A>
-uint32_t update_theta_sketch_alloc<A>::get_stride(uint64_t hash, uint8_t lg_size) {
-  // odd and independent of index assuming lg_size lowest bits of the hash were used for the index
-  return (2 * static_cast<uint32_t>((hash >> lg_size) & STRIDE_MASK)) + 1;
+auto update_theta_sketch_alloc<A>::begin() const -> const_iterator {
+  return const_iterator(table_.entries_, 1 << table_.lg_cur_size_, 0);
 }
 
 template<typename A>
-bool update_theta_sketch_alloc<A>::hash_search_or_insert(uint64_t hash, uint64_t* table, uint8_t lg_size) {
-  const uint32_t mask = (1 << lg_size) - 1;
-  const uint32_t stride = get_stride(hash, lg_size);
-  uint32_t cur_probe = static_cast<uint32_t>(hash) & mask;
+auto update_theta_sketch_alloc<A>::end() const -> const_iterator {
+  return const_iterator(nullptr, 0, 1 << table_.lg_cur_size_);
+}
 
-  // search for duplicate or zero
-  const uint32_t loop_index = cur_probe;
-  do {
-    const uint64_t value = table[cur_probe];
-    if (value == 0) {
-      table[cur_probe] = hash; // insert value
-      return true;
-    } else if (value == hash) {
-      return false; // found a duplicate
-    }
-    cur_probe = (cur_probe + stride) & mask;
-  } while (cur_probe != loop_index);
-  throw std::logic_error("key not found and no empty slots!");
-}
-
-template<typename A>
-bool update_theta_sketch_alloc<A>::hash_search(uint64_t hash, const uint64_t* table, uint8_t lg_size) {
-  const uint32_t mask = (1 << lg_size) - 1;
-  const uint32_t stride = update_theta_sketch_alloc<A>::get_stride(hash, lg_size);
-  uint32_t cur_probe = static_cast<uint32_t>(hash) & mask;
-  const uint32_t loop_index = cur_probe;
-  do {
-    const uint64_t value = table[cur_probe];
-    if (value == 0) {
-      return false;
-    } else if (value == hash) {
-      return true;
-    }
-    cur_probe = (cur_probe + stride) & mask;
-  } while (cur_probe != loop_index);
-  throw std::logic_error("key not found and search wrapped");
+template<typename A>
+compact_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::compact(bool ordered) const {
+  return compact_theta_sketch_alloc<A>(*this, ordered);
 }
 
 template<typename A>
-typename theta_sketch_alloc<A>::const_iterator update_theta_sketch_alloc<A>::begin() const {
-  return typename theta_sketch_alloc<A>::const_iterator(keys_.data(), keys_.size(), 0);
+void update_theta_sketch_alloc<A>::print_specifics(ostrstream& os) const {
+  os << "   lg nominal size      : " << static_cast<int>(table_.lg_nom_size_) << std::endl;
+  os << "   lg current size      : " << static_cast<int>(table_.lg_cur_size_) << std::endl;
+  os << "   resize factor        : " << (1 << table_.rf_) << std::endl;
 }
 
+// builder
+
 template<typename A>
-typename theta_sketch_alloc<A>::const_iterator update_theta_sketch_alloc<A>::end() const {
-  return typename theta_sketch_alloc<A>::const_iterator(keys_.data(), keys_.size(), keys_.size());
+update_theta_sketch_alloc<A>::builder::builder(const A& allocator): theta_base_builder<builder, A>(allocator) {}
+
+template<typename A>
+update_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::builder::build() const {
+  return update_theta_sketch_alloc(this->starting_lg_size(), this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->allocator_);
 }
 
 // compact sketch
 
 template<typename A>
-compact_theta_sketch_alloc<A>::compact_theta_sketch_alloc(bool is_empty, uint64_t theta, vector_u64<A>&& keys, uint16_t seed_hash, bool is_ordered):
-theta_sketch_alloc<A>(is_empty, theta),
-keys_(std::move(keys)),
+compact_theta_sketch_alloc<A>::compact_theta_sketch_alloc(const Base& other, bool ordered):
+is_empty_(other.is_empty()),
+is_ordered_(other.is_ordered() || ordered),
+seed_hash_(other.get_seed_hash()),
+theta_(other.get_theta64()),
+entries_(other.get_allocator())
+{
+  entries_.reserve(other.get_num_retained());
+  std::copy(other.begin(), other.end(), std::back_inserter(entries_));
+  if (ordered && !other.is_ordered()) std::sort(entries_.begin(), entries_.end());
+}
+
+template<typename A>
+compact_theta_sketch_alloc<A>::compact_theta_sketch_alloc(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta,
+    std::vector<uint64_t, A>&& entries):
+is_empty_(is_empty),
+is_ordered_(is_ordered),
 seed_hash_(seed_hash),
-is_ordered_(is_ordered)
+theta_(theta),
+entries_(std::move(entries))
 {}
 
 template<typename A>
-compact_theta_sketch_alloc<A>::compact_theta_sketch_alloc(const theta_sketch_alloc<A>& other, bool ordered):
-theta_sketch_alloc<A>(other),
-keys_(other.get_num_retained()),
-seed_hash_(other.get_seed_hash()),
-is_ordered_(other.is_ordered() || ordered)
-{
-  std::copy(other.begin(), other.end(), keys_.begin());
-  if (ordered && !other.is_ordered()) std::sort(keys_.begin(), keys_.end());
+A compact_theta_sketch_alloc<A>::get_allocator() const {
+  return entries_.get_allocator();
+}
+
+template<typename A>
+bool compact_theta_sketch_alloc<A>::is_empty() const {
+  return is_empty_;
+}
+
+template<typename A>
+bool compact_theta_sketch_alloc<A>::is_ordered() const {
+  return is_ordered_;
+}
+
+template<typename A>
+uint64_t compact_theta_sketch_alloc<A>::get_theta64() const {
+  return theta_;
 }
 
 template<typename A>
 uint32_t compact_theta_sketch_alloc<A>::get_num_retained() const {
-  return keys_.size();
+  return entries_.size();
 }
 
 template<typename A>
@@ -640,158 +299,148 @@ uint16_t compact_theta_sketch_alloc<A>::get_seed_hash() const {
 }
 
 template<typename A>
-bool compact_theta_sketch_alloc<A>::is_ordered() const {
-  return is_ordered_;
+auto compact_theta_sketch_alloc<A>::begin() -> iterator {
+  return iterator(entries_.data(), entries_.size(), 0);
 }
 
 template<typename A>
-string<A> compact_theta_sketch_alloc<A>::to_string(bool print_items) const {
-  std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
-  os << "### Compact Theta sketch summary:" << std::endl;
-  os << "   num retained keys    : " << keys_.size() << std::endl;
-  os << "   seed hash            : " << this->get_seed_hash() << std::endl;
-  os << "   empty?               : " << (this->is_empty() ? "true" : "false") << std::endl;
-  os << "   ordered?             : " << (this->is_ordered() ? "true" : "false") << std::endl;
-  os << "   estimation mode?     : " << (this->is_estimation_mode() ? "true" : "false") << std::endl;
-  os << "   theta (fraction)     : " << this->get_theta() << std::endl;
-  os << "   theta (raw 64-bit)   : " << this->theta_ << std::endl;
-  os << "   estimate             : " << this->get_estimate() << std::endl;
-  os << "   lower bound 95% conf : " << this->get_lower_bound(2) << std::endl;
-  os << "   upper bound 95% conf : " << this->get_upper_bound(2) << std::endl;
-  os << "### End sketch summary" << std::endl;
-  if (print_items) {
-    os << "### Retained keys" << std::endl;
-    for (auto key: *this) os << "   " << key << std::endl;
-    os << "### End retained keys" << std::endl;
-  }
-  return os.str();
+auto compact_theta_sketch_alloc<A>::end() -> iterator {
+  return iterator(nullptr, 0, entries_.size());
 }
 
 template<typename A>
+auto compact_theta_sketch_alloc<A>::begin() const -> const_iterator {
+  return const_iterator(entries_.data(), entries_.size(), 0);
+}
+
+template<typename A>
+auto compact_theta_sketch_alloc<A>::end() const -> const_iterator {
+  return const_iterator(nullptr, 0, entries_.size());
+}
+
+template<typename A>
+void compact_theta_sketch_alloc<A>::print_specifics(ostrstream&) const {}
+
+template<typename A>
 void compact_theta_sketch_alloc<A>::serialize(std::ostream& os) const {
-  const bool is_single_item = keys_.size() == 1 && !this->is_estimation_mode();
+  const bool is_single_item = entries_.size() == 1 && !this->is_estimation_mode();
   const uint8_t preamble_longs = this->is_empty() || is_single_item ? 1 : this->is_estimation_mode() ? 3 : 2;
   os.write(reinterpret_cast<const char*>(&preamble_longs), sizeof(preamble_longs));
-  const uint8_t serial_version = theta_sketch_alloc<A>::SERIAL_VERSION;
+  const uint8_t serial_version = SERIAL_VERSION;
   os.write(reinterpret_cast<const char*>(&serial_version), sizeof(serial_version));
   const uint8_t type = SKETCH_TYPE;
   os.write(reinterpret_cast<const char*>(&type), sizeof(type));
   const uint16_t unused16 = 0;
   os.write(reinterpret_cast<const char*>(&unused16), sizeof(unused16));
   const uint8_t flags_byte(
-    (1 << theta_sketch_alloc<A>::flags::IS_COMPACT) |
-    (1 << theta_sketch_alloc<A>::flags::IS_READ_ONLY) |
-    (this->is_empty() ? 1 << theta_sketch_alloc<A>::flags::IS_EMPTY : 0) |
-    (this->is_ordered() ? 1 << theta_sketch_alloc<A>::flags::IS_ORDERED : 0)
+    (1 << flags::IS_COMPACT) |
+    (1 << flags::IS_READ_ONLY) |
+    (this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
+    (this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
   );
   os.write(reinterpret_cast<const char*>(&flags_byte), sizeof(flags_byte));
   const uint16_t seed_hash = get_seed_hash();
-  os.write((char*)&seed_hash, sizeof(seed_hash));
+  os.write(reinterpret_cast<const char*>(&seed_hash), sizeof(seed_hash));
   if (!this->is_empty()) {
     if (!is_single_item) {
-      const uint32_t num_keys = keys_.size();
-      os.write((char*)&num_keys, sizeof(num_keys));
+      const uint32_t num_entries = entries_.size();
+      os.write(reinterpret_cast<const char*>(&num_entries), sizeof(num_entries));
       const uint32_t unused32 = 0;
-      os.write((char*)&unused32, sizeof(unused32));
+      os.write(reinterpret_cast<const char*>(&unused32), sizeof(unused32));
       if (this->is_estimation_mode()) {
-        os.write((char*)&(this->theta_), sizeof(uint64_t));
+        os.write(reinterpret_cast<const char*>(&(this->theta_)), sizeof(uint64_t));
       }
     }
-    os.write((char*)keys_.data(), sizeof(uint64_t) * keys_.size());
+    os.write(reinterpret_cast<const char*>(entries_.data()), entries_.size() * sizeof(uint64_t));
   }
 }
 
 template<typename A>
-vector_u8<A> compact_theta_sketch_alloc<A>::serialize(unsigned header_size_bytes) const {
-  const bool is_single_item = keys_.size() == 1 && !this->is_estimation_mode();
+auto compact_theta_sketch_alloc<A>::serialize(unsigned header_size_bytes) const -> vector_bytes {
+  const bool is_single_item = entries_.size() == 1 && !this->is_estimation_mode();
   const uint8_t preamble_longs = this->is_empty() || is_single_item ? 1 : this->is_estimation_mode() ? 3 : 2;
-  const size_t size = header_size_bytes + sizeof(uint64_t) * preamble_longs + sizeof(uint64_t) * keys_.size();
-  vector_u8<A> bytes(size);
+  const size_t size = header_size_bytes + sizeof(uint64_t) * preamble_longs
+      + sizeof(uint64_t) * entries_.size();
+  vector_bytes bytes(size, 0, entries_.get_allocator());
   uint8_t* ptr = bytes.data() + header_size_bytes;
 
   ptr += copy_to_mem(&preamble_longs, ptr, sizeof(preamble_longs));
-  const uint8_t serial_version = theta_sketch_alloc<A>::SERIAL_VERSION;
+  const uint8_t serial_version = SERIAL_VERSION;
   ptr += copy_to_mem(&serial_version, ptr, sizeof(serial_version));
   const uint8_t type = SKETCH_TYPE;
   ptr += copy_to_mem(&type, ptr, sizeof(type));
   const uint16_t unused16 = 0;
   ptr += copy_to_mem(&unused16, ptr, sizeof(unused16));
   const uint8_t flags_byte(
-    (1 << theta_sketch_alloc<A>::flags::IS_COMPACT) |
-    (1 << theta_sketch_alloc<A>::flags::IS_READ_ONLY) |
-    (this->is_empty() ? 1 << theta_sketch_alloc<A>::flags::IS_EMPTY : 0) |
-    (this->is_ordered() ? 1 << theta_sketch_alloc<A>::flags::IS_ORDERED : 0)
+    (1 << flags::IS_COMPACT) |
+    (1 << flags::IS_READ_ONLY) |
+    (this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
+    (this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
   );
   ptr += copy_to_mem(&flags_byte, ptr, sizeof(flags_byte));
   const uint16_t seed_hash = get_seed_hash();
   ptr += copy_to_mem(&seed_hash, ptr, sizeof(seed_hash));
   if (!this->is_empty()) {
     if (!is_single_item) {
-      const uint32_t num_keys = keys_.size();
-      ptr += copy_to_mem(&num_keys, ptr, sizeof(num_keys));
+      const uint32_t num_entries = entries_.size();
+      ptr += copy_to_mem(&num_entries, ptr, sizeof(num_entries));
       const uint32_t unused32 = 0;
       ptr += copy_to_mem(&unused32, ptr, sizeof(unused32));
       if (this->is_estimation_mode()) {
-        ptr += copy_to_mem(&(this->theta_), ptr, sizeof(uint64_t));
+        ptr += copy_to_mem(&theta_, ptr, sizeof(uint64_t));
       }
     }
-    ptr += copy_to_mem(keys_.data(), ptr, sizeof(uint64_t) * keys_.size());
+    ptr += copy_to_mem(entries_.data(), ptr, entries_.size() * sizeof(uint64_t));
   }
-
   return bytes;
 }
 
 template<typename A>
-compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed) {
+compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed, const A& allocator) {
   uint8_t preamble_longs;
-  is.read((char*)&preamble_longs, sizeof(preamble_longs));
+  is.read(reinterpret_cast<char*>(&preamble_longs), sizeof(preamble_longs));
   uint8_t serial_version;
-  is.read((char*)&serial_version, sizeof(serial_version));
+  is.read(reinterpret_cast<char*>(&serial_version), sizeof(serial_version));
   uint8_t type;
-  is.read((char*)&type, sizeof(type));
+  is.read(reinterpret_cast<char*>(&type), sizeof(type));
   uint16_t unused16;
-  is.read((char*)&unused16, sizeof(unused16));
+  is.read(reinterpret_cast<char*>(&unused16), sizeof(unused16));
   uint8_t flags_byte;
-  is.read((char*)&flags_byte, sizeof(flags_byte));
+  is.read(reinterpret_cast<char*>(&flags_byte), sizeof(flags_byte));
   uint16_t seed_hash;
-  is.read((char*)&seed_hash, sizeof(seed_hash));
-  theta_sketch_alloc<A>::check_sketch_type(type, SKETCH_TYPE);
-  theta_sketch_alloc<A>::check_serial_version(serial_version, theta_sketch_alloc<A>::SERIAL_VERSION);
-  const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
-  if (!is_empty) theta_sketch_alloc<A>::check_seed_hash(seed_hash, theta_sketch_alloc<A>::get_seed_hash(seed));
-  return internal_deserialize(is, preamble_longs, flags_byte, seed_hash);
-}
-
-template<typename A>
-compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::internal_deserialize(std::istream& is, uint8_t preamble_longs, uint8_t flags_byte, uint16_t seed_hash) {
-  uint64_t theta = theta_sketch_alloc<A>::MAX_THETA;
-  uint32_t num_keys = 0;
-
-  const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
+  is.read(reinterpret_cast<char*>(&seed_hash), sizeof(seed_hash));
+  checker<true>::check_sketch_type(type, SKETCH_TYPE);
+  checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
+  const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
+  if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
+
+  uint64_t theta = theta_constants::MAX_THETA;
+  uint32_t num_entries = 0;
   if (!is_empty) {
     if (preamble_longs == 1) {
-      num_keys = 1;
+      num_entries = 1;
     } else {
-      is.read((char*)&num_keys, sizeof(num_keys));
+      is.read(reinterpret_cast<char*>(&num_entries), sizeof(num_entries));
       uint32_t unused32;
-      is.read((char*)&unused32, sizeof(unused32));
+      is.read(reinterpret_cast<char*>(&unused32), sizeof(unused32));
       if (preamble_longs > 2) {
-        is.read((char*)&theta, sizeof(theta));
+        is.read(reinterpret_cast<char*>(&theta), sizeof(theta));
       }
     }
   }
-  vector_u64<A> keys(num_keys);
-  if (!is_empty) is.read((char*)keys.data(), sizeof(uint64_t) * keys.size());
+  std::vector<uint64_t, A> entries(num_entries, 0, allocator);
+  if (!is_empty) is.read(reinterpret_cast<char*>(entries.data()), sizeof(uint64_t) * entries.size());
 
-  const bool is_ordered = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_ORDERED);
-  if (!is.good()) throw std::runtime_error("error reading from std::istream"); 
-  return compact_theta_sketch_alloc<A>(is_empty, theta, std::move(keys), seed_hash, is_ordered);
+  const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
+  if (!is.good()) throw std::runtime_error("error reading from std::istream");
+  return compact_theta_sketch_alloc(is_empty, is_ordered, seed_hash, theta, std::move(entries));
 }
 
 template<typename A>
-compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed) {
+compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed, const A& allocator) {
   ensure_minimum_memory(size, 8);
   const char* ptr = static_cast<const char*>(bytes);
+  const char* base = ptr;
   uint8_t preamble_longs;
   ptr += copy_from_mem(ptr, &preamble_longs, sizeof(preamble_longs));
   uint8_t serial_version;
@@ -804,28 +453,19 @@ compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(const v
   ptr += copy_from_mem(ptr, &flags_byte, sizeof(flags_byte));
   uint16_t seed_hash;
   ptr += copy_from_mem(ptr, &seed_hash, sizeof(seed_hash));
-  theta_sketch_alloc<A>::check_sketch_type(type, SKETCH_TYPE);
-  theta_sketch_alloc<A>::check_serial_version(serial_version, theta_sketch_alloc<A>::SERIAL_VERSION);
-  const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
-  if (!is_empty) theta_sketch_alloc<A>::check_seed_hash(seed_hash, theta_sketch_alloc<A>::get_seed_hash(seed));
-  return internal_deserialize(ptr, size - (ptr - static_cast<const char*>(bytes)), preamble_longs, flags_byte, seed_hash);
-}
-
-template<typename A>
-compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::internal_deserialize(const void* bytes, size_t size, uint8_t preamble_longs, uint8_t flags_byte, uint16_t seed_hash) {
-  const char* ptr = static_cast<const char*>(bytes);
-  const char* base = ptr;
-
-  uint64_t theta = theta_sketch_alloc<A>::MAX_THETA;
-  uint32_t num_keys = 0;
+  checker<true>::check_sketch_type(type, SKETCH_TYPE);
+  checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
+  const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
+  if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
 
-  const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
+  uint64_t theta = theta_constants::MAX_THETA;
+  uint32_t num_entries = 0;
   if (!is_empty) {
     if (preamble_longs == 1) {
-      num_keys = 1;
+      num_entries = 1;
     } else {
       ensure_minimum_memory(size, 8); // read the first prelong before this method
-      ptr += copy_from_mem(ptr, &num_keys, sizeof(num_keys));
+      ptr += copy_from_mem(ptr, &num_entries, sizeof(num_entries));
       uint32_t unused32;
       ptr += copy_from_mem(ptr, &unused32, sizeof(unused32));
       if (preamble_longs > 2) {
@@ -834,106 +474,16 @@ compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::internal_deserializ
       }
     }
   }
-  const size_t keys_size_bytes = sizeof(uint64_t) * num_keys;
-  check_memory_size(ptr - base + keys_size_bytes, size);
-  vector_u64<A> keys(num_keys);
-  if (!is_empty) ptr += copy_from_mem(ptr, keys.data(), keys_size_bytes);
-
-  const bool is_ordered = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_ORDERED);
-  return compact_theta_sketch_alloc<A>(is_empty, theta, std::move(keys), seed_hash, is_ordered);
-}
-
-template<typename A>
-typename theta_sketch_alloc<A>::const_iterator compact_theta_sketch_alloc<A>::begin() const {
-  return typename theta_sketch_alloc<A>::const_iterator(keys_.data(), keys_.size(), 0);
-}
-
-template<typename A>
-typename theta_sketch_alloc<A>::const_iterator compact_theta_sketch_alloc<A>::end() const {
-  return typename theta_sketch_alloc<A>::const_iterator(keys_.data(), keys_.size(), keys_.size());
-}
-
-// builder
-
-template<typename A>
-update_theta_sketch_alloc<A>::builder::builder():
-lg_k_(DEFAULT_LG_K), rf_(DEFAULT_RESIZE_FACTOR), p_(1), seed_(DEFAULT_SEED) {}
-
-template<typename A>
-typename update_theta_sketch_alloc<A>::builder& update_theta_sketch_alloc<A>::builder::set_lg_k(uint8_t lg_k) {
-  if (lg_k < MIN_LG_K) {
-    throw std::invalid_argument("lg_k must not be less than " + std::to_string(MIN_LG_K) + ": " + std::to_string(lg_k));
-  }
-  lg_k_ = lg_k;
-  return *this;
-}
-
-template<typename A>
-typename update_theta_sketch_alloc<A>::builder& update_theta_sketch_alloc<A>::builder::set_resize_factor(resize_factor rf) {
-  rf_ = rf;
-  return *this;
-}
+  const size_t entries_size_bytes = sizeof(uint64_t) * num_entries;
+  check_memory_size(ptr - base + entries_size_bytes, size);
+  std::vector<uint64_t, A> entries(num_entries, 0, allocator);
+  if (!is_empty) ptr += copy_from_mem(ptr, entries.data(), entries_size_bytes);
 
-template<typename A>
-typename update_theta_sketch_alloc<A>::builder& update_theta_sketch_alloc<A>::builder::set_p(float p) {
-  p_ = p;
-  return *this;
-}
-
-template<typename A>
-typename update_theta_sketch_alloc<A>::builder& update_theta_sketch_alloc<A>::builder::set_seed(uint64_t seed) {
-  seed_ = seed;
-  return *this;
-}
-
-template<typename A>
-uint8_t update_theta_sketch_alloc<A>::builder::starting_sub_multiple(uint8_t lg_tgt, uint8_t lg_min, uint8_t lg_rf) {
-  return (lg_tgt <= lg_min) ? lg_min : (lg_rf == 0) ? lg_tgt : ((lg_tgt - lg_min) % lg_rf) + lg_min;
-}
-
-template<typename A>
-update_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::builder::build() const {
-  return update_theta_sketch_alloc<A>(starting_sub_multiple(lg_k_ + 1, MIN_LG_K, static_cast<uint8_t>(rf_)), lg_k_, rf_, p_, seed_);
-}
-
-// iterator
-
-template<typename A>
-theta_sketch_alloc<A>::const_iterator::const_iterator(const uint64_t* keys, uint32_t size, uint32_t index):
-keys_(keys), size_(size), index_(index) {
-  while (index_ < size_ && keys_[index_] == 0) ++index_;
-}
-
-template<typename A>
-typename theta_sketch_alloc<A>::const_iterator& theta_sketch_alloc<A>::const_iterator::operator++() {
-  do {
-    ++index_;
-  } while (index_ < size_ && keys_[index_] == 0);
-  return *this;
-}
-
-template<typename A>
-typename theta_sketch_alloc<A>::const_iterator theta_sketch_alloc<A>::const_iterator::operator++(int) {
-  const_iterator tmp(*this);
-  operator++();
-  return tmp;
-}
-
-template<typename A>
-bool theta_sketch_alloc<A>::const_iterator::operator==(const const_iterator& other) const {
-  return index_ == other.index_;
-}
-
-template<typename A>
-bool theta_sketch_alloc<A>::const_iterator::operator!=(const const_iterator& other) const {
-  return index_ != other.index_;
-}
-
-template<typename A>
-uint64_t theta_sketch_alloc<A>::const_iterator::operator*() const {
-  return keys_[index_];
+  const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
+  return compact_theta_sketch_alloc(is_empty, is_ordered, seed_hash, theta, std::move(entries));
 }
 
 } /* namespace datasketches */
 
 #endif
+
diff --git a/be/src/thirdparty/datasketches/theta_union.hpp b/be/src/thirdparty/datasketches/theta_union.hpp
index 6cf8ccc..44f9b52 100644
--- a/be/src/thirdparty/datasketches/theta_union.hpp
+++ b/be/src/thirdparty/datasketches/theta_union.hpp
@@ -20,103 +20,70 @@
 #ifndef THETA_UNION_HPP_
 #define THETA_UNION_HPP_
 
-#include <memory>
-#include <functional>
-#include <climits>
-
+#include "serde.hpp"
 #include "theta_sketch.hpp"
+#include "theta_union_base.hpp"
 
 namespace datasketches {
 
-/*
- * author Alexander Saydakov
- * author Lee Rhodes
- * author Kevin Lang
- */
-
-template<typename A>
+template<typename Allocator = std::allocator<uint64_t>>
 class theta_union_alloc {
 public:
-  class builder;
+  using Entry = uint64_t;
+  using ExtractKey = trivial_extract_key;
+  using Sketch = theta_sketch_alloc<Allocator>;
+  using CompactSketch = compact_theta_sketch_alloc<Allocator>;
+  using resize_factor = theta_constants::resize_factor;
+
+  struct pass_through_policy {
+    uint64_t operator()(uint64_t internal_entry, uint64_t incoming_entry) const {
+      unused(incoming_entry);
+      return internal_entry;
+    }
+  };
+  using State = theta_union_base<Entry, ExtractKey, pass_through_policy, Sketch, CompactSketch, Allocator>;
 
   // No constructor here. Use builder instead.
+  class builder;
 
   /**
    * This method is to update the union with a given sketch
    * @param sketch to update the union with
    */
-  void update(const theta_sketch_alloc<A>& sketch);
+  template<typename FwdSketch>
+  void update(FwdSketch&& sketch);
 
   /**
    * This method produces a copy of the current state of the union as a compact sketch.
    * @param ordered optional flag to specify if ordered sketch should be produced
    * @return the result of the union
    */
-  compact_theta_sketch_alloc<A> get_result(bool ordered = true) const;
+  CompactSketch get_result(bool ordered = true) const;
 
 private:
-  bool is_empty_;
-  uint64_t theta_;
-  update_theta_sketch_alloc<A> state_;
+  State state_;
 
   // for builder
-  theta_union_alloc(uint64_t theta, update_theta_sketch_alloc<A>&& state);
+  theta_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const Allocator& allocator);
 };
 
-// builder
-
 template<typename A>
-class theta_union_alloc<A>::builder {
+class theta_union_alloc<A>::builder: public theta_base_builder<builder, A> {
 public:
-  typedef typename update_theta_sketch_alloc<A>::resize_factor resize_factor;
-
-  /**
-   * Set log2(k), where k is a nominal number of entries in the sketch
-   * @param lg_k base 2 logarithm of nominal number of entries
-   * @return this builder
-   */
-  builder& set_lg_k(uint8_t lg_k);
-
-  /**
-   * Set resize factor for the internal hash table (defaults to 8)
-   * @param rf resize factor
-   * @return this builder
-   */
-  builder& set_resize_factor(resize_factor rf);
-
-  /**
-   * Set sampling probability (initial theta). The default is 1, so the sketch retains
-   * all entries until it reaches the limit, at which point it goes into the estimation mode
-   * and reduces the effective sampling probability (theta) as necessary.
-   * @param p sampling probability
-   * @return this builder
-   */
-  builder& set_p(float p);
-
-  /**
-   * Set the seed for the hash function. Should be used carefully if needed.
-   * Sketches produced with different seed are not compatible
-   * and cannot be mixed in set operations.
-   * @param seed hash seed
-   * @return this builder
-   */
-  builder& set_seed(uint64_t seed);
+  builder(const A& allocator = A());
 
   /**
    * This is to create an instance of the union with predefined parameters.
-   * @return and instance of the union
+   * @return an instance of the union
    */
   theta_union_alloc<A> build() const;
-
-private:
-  typename update_theta_sketch_alloc<A>::builder sketch_builder;
 };
 
 // alias with default allocator for convenience
-typedef theta_union_alloc<std::allocator<void>> theta_union;
+using theta_union = theta_union_alloc<std::allocator<uint64_t>>;
 
 } /* namespace datasketches */
 
 #include "theta_union_impl.hpp"
 
-# endif
+#endif
diff --git a/be/src/thirdparty/datasketches/theta_union_base.hpp b/be/src/thirdparty/datasketches/theta_union_base.hpp
new file mode 100644
index 0000000..d41f5bd
--- /dev/null
+++ b/be/src/thirdparty/datasketches/theta_union_base.hpp
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THETA_UNION_BASE_HPP_
+#define THETA_UNION_BASE_HPP_
+
+#include "theta_update_sketch_base.hpp"
+
+namespace datasketches {
+
+template<
+  typename Entry,
+  typename ExtractKey,
+  typename Policy,
+  typename Sketch,
+  typename CompactSketch,
+  typename Allocator
+>
+class theta_union_base {
+public:
+  using hash_table = theta_update_sketch_base<Entry, ExtractKey, Allocator>;
+  using resize_factor = typename hash_table::resize_factor;
+  using comparator = compare_by_key<ExtractKey>;
+
+  theta_union_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
+
+  template<typename FwdSketch>
+  void update(FwdSketch&& sketch);
+
+  CompactSketch get_result(bool ordered = true) const;
+
+  const Policy& get_policy() const;
+
+private:
+  Policy policy_;
+  hash_table table_;
+  uint64_t union_theta_;
+};
+
+} /* namespace datasketches */
+
+#include "theta_union_base_impl.hpp"
+
+#endif
diff --git a/be/src/thirdparty/datasketches/theta_union_base_impl.hpp b/be/src/thirdparty/datasketches/theta_union_base_impl.hpp
new file mode 100644
index 0000000..ec8ce56
--- /dev/null
+++ b/be/src/thirdparty/datasketches/theta_union_base_impl.hpp
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THETA_UNION_BASE_IMPL_HPP_
+#define THETA_UNION_BASE_IMPL_HPP_
+
+#include <algorithm>
+
+#include "conditional_forward.hpp"
+
+namespace datasketches {
+
+template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
+theta_union_base<EN, EK, P, S, CS, A>::theta_union_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf,
+    uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
+policy_(policy),
+table_(lg_cur_size, lg_nom_size, rf, theta, seed, allocator),
+union_theta_(table_.theta_)
+{}
+
+template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
+template<typename SS>
+void theta_union_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
+  if (sketch.is_empty()) return;
+  if (sketch.get_seed_hash() != compute_seed_hash(table_.seed_)) throw std::invalid_argument("seed hash mismatch");
+  table_.is_empty_ = false;
+  if (sketch.get_theta64() < union_theta_) union_theta_ = sketch.get_theta64();
+  for (auto& entry: sketch) {
+    const uint64_t hash = EK()(entry);
+    if (hash < union_theta_) {
+      auto result = table_.find(hash);
+      if (!result.second) {
+        table_.insert(result.first, conditional_forward<SS>(entry));
+      } else {
+        policy_(*result.first, conditional_forward<SS>(entry));
+      }
+    } else {
+      if (sketch.is_ordered()) break; // early stop
+    }
+  }
+  if (table_.theta_ < union_theta_) union_theta_ = table_.theta_;
+}
+
+template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
+CS theta_union_base<EN, EK, P, S, CS, A>::get_result(bool ordered) const {
+  std::vector<EN, A> entries(table_.allocator_);
+  if (table_.is_empty_) return CS(true, true, compute_seed_hash(table_.seed_), union_theta_, std::move(entries));
+  entries.reserve(table_.num_entries_);
+  uint64_t theta = std::min(union_theta_, table_.theta_);
+  const uint32_t nominal_num = 1 << table_.lg_nom_size_;
+  if (union_theta_ >= theta && table_.num_entries_ <= nominal_num) {
+    std::copy_if(table_.begin(), table_.end(), std::back_inserter(entries), key_not_zero<EN, EK>());
+  } else {
+    std::copy_if(table_.begin(), table_.end(), std::back_inserter(entries), key_not_zero_less_than<uint64_t, EN, EK>(theta));
+    if (entries.size() > nominal_num) {
+      std::nth_element(entries.begin(), entries.begin() + nominal_num, entries.end(), comparator());
+      theta = EK()(entries[nominal_num]);
+      entries.erase(entries.begin() + nominal_num, entries.end());
+      entries.shrink_to_fit();
+    }
+  }
+  if (ordered) std::sort(entries.begin(), entries.end(), comparator());
+  return CS(table_.is_empty_, ordered, compute_seed_hash(table_.seed_), theta, std::move(entries));
+}
+
+template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
+const P& theta_union_base<EN, EK, P, S, CS, A>::get_policy() const {
+  return policy_;
+}
+
+} /* namespace datasketches */
+
+#endif
diff --git a/be/src/thirdparty/datasketches/theta_union_impl.hpp b/be/src/thirdparty/datasketches/theta_union_impl.hpp
index 4d8ebaa..4708d70 100644
--- a/be/src/thirdparty/datasketches/theta_union_impl.hpp
+++ b/be/src/thirdparty/datasketches/theta_union_impl.hpp
@@ -22,86 +22,30 @@
 
 namespace datasketches {
 
-/*
- * author Alexander Saydakov
- * author Lee Rhodes
- * author Kevin Lang
- */
-
-template<typename A>
-theta_union_alloc<A>::theta_union_alloc(uint64_t theta, update_theta_sketch_alloc<A>&& state):
-is_empty_(true), theta_(theta), state_(std::move(state)) {}
-
-template<typename A>
-void theta_union_alloc<A>::update(const theta_sketch_alloc<A>& sketch) {
-  if (sketch.is_empty()) return;
-  if (sketch.get_seed_hash() != state_.get_seed_hash()) throw std::invalid_argument("seed hash mismatch");
-  is_empty_ = false;
-  if (sketch.get_theta64() < theta_) theta_ = sketch.get_theta64();
-  if (sketch.is_ordered()) {
-    for (auto hash: sketch) {
-      if (hash >= theta_) break; // early stop
-      state_.internal_update(hash);
-    }
-  } else {
-    for (auto hash: sketch) if (hash < theta_) state_.internal_update(hash);
-  }
-  if (state_.get_theta64() < theta_) theta_ = state_.get_theta64();
-}
-
 template<typename A>
-compact_theta_sketch_alloc<A> theta_union_alloc<A>::get_result(bool ordered) const {
-  if (is_empty_) return state_.compact(ordered);
-  const uint32_t nom_num_keys = 1 << state_.lg_nom_size_;
-  if (theta_ >= state_.theta_ && state_.get_num_retained() <= nom_num_keys) return state_.compact(ordered);
-  uint64_t theta = std::min(theta_, state_.get_theta64());
-  vector_u64<A> keys(state_.get_num_retained());
-  uint32_t num_keys = 0;
-  for (auto key: state_) {
-    if (key < theta) keys[num_keys++] = key;
-  }
-  if (num_keys > nom_num_keys) {
-    std::nth_element(keys.begin(), keys.begin() + nom_num_keys, keys.begin() + num_keys);
-    theta = keys[nom_num_keys];
-    num_keys = nom_num_keys;
-  }
-  if (num_keys != state_.get_num_retained()) {
-    keys.resize(num_keys);
-  }
-  if (ordered) std::sort(keys.begin(), keys.end());
-  return compact_theta_sketch_alloc<A>(false, theta, std::move(keys), state_.get_seed_hash(), ordered);
-}
-
-// builder
+theta_union_alloc<A>::theta_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const A& allocator):
+state_(lg_cur_size, lg_nom_size, rf, theta, seed, pass_through_policy(), allocator)
+{}
 
 template<typename A>
-typename theta_union_alloc<A>::builder& theta_union_alloc<A>::builder::set_lg_k(uint8_t lg_k) {
-  sketch_builder.set_lg_k(lg_k);
-  return *this;
+template<typename SS>
+void theta_union_alloc<A>::update(SS&& sketch) {
+  state_.update(std::forward<SS>(sketch));
 }
 
 template<typename A>
-typename theta_union_alloc<A>::builder& theta_union_alloc<A>::builder::set_resize_factor(resize_factor rf) {
-  sketch_builder.set_resize_factor(rf);
-  return *this;
+auto theta_union_alloc<A>::get_result(bool ordered) const -> CompactSketch {
+  return state_.get_result(ordered);
 }
 
 template<typename A>
-typename theta_union_alloc<A>::builder& theta_union_alloc<A>::builder::set_p(float p) {
-  sketch_builder.set_p(p);
-  return *this;
-}
-
-template<typename A>
-typename theta_union_alloc<A>::builder& theta_union_alloc<A>::builder::set_seed(uint64_t seed) {
-  sketch_builder.set_seed(seed);
-  return *this;
-}
+theta_union_alloc<A>::builder::builder(const A& allocator): theta_base_builder<builder, A>(allocator) {}
 
 template<typename A>
-theta_union_alloc<A> theta_union_alloc<A>::builder::build() const {
-  update_theta_sketch_alloc<A> sketch = sketch_builder.build();
-  return theta_union_alloc(sketch.get_theta64(), std::move(sketch));
+auto theta_union_alloc<A>::builder::build() const -> theta_union_alloc {
+  return theta_union_alloc(
+      this->starting_sub_multiple(this->lg_k_ + 1, this->MIN_LG_K, static_cast<uint8_t>(this->rf_)),
+      this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->allocator_);
 }
 
 } /* namespace datasketches */
diff --git a/be/src/thirdparty/datasketches/theta_update_sketch_base.hpp b/be/src/thirdparty/datasketches/theta_update_sketch_base.hpp
new file mode 100644
index 0000000..eae7984
--- /dev/null
+++ b/be/src/thirdparty/datasketches/theta_update_sketch_base.hpp
@@ -0,0 +1,243 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THETA_UPDATE_SKETCH_BASE_HPP_
+#define THETA_UPDATE_SKETCH_BASE_HPP_
+
+#include <vector>
+#include <climits>
+#include <cmath>
+
+#include "common_defs.hpp"
+#include "MurmurHash3.h"
+#include "theta_comparators.hpp"
+#include "theta_constants.hpp"
+
+namespace datasketches {
+
+template<
+  typename Entry,
+  typename ExtractKey,
+  typename Allocator
+>
+struct theta_update_sketch_base {
+  using resize_factor = theta_constants::resize_factor;
+  using comparator = compare_by_key<ExtractKey>;
+
+  theta_update_sketch_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta,
+      uint64_t seed, const Allocator& allocator, bool is_empty = true);
+  theta_update_sketch_base(const theta_update_sketch_base& other);
+  theta_update_sketch_base(theta_update_sketch_base&& other) noexcept;
+  ~theta_update_sketch_base();
+  theta_update_sketch_base& operator=(const theta_update_sketch_base& other);
+  theta_update_sketch_base& operator=(theta_update_sketch_base&& other);
+
+  using iterator = Entry*;
+
+  inline uint64_t hash_and_screen(const void* data, size_t length);
+
+  inline std::pair<iterator, bool> find(uint64_t key) const;
+
+  template<typename FwdEntry>
+  inline void insert(iterator it, FwdEntry&& entry);
+
+  iterator begin() const;
+  iterator end() const;
+
+  // resize threshold = 0.5 tuned for speed
+  static constexpr double RESIZE_THRESHOLD = 0.5;
+  // hash table rebuild threshold = 15/16
+  static constexpr double REBUILD_THRESHOLD = 15.0 / 16.0;
+
+  static constexpr uint8_t STRIDE_HASH_BITS = 7;
+  static constexpr uint32_t STRIDE_MASK = (1 << STRIDE_HASH_BITS) - 1;
+
+  Allocator allocator_;
+  bool is_empty_;
+  uint8_t lg_cur_size_;
+  uint8_t lg_nom_size_;
+  resize_factor rf_;
+  uint32_t num_entries_;
+  uint64_t theta_;
+  uint64_t seed_;
+  Entry* entries_;
+
+  void resize();
+  void rebuild();
+  void trim();
+
+  static inline uint32_t get_capacity(uint8_t lg_cur_size, uint8_t lg_nom_size);
+  static inline uint32_t get_stride(uint64_t key, uint8_t lg_size);
+  static void consolidate_non_empty(Entry* entries, size_t size, size_t num);
+};
+
+// builder
+
+template<typename Derived, typename Allocator>
+class theta_base_builder {
+public:
+  using resize_factor = theta_constants::resize_factor;
+  static const uint8_t MIN_LG_K = theta_constants::MIN_LG_K;
+  static const uint8_t MAX_LG_K = theta_constants::MAX_LG_K;
+  static const uint8_t DEFAULT_LG_K = 12;
+  static const resize_factor DEFAULT_RESIZE_FACTOR = resize_factor::X8;
+
+  /**
+   * Creates and instance of the builder with default parameters.
+   */
+  theta_base_builder(const Allocator& allocator);
+
+  /**
+   * Set log2(k), where k is a nominal number of entries in the sketch
+   * @param lg_k base 2 logarithm of nominal number of entries
+   * @return this builder
+   */
+  Derived& set_lg_k(uint8_t lg_k);
+
+  /**
+   * Set resize factor for the internal hash table (defaults to 8)
+   * @param rf resize factor
+   * @return this builder
+   */
+  Derived& set_resize_factor(resize_factor rf);
+
+  /**
+   * Set sampling probability (initial theta). The default is 1, so the sketch retains
+   * all entries until it reaches the limit, at which point it goes into the estimation mode
+   * and reduces the effective sampling probability (theta) as necessary.
+   * @param p sampling probability
+   * @return this builder
+   */
+  Derived& set_p(float p);
+
+  /**
+   * Set the seed for the hash function. Should be used carefully if needed.
+   * Sketches produced with different seed are not compatible
+   * and cannot be mixed in set operations.
+   * @param seed hash seed
+   * @return this builder
+   */
+  Derived& set_seed(uint64_t seed);
+
+protected:
+  Allocator allocator_;
+  uint8_t lg_k_;
+  resize_factor rf_;
+  float p_;
+  uint64_t seed_;
+
+  uint64_t starting_theta() const;
+  uint8_t starting_lg_size() const;
+  static uint8_t starting_sub_multiple(uint8_t lg_tgt, uint8_t lg_min, uint8_t lg_rf);
+};
+
+// key extractor
+
+struct trivial_extract_key {
+  template<typename T>
+  auto operator()(T&& entry) const -> decltype(std::forward<T>(entry)) {
+    return std::forward<T>(entry);
+  }
+};
+
+// key not zero
+
+template<typename Entry, typename ExtractKey>
+class key_not_zero {
+public:
+  bool operator()(const Entry& entry) const {
+    return ExtractKey()(entry) != 0;
+  }
+};
+
+template<typename Key, typename Entry, typename ExtractKey>
+class key_not_zero_less_than {
+public:
+  explicit key_not_zero_less_than(const Key& key): key(key) {}
+  bool operator()(const Entry& entry) const {
+    return ExtractKey()(entry) != 0 && ExtractKey()(entry) < this->key;
+  }
+private:
+  Key key;
+};
+
+// MurMur3 hash functions
+
+static inline uint64_t compute_hash(const void* data, size_t length, uint64_t seed) {
+  HashState hashes;
+  MurmurHash3_x64_128(data, length, seed, hashes);
+  return (hashes.h1 >> 1); // Java implementation does unsigned shift >>> to make values positive
+}
+
+// iterators
+
+template<typename Entry, typename ExtractKey>
+class theta_iterator: public std::iterator<std::input_iterator_tag, Entry> {
+public:
+  theta_iterator(Entry* entries, uint32_t size, uint32_t index);
+  theta_iterator& operator++();
+  theta_iterator operator++(int);
+  bool operator==(const theta_iterator& other) const;
+  bool operator!=(const theta_iterator& other) const;
+  Entry& operator*() const;
+
+private:
+  Entry* entries_;
+  uint32_t size_;
+  uint32_t index_;
+};
+
+template<typename Entry, typename ExtractKey>
+class theta_const_iterator: public std::iterator<std::input_iterator_tag, Entry> {
+public:
+  theta_const_iterator(const Entry* entries, uint32_t size, uint32_t index);
+  theta_const_iterator& operator++();
+  theta_const_iterator operator++(int);
+  bool operator==(const theta_const_iterator& other) const;
+  bool operator!=(const theta_const_iterator& other) const;
+  const Entry& operator*() const;
+
+private:
+  const Entry* entries_;
+  uint32_t size_;
+  uint32_t index_;
+};
+
+// double value canonicalization for compatibility with Java
+static inline int64_t canonical_double(double value) {
+  union {
+    int64_t long_value;
+    double double_value;
+  } long_double_union;
+
+  if (value == 0.0) {
+    long_double_union.double_value = 0.0; // canonicalize -0.0 to 0.0
+  } else if (std::isnan(value)) {
+    long_double_union.long_value = 0x7ff8000000000000L; // canonicalize NaN using value from Java's Double.doubleToLongBits()
+  } else {
+    long_double_union.double_value = value;
+  }
+  return long_double_union.long_value;
+}
+
+} /* namespace datasketches */
+
+#include "theta_update_sketch_base_impl.hpp"
+
+#endif
diff --git a/be/src/thirdparty/datasketches/theta_update_sketch_base_impl.hpp b/be/src/thirdparty/datasketches/theta_update_sketch_base_impl.hpp
new file mode 100644
index 0000000..a343c78
--- /dev/null
+++ b/be/src/thirdparty/datasketches/theta_update_sketch_base_impl.hpp
@@ -0,0 +1,394 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THETA_UPDATE_SKETCH_BASE_IMPL_HPP_
+#define THETA_UPDATE_SKETCH_BASE_IMPL_HPP_
+
+#include <iostream>
+#include <sstream>
+#include <algorithm>
+
+namespace datasketches {
+
+template<typename EN, typename EK, typename A>
+theta_update_sketch_base<EN, EK, A>::theta_update_sketch_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const A& allocator, bool is_empty):
+allocator_(allocator),
+is_empty_(is_empty),
+lg_cur_size_(lg_cur_size),
+lg_nom_size_(lg_nom_size),
+rf_(rf),
+num_entries_(0),
+theta_(theta),
+seed_(seed),
+entries_(nullptr)
+{
+  if (lg_cur_size > 0) {
+    const size_t size = 1 << lg_cur_size;
+    entries_ = allocator_.allocate(size);
+    for (size_t i = 0; i < size; ++i) EK()(entries_[i]) = 0;
+  }
+}
+
+template<typename EN, typename EK, typename A>
+theta_update_sketch_base<EN, EK, A>::theta_update_sketch_base(const theta_update_sketch_base& other):
+allocator_(other.allocator_),
+is_empty_(other.is_empty_),
+lg_cur_size_(other.lg_cur_size_),
+lg_nom_size_(other.lg_nom_size_),
+rf_(other.rf_),
+num_entries_(other.num_entries_),
+theta_(other.theta_),
+seed_(other.seed_),
+entries_(nullptr)
+{
+  if (other.entries_ != nullptr) {
+    const size_t size = 1 << lg_cur_size_;
+    entries_ = allocator_.allocate(size);
+    for (size_t i = 0; i < size; ++i) {
+      if (EK()(other.entries_[i]) != 0) {
+        new (&entries_[i]) EN(other.entries_[i]);
+      } else {
+        EK()(entries_[i]) = 0;
+      }
+    }
+  }
+}
+
+template<typename EN, typename EK, typename A>
+theta_update_sketch_base<EN, EK, A>::theta_update_sketch_base(theta_update_sketch_base&& other) noexcept:
+allocator_(std::move(other.allocator_)),
+is_empty_(other.is_empty_),
+lg_cur_size_(other.lg_cur_size_),
+lg_nom_size_(other.lg_nom_size_),
+rf_(other.rf_),
+num_entries_(other.num_entries_),
+theta_(other.theta_),
+seed_(other.seed_),
+entries_(other.entries_)
+{
+  other.entries_ = nullptr;
+}
+
+template<typename EN, typename EK, typename A>
+theta_update_sketch_base<EN, EK, A>::~theta_update_sketch_base()
+{
+  if (entries_ != nullptr) {
+    const size_t size = 1 << lg_cur_size_;
+    for (size_t i = 0; i < size; ++i) {
+      if (EK()(entries_[i]) != 0) entries_[i].~EN();
+    }
+    allocator_.deallocate(entries_, size);
+  }
+}
+
+template<typename EN, typename EK, typename A>
+theta_update_sketch_base<EN, EK, A>& theta_update_sketch_base<EN, EK, A>::operator=(const theta_update_sketch_base& other) {
+  theta_update_sketch_base<EN, EK, A> copy(other);
+  std::swap(allocator_, copy.allocator_);
+  std::swap(is_empty_, copy.is_empty_);
+  std::swap(lg_cur_size_, copy.lg_cur_size_);
+  std::swap(lg_nom_size_, copy.lg_nom_size_);
+  std::swap(rf_, copy.rf_);
+  std::swap(num_entries_, copy.num_entries_);
+  std::swap(theta_, copy.theta_);
+  std::swap(seed_, copy.seed_);
+  std::swap(entries_, copy.entries_);
+  return *this;
+}
+
+template<typename EN, typename EK, typename A>
+theta_update_sketch_base<EN, EK, A>& theta_update_sketch_base<EN, EK, A>::operator=(theta_update_sketch_base&& other) {
+  std::swap(allocator_, other.allocator_);
+  std::swap(is_empty_, other.is_empty_);
+  std::swap(lg_cur_size_, other.lg_cur_size_);
+  std::swap(lg_nom_size_, other.lg_nom_size_);
+  std::swap(rf_, other.rf_);
+  std::swap(num_entries_, other.num_entries_);
+  std::swap(theta_, other.theta_);
+  std::swap(seed_, other.seed_);
+  std::swap(entries_, other.entries_);
+  return *this;
+}
+
+template<typename EN, typename EK, typename A>
+uint64_t theta_update_sketch_base<EN, EK, A>::hash_and_screen(const void* data, size_t length) {
+  is_empty_ = false;
+  const uint64_t hash = compute_hash(data, length, seed_);
+  if (hash >= theta_) return 0; // hash == 0 is reserved to mark empty slots in the table
+  return hash;
+}
+
+template<typename EN, typename EK, typename A>
+auto theta_update_sketch_base<EN, EK, A>::find(uint64_t key) const -> std::pair<iterator, bool> {
+  const size_t size = 1 << lg_cur_size_;
+  const size_t mask = size - 1;
+  const uint32_t stride = get_stride(key, lg_cur_size_);
+  uint32_t index = static_cast<uint32_t>(key) & mask;
+  // search for duplicate or zero
+  const uint32_t loop_index = index;
+  do {
+    const uint64_t probe = EK()(entries_[index]);
+    if (probe == 0) {
+      return std::pair<iterator, bool>(&entries_[index], false);
+    } else if (probe == key) {
+      return std::pair<iterator, bool>(&entries_[index], true);
+    }
+    index = (index + stride) & mask;
+  } while (index != loop_index);
+  throw std::logic_error("key not found and no empty slots!");
+}
+
+template<typename EN, typename EK, typename A>
+template<typename Fwd>
+void theta_update_sketch_base<EN, EK, A>::insert(iterator it, Fwd&& entry) {
+  new (it) EN(std::forward<Fwd>(entry));
+  ++num_entries_;
+  if (num_entries_ > get_capacity(lg_cur_size_, lg_nom_size_)) {
+    if (lg_cur_size_ <= lg_nom_size_) {
+      resize();
+    } else {
+      rebuild();
+    }
+  }
+}
+
+template<typename EN, typename EK, typename A>
+auto theta_update_sketch_base<EN, EK, A>::begin() const -> iterator {
+  return entries_;
+}
+
+template<typename EN, typename EK, typename A>
+auto theta_update_sketch_base<EN, EK, A>::end() const -> iterator {
+  return &entries_[1 << lg_cur_size_];
+}
+
+template<typename EN, typename EK, typename A>
+uint32_t theta_update_sketch_base<EN, EK, A>::get_capacity(uint8_t lg_cur_size, uint8_t lg_nom_size) {
+  const double fraction = (lg_cur_size <= lg_nom_size) ? RESIZE_THRESHOLD : REBUILD_THRESHOLD;
+  return std::floor(fraction * (1 << lg_cur_size));
+}
+
+template<typename EN, typename EK, typename A>
+uint32_t theta_update_sketch_base<EN, EK, A>::get_stride(uint64_t key, uint8_t lg_size) {
+  // odd and independent of index assuming lg_size lowest bits of the key were used for the index
+  return (2 * static_cast<uint32_t>((key >> lg_size) & STRIDE_MASK)) + 1;
+}
+
+template<typename EN, typename EK, typename A>
+void theta_update_sketch_base<EN, EK, A>::resize() {
+  const size_t old_size = 1 << lg_cur_size_;
+  const uint8_t lg_tgt_size = lg_nom_size_ + 1;
+  const uint8_t factor = std::max(1, std::min(static_cast<int>(rf_), lg_tgt_size - lg_cur_size_));
+  lg_cur_size_ += factor;
+  const size_t new_size = 1 << lg_cur_size_;
+  EN* old_entries = entries_;
+  entries_ = allocator_.allocate(new_size);
+  for (size_t i = 0; i < new_size; ++i) EK()(entries_[i]) = 0;
+  num_entries_ = 0;
+  for (size_t i = 0; i < old_size; ++i) {
+    const uint64_t key = EK()(old_entries[i]);
+    if (key != 0) {
+      insert(find(key).first, std::move(old_entries[i])); // consider a special insert with no comparison
+      old_entries[i].~EN();
+    }
+  }
+  allocator_.deallocate(old_entries, old_size);
+}
+
+// assumes number of entries > nominal size
+template<typename EN, typename EK, typename A>
+void theta_update_sketch_base<EN, EK, A>::rebuild() {
+  const size_t size = 1 << lg_cur_size_;
+  const uint32_t nominal_size = 1 << lg_nom_size_;
+
+  // empty entries have uninitialized payloads
+  // TODO: avoid this for empty or trivial payloads (arithmetic types)
+  consolidate_non_empty(entries_, size, num_entries_);
+
+  std::nth_element(entries_, entries_ + nominal_size, entries_ + num_entries_, comparator());
+  this->theta_ = EK()(entries_[nominal_size]);
+  EN* old_entries = entries_;
+  const size_t num_old_entries = num_entries_;
+  entries_ = allocator_.allocate(size);
+  for (size_t i = 0; i < size; ++i) EK()(entries_[i]) = 0;
+  num_entries_ = 0;
+  // relies on consolidating non-empty entries to the front
+  for (size_t i = 0; i < nominal_size; ++i) {
+    insert(find(EK()(old_entries[i])).first, std::move(old_entries[i])); // consider a special insert with no comparison
+    old_entries[i].~EN();
+  }
+  for (size_t i = nominal_size; i < num_old_entries; ++i) old_entries[i].~EN();
+  allocator_.deallocate(old_entries, size);
+}
+
+template<typename EN, typename EK, typename A>
+void theta_update_sketch_base<EN, EK, A>::trim() {
+  if (num_entries_ > static_cast<uint32_t>(1 << lg_nom_size_)) rebuild();
+}
+
+template<typename EN, typename EK, typename A>
+void theta_update_sketch_base<EN, EK, A>::consolidate_non_empty(EN* entries, size_t size, size_t num) {
+  // find the first empty slot
+  size_t i = 0;
+  while (i < size) {
+    if (EK()(entries[i]) == 0) break;
+    ++i;
+  }
+  // scan the rest and move non-empty entries to the front
+  for (size_t j = i + 1; j < size; ++j) {
+    if (EK()(entries[j]) != 0) {
+      new (&entries[i]) EN(std::move(entries[j]));
+      entries[j].~EN();
+      EK()(entries[j]) = 0;
+      ++i;
+      if (i == num) break;
+    }
+  }
+}
+
+// builder
+
+template<typename Derived, typename Allocator>
+theta_base_builder<Derived, Allocator>::theta_base_builder(const Allocator& allocator):
+allocator_(allocator), lg_k_(DEFAULT_LG_K), rf_(DEFAULT_RESIZE_FACTOR), p_(1), seed_(DEFAULT_SEED) {}
+
+template<typename Derived, typename Allocator>
+Derived& theta_base_builder<Derived, Allocator>::set_lg_k(uint8_t lg_k) {
+  if (lg_k < MIN_LG_K) {
+    throw std::invalid_argument("lg_k must not be less than " + std::to_string(MIN_LG_K) + ": " + std::to_string(lg_k));
+  }
+  if (lg_k > MAX_LG_K) {
+    throw std::invalid_argument("lg_k must not be greater than " + std::to_string(MAX_LG_K) + ": " + std::to_string(lg_k));
+  }
+  lg_k_ = lg_k;
+  return static_cast<Derived&>(*this);
+}
+
+template<typename Derived, typename Allocator>
+Derived& theta_base_builder<Derived, Allocator>::set_resize_factor(resize_factor rf) {
+  rf_ = rf;
+  return static_cast<Derived&>(*this);
+}
+
+template<typename Derived, typename Allocator>
+Derived& theta_base_builder<Derived, Allocator>::set_p(float p) {
+  if (p <= 0 || p > 1) throw std::invalid_argument("sampling probability must be between 0 and 1");
+  p_ = p;
+  return static_cast<Derived&>(*this);
+}
+
+template<typename Derived, typename Allocator>
+Derived& theta_base_builder<Derived, Allocator>::set_seed(uint64_t seed) {
+  seed_ = seed;
+  return static_cast<Derived&>(*this);
+}
+
+template<typename Derived, typename Allocator>
+uint64_t theta_base_builder<Derived, Allocator>::starting_theta() const {
+  if (p_ < 1) return theta_constants::MAX_THETA * p_;
+  return theta_constants::MAX_THETA;
+}
+
+template<typename Derived, typename Allocator>
+uint8_t theta_base_builder<Derived, Allocator>::starting_lg_size() const {
+  return starting_sub_multiple(lg_k_ + 1, MIN_LG_K, static_cast<uint8_t>(rf_));
+}
+
+template<typename Derived, typename Allocator>
+uint8_t theta_base_builder<Derived, Allocator>::starting_sub_multiple(uint8_t lg_tgt, uint8_t lg_min, uint8_t lg_rf) {
+  return (lg_tgt <= lg_min) ? lg_min : (lg_rf == 0) ? lg_tgt : ((lg_tgt - lg_min) % lg_rf) + lg_min;
+}
+
+// iterator
+
+template<typename Entry, typename ExtractKey>
+theta_iterator<Entry, ExtractKey>::theta_iterator(Entry* entries, uint32_t size, uint32_t index):
+entries_(entries), size_(size), index_(index) {
+  while (index_ < size_ && ExtractKey()(entries_[index_]) == 0) ++index_;
+}
+
+template<typename Entry, typename ExtractKey>
+auto theta_iterator<Entry, ExtractKey>::operator++() -> theta_iterator& {
+  ++index_;
+  while (index_ < size_ && ExtractKey()(entries_[index_]) == 0) ++index_;
+  return *this;
+}
+
+template<typename Entry, typename ExtractKey>
+auto theta_iterator<Entry, ExtractKey>::operator++(int) -> theta_iterator {
+  theta_iterator tmp(*this);
+  operator++();
+  return tmp;
+}
+
+template<typename Entry, typename ExtractKey>
+bool theta_iterator<Entry, ExtractKey>::operator!=(const theta_iterator& other) const {
+  return index_ != other.index_;
+}
+
+template<typename Entry, typename ExtractKey>
+bool theta_iterator<Entry, ExtractKey>::operator==(const theta_iterator& other) const {
+  return index_ == other.index_;
+}
+
+template<typename Entry, typename ExtractKey>
+auto theta_iterator<Entry, ExtractKey>::operator*() const -> Entry& {
+  return entries_[index_];
+}
+
+// const iterator
+
+template<typename Entry, typename ExtractKey>
+theta_const_iterator<Entry, ExtractKey>::theta_const_iterator(const Entry* entries, uint32_t size, uint32_t index):
+entries_(entries), size_(size), index_(index) {
+  while (index_ < size_ && ExtractKey()(entries_[index_]) == 0) ++index_;
+}
+
+template<typename Entry, typename ExtractKey>
+auto theta_const_iterator<Entry, ExtractKey>::operator++() -> theta_const_iterator& {
+  ++index_;
+  while (index_ < size_ && ExtractKey()(entries_[index_]) == 0) ++index_;
+  return *this;
+}
+
+template<typename Entry, typename ExtractKey>
+auto theta_const_iterator<Entry, ExtractKey>::operator++(int) -> theta_const_iterator {
+  theta_const_iterator tmp(*this);
+  operator++();
+  return tmp;
+}
+
+template<typename Entry, typename ExtractKey>
+bool theta_const_iterator<Entry, ExtractKey>::operator!=(const theta_const_iterator& other) const {
+  return index_ != other.index_;
+}
+
+template<typename Entry, typename ExtractKey>
+bool theta_const_iterator<Entry, ExtractKey>::operator==(const theta_const_iterator& other) const {
+  return index_ == other.index_;
+}
+
+template<typename Entry, typename ExtractKey>
+auto theta_const_iterator<Entry, ExtractKey>::operator*() const -> const Entry& {
+  return entries_[index_];
+}
+
+} /* namespace datasketches */
+
+#endif
diff --git a/be/src/thirdparty/datasketches/u32_table.hpp b/be/src/thirdparty/datasketches/u32_table.hpp
index 2316fc1..fe228a5 100644
--- a/be/src/thirdparty/datasketches/u32_table.hpp
+++ b/be/src/thirdparty/datasketches/u32_table.hpp
@@ -39,8 +39,8 @@ template<typename A>
 class u32_table {
 public:
 
-  u32_table();
-  u32_table(uint8_t lg_size, uint8_t num_valid_bits);
+  u32_table(const A& allocator);
+  u32_table(uint8_t lg_size, uint8_t num_valid_bits, const A& allocator);
 
   inline size_t get_num_items() const;
   inline const uint32_t* get_slots() const;
@@ -52,7 +52,7 @@ public:
   // returns true iff the item was present and was therefore removed from the table
   inline bool maybe_delete(uint32_t item);
 
-  static u32_table make_from_pairs(const uint32_t* pairs, size_t num_pairs, uint8_t lg_k);
+  static u32_table make_from_pairs(const uint32_t* pairs, size_t num_pairs, uint8_t lg_k, const A& allocator);
 
   vector_u32<A> unwrapping_get_items() const;
 
diff --git a/be/src/thirdparty/datasketches/u32_table_impl.hpp b/be/src/thirdparty/datasketches/u32_table_impl.hpp
index aa44ba2..bf8ece9 100644
--- a/be/src/thirdparty/datasketches/u32_table_impl.hpp
+++ b/be/src/thirdparty/datasketches/u32_table_impl.hpp
@@ -29,19 +29,19 @@
 namespace datasketches {
 
 template<typename A>
-u32_table<A>::u32_table():
+u32_table<A>::u32_table(const A& allocator):
 lg_size(0),
 num_valid_bits(0),
 num_items(0),
-slots()
+slots(allocator)
 {}
 
 template<typename A>
-u32_table<A>::u32_table(uint8_t lg_size, uint8_t num_valid_bits):
+u32_table<A>::u32_table(uint8_t lg_size, uint8_t num_valid_bits, const A& allocator):
 lg_size(lg_size),
 num_valid_bits(num_valid_bits),
 num_items(0),
-slots(1 << lg_size, UINT32_MAX)
+slots(1 << lg_size, UINT32_MAX, allocator)
 {
   if (lg_size < 2) throw std::invalid_argument("lg_size must be >= 2");
   if (num_valid_bits < 1 || num_valid_bits > 32) throw std::invalid_argument("num_valid_bits must be between 1 and 32");
@@ -110,10 +110,10 @@ bool u32_table<A>::maybe_delete(uint32_t item) {
 
 // this one is specifically tailored to be a part of fm85 decompression scheme
 template<typename A>
-u32_table<A> u32_table<A>::make_from_pairs(const uint32_t* pairs, size_t num_pairs, uint8_t lg_k) {
+u32_table<A> u32_table<A>::make_from_pairs(const uint32_t* pairs, size_t num_pairs, uint8_t lg_k, const A& allocator) {
   uint8_t lg_num_slots = 2;
   while (U32_TABLE_UPSIZE_DENOM * num_pairs > U32_TABLE_UPSIZE_NUMER * (1 << lg_num_slots)) lg_num_slots++;
-  u32_table<A> table(lg_num_slots, 6 + lg_k);
+  u32_table<A> table(lg_num_slots, 6 + lg_k, allocator);
   // Note: there is a possible "snowplow effect" here because the caller is passing in a sorted pairs array
   // However, we are starting out with the correct final table size, so the problem might not occur
   for (size_t i = 0; i < num_pairs; i++) {
@@ -152,7 +152,7 @@ void u32_table<A>::rebuild(uint8_t new_lg_size) {
   const size_t new_size = 1 << new_lg_size;
   if (new_size <= num_items) throw std::logic_error("new_size <= num_items");
   vector_u32<A> old_slots = std::move(slots);
-  slots = vector_u32<A>(new_size, UINT32_MAX);
+  slots = vector_u32<A>(new_size, UINT32_MAX, old_slots.get_allocator());
   lg_size = new_lg_size;
   for (size_t i = 0; i < old_size; i++) {
     if (old_slots[i] != UINT32_MAX) {
@@ -169,9 +169,9 @@ void u32_table<A>::rebuild(uint8_t new_lg_size) {
 // The result is nearly sorted, so make sure to use an efficient sort for that case
 template<typename A>
 vector_u32<A> u32_table<A>::unwrapping_get_items() const {
-  if (num_items == 0) return vector_u32<A>();
+  if (num_items == 0) return vector_u32<A>(slots.get_allocator());
   const size_t table_size = 1 << lg_size;
-  vector_u32<A> result(num_items);
+  vector_u32<A> result(num_items, 0, slots.get_allocator());
   size_t i = 0;
   size_t l = 0;
   size_t r = num_items - 1;