You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2021/04/26 15:46:44 UTC

[impala] branch master updated (1fb7dba -> 75fa056)

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git.


    from 1fb7dba  IMPALA-10445: Adjust NDV's scale with query option
     new 8336b7b  IMPALA-10658: LOAD DATA INPATH silently fails between HDFS and Azure ABFS
     new 91d2ab2  IMPALA-10584: Defer advancing read page if stream only has 2 pages.
     new 8b46d00  IMPALA-10611: Fix flakiness in test_wide_row
     new 75fa056  IMPALA-10631: Upgrade DataSketches to version 3.0.0

The 4 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 be/src/exprs/datasketches-test.cc                  |  12 +-
 be/src/runtime/buffered-tuple-stream-test.cc       |  70 ++
 be/src/runtime/buffered-tuple-stream.cc            |  31 +-
 be/src/runtime/buffered-tuple-stream.h             |   1 +
 .../datasketches/AuxHashMap-internal.hpp           |  89 +--
 be/src/thirdparty/datasketches/AuxHashMap.hpp      |  17 +-
 .../datasketches/CompositeInterpolationXTable.hpp  |   4 +-
 .../datasketches/CouponHashSet-internal.hpp        |  82 +-
 be/src/thirdparty/datasketches/CouponHashSet.hpp   |  20 +-
 .../datasketches/CouponList-internal.hpp           | 122 ++-
 be/src/thirdparty/datasketches/CouponList.hpp      |  23 +-
 .../thirdparty/datasketches/CubicInterpolation.hpp |   4 +-
 be/src/thirdparty/datasketches/HarmonicNumbers.hpp |   4 +-
 .../thirdparty/datasketches/Hll4Array-internal.hpp |  29 +-
 be/src/thirdparty/datasketches/Hll4Array.hpp       |   2 +-
 .../thirdparty/datasketches/Hll6Array-internal.hpp |  31 +-
 be/src/thirdparty/datasketches/Hll6Array.hpp       |   5 +-
 .../thirdparty/datasketches/Hll8Array-internal.hpp |  31 +-
 be/src/thirdparty/datasketches/Hll8Array.hpp       |   5 +-
 .../thirdparty/datasketches/HllArray-internal.hpp  |  83 +-
 be/src/thirdparty/datasketches/HllArray.hpp        |  16 +-
 .../thirdparty/datasketches/HllSketch-internal.hpp |  20 +-
 be/src/thirdparty/datasketches/HllSketchImpl.hpp   |   3 +-
 .../datasketches/HllSketchImplFactory.hpp          |  65 +-
 .../thirdparty/datasketches/HllUnion-internal.hpp  |  31 +-
 be/src/thirdparty/datasketches/HllUtil.hpp         |   2 +-
 be/src/thirdparty/datasketches/MurmurHash3.h       |   7 +
 be/src/thirdparty/datasketches/README.md           |   6 +-
 .../datasketches/RelativeErrorTables.hpp           |   2 +-
 .../bounds_on_ratios_in_sampled_sets.hpp           | 136 ++++
 .../bounds_on_ratios_in_theta_sketched_sets.hpp    | 135 ++++
 be/src/thirdparty/datasketches/cpc_common.hpp      |   3 +
 be/src/thirdparty/datasketches/cpc_compressor.hpp  |   4 +-
 .../datasketches/cpc_compressor_impl.hpp           |  47 +-
 be/src/thirdparty/datasketches/cpc_sketch.hpp      |  13 +-
 be/src/thirdparty/datasketches/cpc_sketch_impl.hpp |  33 +-
 be/src/thirdparty/datasketches/cpc_union.hpp       |   4 +-
 be/src/thirdparty/datasketches/cpc_union_impl.hpp  |  12 +-
 be/src/thirdparty/datasketches/cpc_util.hpp        |   6 -
 be/src/thirdparty/datasketches/hll.hpp             |  40 +-
 be/src/thirdparty/datasketches/icon_estimator.hpp  |   6 +-
 .../datasketches/kll_quantile_calculator.hpp       |   2 +-
 .../datasketches/kll_quantile_calculator_impl.hpp  |   6 +-
 be/src/thirdparty/datasketches/kll_sketch.hpp      |  13 +-
 be/src/thirdparty/datasketches/kll_sketch_impl.hpp | 168 ++--
 .../thirdparty/datasketches/memory_operations.hpp  |  12 +
 be/src/thirdparty/datasketches/theta_a_not_b.hpp   |  41 +-
 .../thirdparty/datasketches/theta_a_not_b_impl.hpp |  51 +-
 ...eiling_power_of_2.hpp => theta_comparators.hpp} |  39 +-
 ...{ceiling_power_of_2.hpp => theta_constants.hpp} |  23 +-
 be/src/thirdparty/datasketches/theta_helpers.hpp   |  54 ++
 .../thirdparty/datasketches/theta_intersection.hpp |  51 +-
 ..._power_of_2.hpp => theta_intersection_base.hpp} |  50 +-
 .../datasketches/theta_intersection_base_impl.hpp  | 121 +++
 .../datasketches/theta_intersection_impl.hpp       |  98 +--
 ...power_of_2.hpp => theta_jaccard_similarity.hpp} |  26 +-
 .../datasketches/theta_jaccard_similarity_base.hpp | 156 ++++
 ...ower_of_2.hpp => theta_set_difference_base.hpp} |  43 +-
 .../theta_set_difference_base_impl.hpp             |  85 +++
 be/src/thirdparty/datasketches/theta_sketch.hpp    | 398 ++++------
 .../thirdparty/datasketches/theta_sketch_impl.hpp  | 850 +++++----------------
 be/src/thirdparty/datasketches/theta_union.hpp     |  87 +--
 .../thirdparty/datasketches/theta_union_base.hpp   |  60 ++
 .../datasketches/theta_union_base_impl.hpp         |  89 +++
 .../thirdparty/datasketches/theta_union_impl.hpp   |  82 +-
 .../datasketches/theta_update_sketch_base.hpp      | 243 ++++++
 .../datasketches/theta_update_sketch_base_impl.hpp | 394 ++++++++++
 be/src/thirdparty/datasketches/u32_table.hpp       |   6 +-
 be/src/thirdparty/datasketches/u32_table_impl.hpp  |  18 +-
 .../org/apache/impala/common/FileSystemUtil.java   |  16 +-
 .../apache/impala/common/FileSystemUtilTest.java   |  40 +-
 tests/query_test/test_scanners.py                  |   6 +-
 tests/query_test/test_scratch_limit.py             |   6 -
 73 files changed, 2648 insertions(+), 1942 deletions(-)
 create mode 100644 be/src/thirdparty/datasketches/bounds_on_ratios_in_sampled_sets.hpp
 create mode 100644 be/src/thirdparty/datasketches/bounds_on_ratios_in_theta_sketched_sets.hpp
 copy be/src/thirdparty/datasketches/{ceiling_power_of_2.hpp => theta_comparators.hpp} (59%)
 copy be/src/thirdparty/datasketches/{ceiling_power_of_2.hpp => theta_constants.hpp} (69%)
 create mode 100644 be/src/thirdparty/datasketches/theta_helpers.hpp
 copy be/src/thirdparty/datasketches/{ceiling_power_of_2.hpp => theta_intersection_base.hpp} (50%)
 create mode 100644 be/src/thirdparty/datasketches/theta_intersection_base_impl.hpp
 copy be/src/thirdparty/datasketches/{ceiling_power_of_2.hpp => theta_jaccard_similarity.hpp} (61%)
 create mode 100644 be/src/thirdparty/datasketches/theta_jaccard_similarity_base.hpp
 copy be/src/thirdparty/datasketches/{ceiling_power_of_2.hpp => theta_set_difference_base.hpp} (50%)
 create mode 100644 be/src/thirdparty/datasketches/theta_set_difference_base_impl.hpp
 create mode 100644 be/src/thirdparty/datasketches/theta_union_base.hpp
 create mode 100644 be/src/thirdparty/datasketches/theta_union_base_impl.hpp
 create mode 100644 be/src/thirdparty/datasketches/theta_update_sketch_base.hpp
 create mode 100644 be/src/thirdparty/datasketches/theta_update_sketch_base_impl.hpp

[impala] 02/04: IMPALA-10584: Defer advancing read page if stream only has 2 pages.

Posted by jo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 91d2ab2116293b8f45afd02d029d47233877c538
Author: Riza Suminto <ri...@cloudera.com>
AuthorDate: Sun Apr 11 23:10:57 2021 -0700

    IMPALA-10584: Defer advancing read page if stream only has 2 pages.
    
    TestScratchLimit::test_with_unlimited_scratch_limit has been
    intermittently crashing in the ubuntu-16.04-dockerised-tests environment
    since result spooling was enabled by default in IMPALA-9856. A DCHECK
    violation occurs in ReservationTracker::CheckConsistency() because
    BufferedTupleStream wrongly tries to reclaim memory reservation while
    unpinning the stream.
    
    For this bug to surface, all of the following need to happen:
    - Stream is in pinned mode.
    - There are only 2 pages in the stream: 1 read and 1 write.
    - Stream can not increase reservation anymore either due to memory
      pressure or low buffer/memory limit.
    - The stream read page has been fully read and is attached to the output
      RowBatch, but the output RowBatch has not been cleaned up yet.
    - BufferedTupleStream::UnpinStream is invoked.
    
    The memory accounting bug happens because UnpinStream proceeds to
    NextReadPage, where the read page buffer is mistakenly assumed to have
    been released. default_page_len_ bytes are then added to
    write_page_reservation_, which subsequently violates the total memory
    reservation.
    
    This patch fixes the bug by deferring advancement of the read iterator
    in UnpinStream if the read page is attached to the output RowBatch and
    there are only 2 pages in the stream. This is OK because after
    UnpinStream finishes, the stream is in unpinned mode and
    has_read_write_page is false. The next AddRow operation is then allowed
    to unpin the previous write page first before reusing the reservation to
    allocate a new write page. The next GetNext call will be responsible for
    advancing the read page.
    
    Testing:
    - Add be test DeferAdvancingReadPage.
    - Loop TestScratchLimit::test_with_unlimited_scratch_limit on my
      local dev machine and verify that each run passes without triggering
      the DCHECK violation.
    - Reenable result spooling in TestScratchLimit that was disabled in
      IMPALA-10559.
    - Pass core tests.
    
    Change-Id: I16137b6e423f190f60c3115a06ccd0f77e9f585a
    Reviewed-on: http://gerrit.cloudera.org:8080/17195
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/runtime/buffered-tuple-stream-test.cc | 70 ++++++++++++++++++++++++++++
 be/src/runtime/buffered-tuple-stream.cc      | 31 +++++++++++-
 be/src/runtime/buffered-tuple-stream.h       |  1 +
 tests/query_test/test_scratch_limit.py       |  6 ---
 4 files changed, 100 insertions(+), 8 deletions(-)

diff --git a/be/src/runtime/buffered-tuple-stream-test.cc b/be/src/runtime/buffered-tuple-stream-test.cc
index 2a0d607..dfdfeba 100644
--- a/be/src/runtime/buffered-tuple-stream-test.cc
+++ b/be/src/runtime/buffered-tuple-stream-test.cc
@@ -1297,6 +1297,76 @@ TEST_F(SimpleTupleStreamTest, UnpinReadPage) {
   write_batch->Reset();
 }
 
+// Test that UnpinStream defer advancing the read page when all rows from the read page
+// are attached to a returned RowBatch but got not enough reservation.
+TEST_F(SimpleTupleStreamTest, DeferAdvancingReadPage) {
+  int num_rows = 1024;
+  int buffer_size = 4 * 1024;
+  // Only give 2 * buffer_size for the stream initial read and write page reservation.
+  Init(2 * buffer_size);
+
+  bool eos;
+  bool got_reservation;
+  Status status;
+  RowBatch* write_batch = CreateIntBatch(0, num_rows, false);
+
+  {
+    // Test unpinning a stream when the read page has been attached to the output batch
+    // and the output batch has NOT been reset.
+    BufferedTupleStream stream(
+        runtime_state_, int_desc_, &client_, buffer_size, buffer_size);
+    ASSERT_OK(stream.Init("SimpleTupleStreamTest::DeferAdvancingReadPage", true));
+    ASSERT_OK(stream.PrepareForReadWrite(true, &got_reservation));
+    ASSERT_TRUE(got_reservation);
+
+    // Add rows to stream.
+    for (int i = 0; i < write_batch->num_rows(); ++i) {
+      EXPECT_TRUE(stream.AddRow(write_batch->GetRow(i), &status));
+      ASSERT_OK(status);
+    }
+
+    // Read until the read page is attached to the output.
+    RowBatch read_batch(int_desc_, num_rows, &tracker_);
+    ASSERT_OK(stream.GetNext(&read_batch, &eos));
+    // If GetNext did hit the capacity of the RowBatch, then the read page should have
+    // been attached to read_batch.
+    ASSERT_TRUE(read_batch.num_rows() < num_rows);
+    ASSERT_TRUE(!eos);
+
+    // We continue adding rows into the stream without releasing the read_batch. We expect
+    // that reservation limit will be hit and stream will need to be unpinned. We also
+    // expect that, after unpinning the stream, subsequent AddRow is always successful
+    // even if we're not immediately releasing the read_batch. We insert write_batch twice
+    // to ensure that we're inserting both in pinned and unpinned mode.
+    ASSERT_TRUE(stream.is_pinned());
+    for (int j = 0; j < 2; ++j) {
+      for (int i = 0; i < write_batch->num_rows(); ++i) {
+        bool succeed = stream.AddRow(write_batch->GetRow(i), &status);
+        ASSERT_TRUE(succeed || stream.is_pinned());
+        if (!succeed) {
+          // Unpin the stream.
+          status = stream.UnpinStream(BufferedTupleStream::UNPIN_ALL_EXCEPT_CURRENT);
+          ASSERT_OK(status);
+          ASSERT_FALSE(stream.is_pinned());
+          ASSERT_EQ(stream.bytes_unpinned(), 0);
+          ASSERT_EQ(stream.pages_.size(), 2);
+          ASSERT_EQ(stream.num_pages_, 2);
+          // Retry inserting this row by decreasing the index.
+          // After stream get into unpinned mode, further inserts should be successful,
+          // even if we're not immediately cleaning up the read_batch.
+          // Stream should be able to unpin the previous write page to reclaim some
+          // memory reservation to allocate new write page.
+          --i;
+        }
+      }
+    }
+    ASSERT_FALSE(stream.is_pinned());
+    stream.Close(nullptr, RowBatch::FlushMode::NO_FLUSH_RESOURCES);
+    read_batch.Reset();
+  }
+  write_batch->Reset();
+}
+
 // Test writing to a stream (AddRow and UnpinStream), even though attached pages have not
 // been released yet.
 TEST_F(SimpleTupleStreamTest, WriteAfterReadAttached) {
diff --git a/be/src/runtime/buffered-tuple-stream.cc b/be/src/runtime/buffered-tuple-stream.cc
index a4b76a2..01edc84 100644
--- a/be/src/runtime/buffered-tuple-stream.cc
+++ b/be/src/runtime/buffered-tuple-stream.cc
@@ -716,14 +716,41 @@ Status BufferedTupleStream::UnpinStream(UnpinMode mode) {
 
   if (pinned_) {
     CHECK_CONSISTENCY_FULL(read_it_);
+    bool defer_advancing_read_page = false;
     if (&*read_it_.read_page_ != write_page_ && read_it_.read_page_ != pages_.end()
         && read_it_.read_page_rows_returned_ == read_it_.read_page_->num_rows) {
-      RETURN_IF_ERROR(NextReadPage(&read_it_));
+      if (read_it_.read_page_->attached_to_output_batch) {
+        if (num_pages_ <= 2) {
+          // NextReadPage will attempt to save default_page_len_ into write reservation if
+          // the stream ended up with only 1 read/write page after advancing the read
+          // page. This can potentially lead to negative unused reservation if the reader
+          // has not freed the row batch where the read page buffer is attached to. We
+          // defer advancing the read page until the next GetNext() call by the reader
+          // (see IMPALA-10584).
+          defer_advancing_read_page = true;
+        }
+      }
+
+      if (!defer_advancing_read_page) {
+        RETURN_IF_ERROR(NextReadPage(&read_it_));
+      }
     }
 
     // If the stream was pinned, there may be some remaining pinned pages that should
     // be unpinned at this point.
-    for (Page& page : pages_) UnpinPageIfNeeded(&page, false);
+    DCHECK_EQ(bytes_unpinned_, 0);
+    std::list<Page>::iterator it = pages_.begin();
+    if (defer_advancing_read_page) {
+      // We skip advancing the read page earlier, so the first page must be a read page
+      // and attached_to_output_batch is true. We should keep the first page pinned. The
+      // next GetNext() call is the one who will be responsible to unpin the first page.
+      DCHECK(read_it_.read_page_ == pages_.begin());
+      ++it;
+    }
+    while (it != pages_.end()) {
+      UnpinPageIfNeeded(&(*it), false);
+      ++it;
+    }
 
     // Check to see if we need to save some of the reservation we freed up.
     if (!NeedWriteReservation(true) && NeedWriteReservation(false)) {
diff --git a/be/src/runtime/buffered-tuple-stream.h b/be/src/runtime/buffered-tuple-stream.h
index 4f76cba..bba6479 100644
--- a/be/src/runtime/buffered-tuple-stream.h
+++ b/be/src/runtime/buffered-tuple-stream.h
@@ -561,6 +561,7 @@ class BufferedTupleStream {
  private:
   DISALLOW_COPY_AND_ASSIGN(BufferedTupleStream);
   friend class SimpleTupleStreamTest_ShortDebugString_Test;
+  friend class SimpleTupleStreamTest_DeferAdvancingReadPage_Test;
 
   /// Runtime state instance used to check for cancellation. Not owned.
   RuntimeState* const state_;
diff --git a/tests/query_test/test_scratch_limit.py b/tests/query_test/test_scratch_limit.py
index 07f34da..8779cd3 100644
--- a/tests/query_test/test_scratch_limit.py
+++ b/tests/query_test/test_scratch_limit.py
@@ -71,7 +71,6 @@ class TestScratchLimit(ImpalaTestSuite):
     exec_option = vector.get_value('exec_option')
     exec_option['buffer_pool_limit'] = self.buffer_pool_limit
     exec_option['scratch_limit'] = '500m'
-    exec_option['spool_query_results'] = '0'
     self.execute_query_expect_success(self.client, self.spilling_sort_query, exec_option)
 
   def test_with_low_scratch_limit(self, vector):
@@ -82,7 +81,6 @@ class TestScratchLimit(ImpalaTestSuite):
     exec_option = vector.get_value('exec_option')
     exec_option['buffer_pool_limit'] = self.buffer_pool_limit
     exec_option['scratch_limit'] = '24m'
-    exec_option['spool_query_results'] = '0'
     expected_error = 'Scratch space limit of %s bytes exceeded'
     scratch_limit_in_bytes = 24 * 1024 * 1024
     try:
@@ -99,7 +97,6 @@ class TestScratchLimit(ImpalaTestSuite):
     exec_option = vector.get_value('exec_option')
     exec_option['buffer_pool_limit'] = self.buffer_pool_limit
     exec_option['scratch_limit'] = '0'
-    exec_option['spool_query_results'] = '0'
     for query in self.spilling_queries:
       self.execute_query_expect_failure(query, exec_option)
 
@@ -110,7 +107,6 @@ class TestScratchLimit(ImpalaTestSuite):
     exec_option = vector.get_value('exec_option')
     exec_option['buffer_pool_limit'] = self.buffer_pool_limit
     exec_option['scratch_limit'] = '-1'
-    exec_option['spool_query_results'] = '0'
     self.execute_query_expect_success(self.client, self.spilling_sort_query, exec_option)
 
   def test_without_specifying_scratch_limit(self, vector):
@@ -119,7 +115,6 @@ class TestScratchLimit(ImpalaTestSuite):
     """
     exec_option = vector.get_value('exec_option')
     exec_option['buffer_pool_limit'] = self.buffer_pool_limit
-    exec_option['spool_query_results'] = '0'
     self.execute_query_expect_success(self.client, self.spilling_sort_query, exec_option)
 
   def test_with_zero_scratch_limit_no_memory_limit(self, vector):
@@ -129,7 +124,6 @@ class TestScratchLimit(ImpalaTestSuite):
     """
     exec_option = vector.get_value('exec_option')
     exec_option['scratch_limit'] = '0'
-    exec_option['spool_query_results'] = '0'
     for query in self.spilling_queries:
       self.execute_query_expect_success(self.client, query, exec_option)
 

[impala] 04/04: IMPALA-10631: Upgrade DataSketches to version 3.0.0

Posted by jo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 75fa056dc04c6cf2c153de29595d5568db6a074f
Author: Fucun Chu <ch...@hotmail.com>
AuthorDate: Fri Apr 2 00:35:26 2021 +0800

    IMPALA-10631: Upgrade DataSketches to version 3.0.0
    
    Upgrade the external DataSketches files CPC/HLL/KLL/Theta to version
    3.0.0.
    
    Tests:
    - Ran the tests from tests/query_test/test_datasketches.py
    
    Change-Id: I37622a7643d015b80f55b802421eae826aa7a4f9
    Reviewed-on: http://gerrit.cloudera.org:8080/17294
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/exprs/datasketches-test.cc                  |  12 +-
 .../datasketches/AuxHashMap-internal.hpp           |  89 +--
 be/src/thirdparty/datasketches/AuxHashMap.hpp      |  17 +-
 .../datasketches/CompositeInterpolationXTable.hpp  |   4 +-
 .../datasketches/CouponHashSet-internal.hpp        |  82 +-
 be/src/thirdparty/datasketches/CouponHashSet.hpp   |  20 +-
 .../datasketches/CouponList-internal.hpp           | 122 ++-
 be/src/thirdparty/datasketches/CouponList.hpp      |  23 +-
 .../thirdparty/datasketches/CubicInterpolation.hpp |   4 +-
 be/src/thirdparty/datasketches/HarmonicNumbers.hpp |   4 +-
 .../thirdparty/datasketches/Hll4Array-internal.hpp |  29 +-
 be/src/thirdparty/datasketches/Hll4Array.hpp       |   2 +-
 .../thirdparty/datasketches/Hll6Array-internal.hpp |  31 +-
 be/src/thirdparty/datasketches/Hll6Array.hpp       |   5 +-
 .../thirdparty/datasketches/Hll8Array-internal.hpp |  31 +-
 be/src/thirdparty/datasketches/Hll8Array.hpp       |   5 +-
 .../thirdparty/datasketches/HllArray-internal.hpp  |  83 +-
 be/src/thirdparty/datasketches/HllArray.hpp        |  16 +-
 .../thirdparty/datasketches/HllSketch-internal.hpp |  20 +-
 be/src/thirdparty/datasketches/HllSketchImpl.hpp   |   3 +-
 .../datasketches/HllSketchImplFactory.hpp          |  65 +-
 .../thirdparty/datasketches/HllUnion-internal.hpp  |  31 +-
 be/src/thirdparty/datasketches/HllUtil.hpp         |   2 +-
 be/src/thirdparty/datasketches/MurmurHash3.h       |   7 +
 be/src/thirdparty/datasketches/README.md           |   6 +-
 .../datasketches/RelativeErrorTables.hpp           |   2 +-
 .../bounds_on_ratios_in_sampled_sets.hpp           | 136 ++++
 .../bounds_on_ratios_in_theta_sketched_sets.hpp    | 135 ++++
 be/src/thirdparty/datasketches/cpc_common.hpp      |   3 +
 be/src/thirdparty/datasketches/cpc_compressor.hpp  |   4 +-
 .../datasketches/cpc_compressor_impl.hpp           |  47 +-
 be/src/thirdparty/datasketches/cpc_sketch.hpp      |  13 +-
 be/src/thirdparty/datasketches/cpc_sketch_impl.hpp |  33 +-
 be/src/thirdparty/datasketches/cpc_union.hpp       |   4 +-
 be/src/thirdparty/datasketches/cpc_union_impl.hpp  |  12 +-
 be/src/thirdparty/datasketches/cpc_util.hpp        |   6 -
 be/src/thirdparty/datasketches/hll.hpp             |  40 +-
 be/src/thirdparty/datasketches/icon_estimator.hpp  |   6 +-
 .../datasketches/kll_quantile_calculator.hpp       |   2 +-
 .../datasketches/kll_quantile_calculator_impl.hpp  |   6 +-
 be/src/thirdparty/datasketches/kll_sketch.hpp      |  13 +-
 be/src/thirdparty/datasketches/kll_sketch_impl.hpp | 168 ++--
 .../thirdparty/datasketches/memory_operations.hpp  |  12 +
 be/src/thirdparty/datasketches/theta_a_not_b.hpp   |  41 +-
 .../thirdparty/datasketches/theta_a_not_b_impl.hpp |  51 +-
 ...ubicInterpolation.hpp => theta_comparators.hpp} |  39 +-
 ...InterpolationXTable.hpp => theta_constants.hpp} |  24 +-
 be/src/thirdparty/datasketches/theta_helpers.hpp   |  54 ++
 .../thirdparty/datasketches/theta_intersection.hpp |  51 +-
 .../datasketches/theta_intersection_base.hpp       |  59 ++
 .../datasketches/theta_intersection_base_impl.hpp  | 121 +++
 .../datasketches/theta_intersection_impl.hpp       |  98 +--
 ...tionXTable.hpp => theta_jaccard_similarity.hpp} |  25 +-
 .../datasketches/theta_jaccard_similarity_base.hpp | 156 ++++
 .../datasketches/theta_set_difference_base.hpp     |  54 ++
 .../theta_set_difference_base_impl.hpp             |  85 +++
 be/src/thirdparty/datasketches/theta_sketch.hpp    | 398 ++++------
 .../thirdparty/datasketches/theta_sketch_impl.hpp  | 850 +++++----------------
 be/src/thirdparty/datasketches/theta_union.hpp     |  87 +--
 .../thirdparty/datasketches/theta_union_base.hpp   |  60 ++
 .../datasketches/theta_union_base_impl.hpp         |  89 +++
 .../thirdparty/datasketches/theta_union_impl.hpp   |  82 +-
 .../datasketches/theta_update_sketch_base.hpp      | 243 ++++++
 .../datasketches/theta_update_sketch_base_impl.hpp | 394 ++++++++++
 be/src/thirdparty/datasketches/u32_table.hpp       |   6 +-
 be/src/thirdparty/datasketches/u32_table_impl.hpp  |  18 +-
 66 files changed, 2545 insertions(+), 1895 deletions(-)

diff --git a/be/src/exprs/datasketches-test.cc b/be/src/exprs/datasketches-test.cc
index 687f070..368050d 100644
--- a/be/src/exprs/datasketches-test.cc
+++ b/be/src/exprs/datasketches-test.cc
@@ -174,22 +174,22 @@ TEST(TestDataSketchesTheta, UseDataSketchesInterface) {
     datasketches::update_theta_sketch sketch1 =
         datasketches::update_theta_sketch::builder().build();
     for (int key = 0; key < 100000; key++) sketch1.update(key);
-    sketch1.serialize(sketch_stream1);
+    sketch1.compact().serialize(sketch_stream1);
 
     // 100000 distinct keys
     datasketches::update_theta_sketch sketch2 =
         datasketches::update_theta_sketch::builder().build();
     for (int key = 50000; key < 150000; key++) sketch2.update(key);
-    sketch2.serialize(sketch_stream2);
+    sketch2.compact().serialize(sketch_stream2);
   }
 
   // this section deserializes the sketches, produces union and intersection
   {
-    datasketches::update_theta_sketch sketch1 =
-        datasketches::update_theta_sketch::deserialize(sketch_stream1);
+    datasketches::compact_theta_sketch sketch1 =
+        datasketches::compact_theta_sketch::deserialize(sketch_stream1);
 
-    datasketches::update_theta_sketch sketch2 =
-        datasketches::update_theta_sketch::deserialize(sketch_stream2);
+    datasketches::compact_theta_sketch sketch2 =
+        datasketches::compact_theta_sketch::deserialize(sketch_stream2);
 
     // union opertion
     datasketches::theta_union u = datasketches::theta_union::builder().build();
diff --git a/be/src/thirdparty/datasketches/AuxHashMap-internal.hpp b/be/src/thirdparty/datasketches/AuxHashMap-internal.hpp
index 9a8e135..60142ec 100644
--- a/be/src/thirdparty/datasketches/AuxHashMap-internal.hpp
+++ b/be/src/thirdparty/datasketches/AuxHashMap-internal.hpp
@@ -26,42 +26,28 @@
 namespace datasketches {
 
 template<typename A>
-AuxHashMap<A>::AuxHashMap(int lgAuxArrInts, int lgConfigK)
-  : lgConfigK(lgConfigK),
-    lgAuxArrInts(lgAuxArrInts),
-    auxCount(0) {
-  typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
-  const int numItems = 1 << lgAuxArrInts;
-  auxIntArr = intAlloc().allocate(numItems);
-  std::fill(auxIntArr, auxIntArr + numItems, 0);
-}
-
-template<typename A>
-AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(int lgAuxArrInts, int lgConfigK) {
-  return new (ahmAlloc().allocate(1)) AuxHashMap<A>(lgAuxArrInts, lgConfigK);
-}
+AuxHashMap<A>::AuxHashMap(int lgAuxArrInts, int lgConfigK, const A& allocator):
+lgConfigK(lgConfigK),
+lgAuxArrInts(lgAuxArrInts),
+auxCount(0),
+entries(1 << lgAuxArrInts, 0, allocator)
+{}
 
 template<typename A>
-AuxHashMap<A>::AuxHashMap(const AuxHashMap& that)
-  : lgConfigK(that.lgConfigK),
-    lgAuxArrInts(that.lgAuxArrInts),
-    auxCount(that.auxCount) {
-  typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
-  const int numItems = 1 << lgAuxArrInts;
-  auxIntArr = intAlloc().allocate(numItems);
-  std::copy(that.auxIntArr, that.auxIntArr + numItems, auxIntArr);
+AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(int lgAuxArrInts, int lgConfigK, const A& allocator) {
+  return new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgAuxArrInts, lgConfigK, allocator);
 }
 
 template<typename A>
 AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(const AuxHashMap& that) {
-  return new (ahmAlloc().allocate(1)) AuxHashMap<A>(that);
+  return new (ahmAlloc(that.entries.get_allocator()).allocate(1)) AuxHashMap<A>(that);
 }
 
 template<typename A>
 AuxHashMap<A>* AuxHashMap<A>::deserialize(const void* bytes, size_t len,
                                           int lgConfigK,
                                           int auxCount, int lgAuxArrInts,
-                                          bool srcCompact) {
+                                          bool srcCompact, const A& allocator) {
   int lgArrInts = lgAuxArrInts;
   if (srcCompact) { // early compact versions didn't use LgArr byte field so ignore input
     lgArrInts = HllUtil<A>::computeLgArrInts(HLL, auxCount, lgConfigK);
@@ -77,7 +63,7 @@ AuxHashMap<A>* AuxHashMap<A>::deserialize(const void* bytes, size_t len,
     if (len < auxCount * sizeof(int)) {
       throw std::out_of_range("Input array too small to hold AuxHashMap image");
     }
-    auxHashMap = new (ahmAlloc().allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK);
+    auxHashMap = new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK, allocator);
     for (int i = 0; i < auxCount; ++i) {
       int pair = auxPtr[i];
       int slotNo = HllUtil<A>::getLow26(pair) & configKmask;
@@ -89,7 +75,7 @@ AuxHashMap<A>* AuxHashMap<A>::deserialize(const void* bytes, size_t len,
     if (len < itemsToRead * sizeof(int)) {
       throw std::out_of_range("Input array too small to hold AuxHashMap image");
     }
-    auxHashMap = new (ahmAlloc().allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK);
+    auxHashMap = new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK, allocator);
     for (int i = 0; i < itemsToRead; ++i) {
       int pair = auxPtr[i];
       if (pair == HllUtil<A>::EMPTY) { continue; }
@@ -110,7 +96,7 @@ AuxHashMap<A>* AuxHashMap<A>::deserialize(const void* bytes, size_t len,
 template<typename A>
 AuxHashMap<A>* AuxHashMap<A>::deserialize(std::istream& is, const int lgConfigK,
                                           const int auxCount, const int lgAuxArrInts,
-                                          const bool srcCompact) {
+                                          const bool srcCompact, const A& allocator) {
   int lgArrInts = lgAuxArrInts;
   if (srcCompact) { // early compact versions didn't use LgArr byte field so ignore input
     lgArrInts = HllUtil<A>::computeLgArrInts(HLL, auxCount, lgConfigK);
@@ -118,7 +104,7 @@ AuxHashMap<A>* AuxHashMap<A>::deserialize(std::istream& is, const int lgConfigK,
     lgArrInts = lgAuxArrInts;
   }
 
-  AuxHashMap<A>* auxHashMap = new (ahmAlloc().allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK);
+  AuxHashMap<A>* auxHashMap = new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK, allocator);
   typedef std::unique_ptr<AuxHashMap<A>, std::function<void(AuxHashMap<A>*)>> aux_hash_map_ptr;
   aux_hash_map_ptr aux_ptr(auxHashMap, auxHashMap->make_deleter());
 
@@ -153,23 +139,17 @@ AuxHashMap<A>* AuxHashMap<A>::deserialize(std::istream& is, const int lgConfigK,
 }
 
 template<typename A>
-AuxHashMap<A>::~AuxHashMap<A>() {
-  // should be no way to have an object without a valid array
-  typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
-  intAlloc().deallocate(auxIntArr, 1 << lgAuxArrInts);
-}
-
-template<typename A>
 std::function<void(AuxHashMap<A>*)> AuxHashMap<A>::make_deleter() {
   return [](AuxHashMap<A>* ptr) {
+    ahmAlloc alloc(ptr->entries.get_allocator());
     ptr->~AuxHashMap();
-    ahmAlloc().deallocate(ptr, 1);
+    alloc.deallocate(ptr, 1);
   };
 }
 
 template<typename A>
 AuxHashMap<A>* AuxHashMap<A>::copy() const {
-  return new (ahmAlloc().allocate(1)) AuxHashMap<A>(*this);
+  return new (ahmAlloc(entries.get_allocator()).allocate(1)) AuxHashMap<A>(*this);
 }
 
 template<typename A>
@@ -179,7 +159,7 @@ int AuxHashMap<A>::getAuxCount() const {
 
 template<typename A>
 int* AuxHashMap<A>::getAuxIntArr(){
-  return auxIntArr;
+  return entries.data();
 }
 
 template<typename A>
@@ -199,7 +179,7 @@ int AuxHashMap<A>::getUpdatableSizeBytes() const {
 
 template<typename A>
 void AuxHashMap<A>::mustAdd(const int slotNo, const int value) {
-  const int index = find(auxIntArr, lgAuxArrInts, lgConfigK, slotNo);
+  const int index = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
   const int entry_pair = HllUtil<A>::pair(slotNo, value);
   if (index >= 0) {
     throw std::invalid_argument("Found a slotNo that should not be there: SlotNo: "
@@ -207,16 +187,16 @@ void AuxHashMap<A>::mustAdd(const int slotNo, const int value) {
   }
 
   // found empty entry
-  auxIntArr[~index] = entry_pair;
+  entries[~index] = entry_pair;
   ++auxCount;
   checkGrow();
 }
 
 template<typename A>
 int AuxHashMap<A>::mustFindValueFor(const int slotNo) const {
-  const int index = find(auxIntArr, lgAuxArrInts, lgConfigK, slotNo);
+  const int index = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
   if (index >= 0) {
-    return HllUtil<A>::getValue(auxIntArr[index]);
+    return HllUtil<A>::getValue(entries[index]);
   }
 
   throw std::invalid_argument("slotNo not found: " + std::to_string(slotNo));
@@ -224,9 +204,9 @@ int AuxHashMap<A>::mustFindValueFor(const int slotNo) const {
 
 template<typename A>
 void AuxHashMap<A>::mustReplace(const int slotNo, const int value) {
-  const int idx = find(auxIntArr, lgAuxArrInts, lgConfigK, slotNo);
+  const int idx = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
   if (idx >= 0) {
-    auxIntArr[idx] = HllUtil<A>::pair(slotNo, value);
+    entries[idx] = HllUtil<A>::pair(slotNo, value);
     return;
   }
 
@@ -243,23 +223,18 @@ void AuxHashMap<A>::checkGrow() {
 
 template<typename A>
 void AuxHashMap<A>::growAuxSpace() {
-  int* oldArray = auxIntArr;
-  const int oldArrLen = 1 << lgAuxArrInts;
   const int configKmask = (1 << lgConfigK) - 1;
   const int newArrLen = 1 << ++lgAuxArrInts;
-  typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
-  auxIntArr = intAlloc().allocate(newArrLen);
-  std::fill(auxIntArr, auxIntArr + newArrLen, 0);
-  for (int i = 0; i < oldArrLen; ++i) {
-    const int fetched = oldArray[i];
+  vector_int entries_new(newArrLen, 0, entries.get_allocator());
+  for (size_t i = 0; i < entries.size(); ++i) {
+    const int fetched = entries[i];
     if (fetched != HllUtil<A>::EMPTY) {
       // find empty in new array
-      const int idx = find(auxIntArr, lgAuxArrInts, lgConfigK, fetched & configKmask);
-      auxIntArr[~idx] = fetched;
+      const int idx = find(entries_new.data(), lgAuxArrInts, lgConfigK, fetched & configKmask);
+      entries_new[~idx] = fetched;
     }
   }
-
-  intAlloc().deallocate(oldArray, oldArrLen);
+  entries = std::move(entries_new);
 }
 
 //Searches the Aux arr hash table for an empty or a matching slotNo depending on the context.
@@ -290,12 +265,12 @@ int AuxHashMap<A>::find(const int* auxArr, const int lgAuxArrInts, const int lgC
 
 template<typename A>
 coupon_iterator<A> AuxHashMap<A>::begin(bool all) const {
-  return coupon_iterator<A>(auxIntArr, 1 << lgAuxArrInts, 0, all);
+  return coupon_iterator<A>(entries.data(), 1 << lgAuxArrInts, 0, all);
 }
 
 template<typename A>
 coupon_iterator<A> AuxHashMap<A>::end() const {
-  return coupon_iterator<A>(auxIntArr, 1 << lgAuxArrInts, 1 << lgAuxArrInts, false);
+  return coupon_iterator<A>(entries.data(), 1 << lgAuxArrInts, 1 << lgAuxArrInts, false);
 }
 
 }
diff --git a/be/src/thirdparty/datasketches/AuxHashMap.hpp b/be/src/thirdparty/datasketches/AuxHashMap.hpp
index b37e85c..e18f15d 100644
--- a/be/src/thirdparty/datasketches/AuxHashMap.hpp
+++ b/be/src/thirdparty/datasketches/AuxHashMap.hpp
@@ -28,22 +28,21 @@
 
 namespace datasketches {
 
-template<typename A = std::allocator<char>>
+template<typename A>
 class AuxHashMap final {
   public:
-    explicit AuxHashMap(int lgAuxArrInts, int lgConfigK);
-    explicit AuxHashMap(const AuxHashMap<A>& that);
-    static AuxHashMap* newAuxHashMap(int lgAuxArrInts, int lgConfigK);
+    AuxHashMap(int lgAuxArrInts, int lgConfigK, const A& allocator);
+    static AuxHashMap* newAuxHashMap(int lgAuxArrInts, int lgConfigK, const A& allocator);
     static AuxHashMap* newAuxHashMap(const AuxHashMap<A>& that);
 
     static AuxHashMap* deserialize(const void* bytes, size_t len,
                                    int lgConfigK,
                                    int auxCount, int lgAuxArrInts,
-                                   bool srcCompact);
+                                   bool srcCompact, const A& allocator);
     static AuxHashMap* deserialize(std::istream& is, int lgConfigK,
                                    int auxCount, int lgAuxArrInts,
-                                   bool srcCompact);
-    virtual ~AuxHashMap();
+                                   bool srcCompact, const A& allocator);
+    virtual ~AuxHashMap() = default;
     static std::function<void(AuxHashMap<A>*)> make_deleter();
     
     AuxHashMap* copy() const;
@@ -64,6 +63,8 @@ class AuxHashMap final {
   private:
     typedef typename std::allocator_traits<A>::template rebind_alloc<AuxHashMap<A>> ahmAlloc;
 
+    using vector_int = std::vector<int, typename std::allocator_traits<A>::template rebind_alloc<int>>;
+
     // static so it can be used when resizing
     static int find(const int* auxArr, int lgAuxArrInts, int lgConfigK, int slotNo);
 
@@ -73,7 +74,7 @@ class AuxHashMap final {
     const int lgConfigK;
     int lgAuxArrInts;
     int auxCount;
-    int* auxIntArr;
+    vector_int entries;
 };
 
 }
diff --git a/be/src/thirdparty/datasketches/CompositeInterpolationXTable.hpp b/be/src/thirdparty/datasketches/CompositeInterpolationXTable.hpp
index 8baecbe..0fa0af8 100644
--- a/be/src/thirdparty/datasketches/CompositeInterpolationXTable.hpp
+++ b/be/src/thirdparty/datasketches/CompositeInterpolationXTable.hpp
@@ -24,7 +24,7 @@
 
 namespace datasketches {
 
-template<typename A = std::allocator<char>>
+template<typename A = std::allocator<uint8_t>>
 class CompositeInterpolationXTable {
   public:
     static int get_y_stride(int logK);
@@ -37,4 +37,4 @@ class CompositeInterpolationXTable {
 
 #include "CompositeInterpolationXTable-internal.hpp"
 
-#endif /* _COMPOSITEINTERPOLATIONXTABLE_HPP_ */
\ No newline at end of file
+#endif /* _COMPOSITEINTERPOLATIONXTABLE_HPP_ */
diff --git a/be/src/thirdparty/datasketches/CouponHashSet-internal.hpp b/be/src/thirdparty/datasketches/CouponHashSet-internal.hpp
index 35facfe..29a3ea7 100644
--- a/be/src/thirdparty/datasketches/CouponHashSet-internal.hpp
+++ b/be/src/thirdparty/datasketches/CouponHashSet-internal.hpp
@@ -31,8 +31,8 @@ template<typename A>
 static int find(const int* array, const int lgArrInts, const int coupon);
 
 template<typename A>
-CouponHashSet<A>::CouponHashSet(const int lgConfigK, const target_hll_type tgtHllType)
-  : CouponList<A>(lgConfigK, tgtHllType, hll_mode::SET)
+CouponHashSet<A>::CouponHashSet(const int lgConfigK, const target_hll_type tgtHllType, const A& allocator)
+  : CouponList<A>(lgConfigK, tgtHllType, hll_mode::SET, allocator)
 {
   if (lgConfigK <= 7) {
     throw std::invalid_argument("CouponHashSet must be initialized with lgConfigK > 7. Found: "
@@ -41,27 +41,21 @@ CouponHashSet<A>::CouponHashSet(const int lgConfigK, const target_hll_type tgtHl
 }
 
 template<typename A>
-CouponHashSet<A>::CouponHashSet(const CouponHashSet<A>& that)
-  : CouponList<A>(that) {}
-
-template<typename A>
 CouponHashSet<A>::CouponHashSet(const CouponHashSet<A>& that, const target_hll_type tgtHllType)
   : CouponList<A>(that, tgtHllType) {}
 
 template<typename A>
-CouponHashSet<A>::~CouponHashSet() {}
-
-template<typename A>
 std::function<void(HllSketchImpl<A>*)> CouponHashSet<A>::get_deleter() const {
   return [](HllSketchImpl<A>* ptr) {
     CouponHashSet<A>* chs = static_cast<CouponHashSet<A>*>(ptr);
+    ChsAlloc chsa(chs->getAllocator());
     chs->~CouponHashSet();
-    chsAlloc().deallocate(chs, 1);
+    chsa.deallocate(chs, 1);
   };
 }
 
 template<typename A>
-CouponHashSet<A>* CouponHashSet<A>::newSet(const void* bytes, size_t len) {
+CouponHashSet<A>* CouponHashSet<A>::newSet(const void* bytes, size_t len, const A& allocator) {
   if (len < HllUtil<A>::HASH_SET_INT_ARR_START) { // hard-coded 
     throw std::out_of_range("Input data length insufficient to hold CouponHashSet");
   }
@@ -79,7 +73,7 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(const void* bytes, size_t len) {
 
   const hll_mode mode = HllSketchImpl<A>::extractCurMode(data[HllUtil<A>::MODE_BYTE]);
   if (mode != SET) {
-    throw std::invalid_argument("Calling set construtor with non-set mode data");
+    throw std::invalid_argument("Calling set constructor with non-set mode data");
   }
 
   const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(data[HllUtil<A>::MODE_BYTE]);
@@ -106,7 +100,8 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(const void* bytes, size_t len) {
                                 + ", found: " + std::to_string(len));
   }
 
-  CouponHashSet<A>* sketch = new (chsAlloc().allocate(1)) CouponHashSet<A>(lgK, tgtHllType);
+  ChsAlloc chsa(allocator);
+  CouponHashSet<A>* sketch = new (chsa.allocate(1)) CouponHashSet<A>(lgK, tgtHllType, allocator);
 
   if (compactFlag) {
     const uint8_t* curPos = data + HllUtil<A>::HASH_SET_INT_ARR_START;
@@ -116,24 +111,19 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(const void* bytes, size_t len) {
       sketch->couponUpdate(coupon);
     }
   } else {
-    int* oldArr = sketch->couponIntArr;
-    const size_t oldArrLen = 1 << sketch->lgCouponArrInts;
-    sketch->lgCouponArrInts = lgArrInts;
-    typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
-    sketch->couponIntArr = intAlloc().allocate(1 << lgArrInts);
+    sketch->coupons.resize(1 << lgArrInts);
     sketch->couponCount = couponCount;
     // only need to read valid coupons, unlike in stream case
-    std::memcpy(sketch->couponIntArr,
+    std::memcpy(sketch->coupons.data(),
                 data + HllUtil<A>::HASH_SET_INT_ARR_START,
                 couponCount * sizeof(int));
-    intAlloc().deallocate(oldArr, oldArrLen);
   }
 
   return sketch;
 }
 
 template<typename A>
-CouponHashSet<A>* CouponHashSet<A>::newSet(std::istream& is) {
+CouponHashSet<A>* CouponHashSet<A>::newSet(std::istream& is, const A& allocator) {
   uint8_t listHeader[8];
   is.read((char*)listHeader, 8 * sizeof(uint8_t));
 
@@ -149,7 +139,7 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(std::istream& is) {
 
   hll_mode mode = HllSketchImpl<A>::extractCurMode(listHeader[HllUtil<A>::MODE_BYTE]);
   if (mode != SET) {
-    throw std::invalid_argument("Calling set construtor with non-set mode data");
+    throw std::invalid_argument("Calling set constructor with non-set mode data");
   }
 
   target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(listHeader[HllUtil<A>::MODE_BYTE]);
@@ -168,7 +158,8 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(std::istream& is) {
     lgArrInts = HllUtil<A>::computeLgArrInts(SET, couponCount, lgK);
   }
 
-  CouponHashSet<A>* sketch = new (chsAlloc().allocate(1)) CouponHashSet<A>(lgK, tgtHllType);
+  ChsAlloc chsa(allocator);
+  CouponHashSet<A>* sketch = new (chsa.allocate(1)) CouponHashSet<A>(lgK, tgtHllType, allocator);
   typedef std::unique_ptr<CouponHashSet<A>, std::function<void(HllSketchImpl<A>*)>> coupon_hash_set_ptr;
   coupon_hash_set_ptr ptr(sketch, sketch->get_deleter());
 
@@ -181,13 +172,10 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(std::istream& is) {
       sketch->couponUpdate(coupon);
     }
   } else {
-    typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
-    intAlloc().deallocate(sketch->couponIntArr, 1 << sketch->lgCouponArrInts);
-    sketch->lgCouponArrInts = lgArrInts;
-    sketch->couponIntArr = intAlloc().allocate(1 << lgArrInts);
+    sketch->coupons.resize(1 << lgArrInts);
     sketch->couponCount = couponCount;
     // for stream processing, read entire list so read pointer ends up set correctly
-    is.read((char*)sketch->couponIntArr, (1 << sketch->lgCouponArrInts) * sizeof(int));
+    is.read((char*)sketch->coupons.data(), sketch->coupons.size() * sizeof(int));
   } 
 
   if (!is.good())
@@ -198,21 +186,24 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(std::istream& is) {
 
 template<typename A>
 CouponHashSet<A>* CouponHashSet<A>::copy() const {
-  return new (chsAlloc().allocate(1)) CouponHashSet<A>(*this);
+  ChsAlloc chsa(this->coupons.get_allocator());
+  return new (chsa.allocate(1)) CouponHashSet<A>(*this);
 }
 
 template<typename A>
 CouponHashSet<A>* CouponHashSet<A>::copyAs(const target_hll_type tgtHllType) const {
-  return new (chsAlloc().allocate(1)) CouponHashSet<A>(*this, tgtHllType);
+  ChsAlloc chsa(this->coupons.get_allocator());
+  return new (chsa.allocate(1)) CouponHashSet<A>(*this, tgtHllType);
 }
 
 template<typename A>
 HllSketchImpl<A>* CouponHashSet<A>::couponUpdate(int coupon) {
-  const int index = find<A>(this->couponIntArr, this->lgCouponArrInts, coupon);
+  const uint8_t lgCouponArrInts = count_trailing_zeros_in_u32(this->coupons.size());
+  const int index = find<A>(this->coupons.data(), lgCouponArrInts, coupon);
   if (index >= 0) {
     return this; // found duplicate, ignore
   }
-  this->couponIntArr[~index] = coupon; // found empty
+  this->coupons[~index] = coupon; // found empty
   ++this->couponCount;
   if (checkGrowOrPromote()) {
     return this->promoteHeapListOrSetToHll(*this);
@@ -232,39 +223,34 @@ int CouponHashSet<A>::getPreInts() const {
 
 template<typename A>
 bool CouponHashSet<A>::checkGrowOrPromote() {
-  if ((HllUtil<A>::RESIZE_DENOM * this->couponCount) > (HllUtil<A>::RESIZE_NUMER * (1 << this->lgCouponArrInts))) {
-    if (this->lgCouponArrInts == (this->lgConfigK - 3)) { // at max size
+  if (static_cast<size_t>(HllUtil<A>::RESIZE_DENOM * this->couponCount) > (HllUtil<A>::RESIZE_NUMER * this->coupons.size())) {
+    const uint8_t lgCouponArrInts = count_trailing_zeros_in_u32(this->coupons.size());
+    if (lgCouponArrInts == (this->lgConfigK - 3)) { // at max size
       return true; // promote to HLL
     }
-    int tgtLgCoupArrSize = this->lgCouponArrInts + 1;
-    growHashSet(this->lgCouponArrInts, tgtLgCoupArrSize);
+    growHashSet(lgCouponArrInts + 1);
   }
   return false;
 }
 
 template<typename A>
-void CouponHashSet<A>::growHashSet(const int srcLgCoupArrSize, const int tgtLgCoupArrSize) {
+void CouponHashSet<A>::growHashSet(int tgtLgCoupArrSize) {
   const int tgtLen = 1 << tgtLgCoupArrSize;
-  typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
-  int* tgtCouponIntArr = intAlloc().allocate(tgtLen);
-  std::fill(tgtCouponIntArr, tgtCouponIntArr + tgtLen, 0);
+  vector_int coupons_new(tgtLen, 0, this->coupons.get_allocator());
 
-  const int srcLen = 1 << srcLgCoupArrSize;
+  const int srcLen = this->coupons.size();
   for (int i = 0; i < srcLen; ++i) { // scan existing array for non-zero values
-    const int fetched = this->couponIntArr[i];
+    const int fetched = this->coupons[i];
     if (fetched != HllUtil<A>::EMPTY) {
-      const int idx = find<A>(tgtCouponIntArr, tgtLgCoupArrSize, fetched); // search TGT array
+      const int idx = find<A>(coupons_new.data(), tgtLgCoupArrSize, fetched); // search TGT array
       if (idx < 0) { // found EMPTY
-        tgtCouponIntArr[~idx] = fetched; // insert
+        coupons_new[~idx] = fetched; // insert
         continue;
       }
       throw std::runtime_error("Error: Found duplicate coupon");
     }
   }
-
-  intAlloc().deallocate(this->couponIntArr, 1 << this->lgCouponArrInts);
-  this->couponIntArr = tgtCouponIntArr;
-  this->lgCouponArrInts = tgtLgCoupArrSize;
+  this->coupons = std::move(coupons_new);
 }
 
 template<typename A>
diff --git a/be/src/thirdparty/datasketches/CouponHashSet.hpp b/be/src/thirdparty/datasketches/CouponHashSet.hpp
index 7aaffc3..b9b99b7 100644
--- a/be/src/thirdparty/datasketches/CouponHashSet.hpp
+++ b/be/src/thirdparty/datasketches/CouponHashSet.hpp
@@ -24,20 +24,20 @@
 
 namespace datasketches {
 
-template<typename A = std::allocator<char>>
+template<typename A>
 class CouponHashSet : public CouponList<A> {
   public:
-    static CouponHashSet* newSet(const void* bytes, size_t len);
-    static CouponHashSet* newSet(std::istream& is);
-    explicit CouponHashSet(int lgConfigK, target_hll_type tgtHllType);
-    explicit CouponHashSet(const CouponHashSet& that, target_hll_type tgtHllType);
-    explicit CouponHashSet(const CouponHashSet& that);
+    static CouponHashSet* newSet(const void* bytes, size_t len, const A& allocator);
+    static CouponHashSet* newSet(std::istream& is, const A& allocator);
+    CouponHashSet(int lgConfigK, target_hll_type tgtHllType, const A& allocator);
+    CouponHashSet(const CouponHashSet& that, target_hll_type tgtHllType);
 
-    virtual ~CouponHashSet();
+    virtual ~CouponHashSet() = default;
     virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const;
 
   protected:
-    
+    using vector_int = std::vector<int, typename std::allocator_traits<A>::template rebind_alloc<int>>;
+
     virtual CouponHashSet* copy() const;
     virtual CouponHashSet* copyAs(target_hll_type tgtHllType) const;
 
@@ -49,9 +49,9 @@ class CouponHashSet : public CouponList<A> {
     friend class HllSketchImplFactory<A>;
 
   private:
-    typedef typename std::allocator_traits<A>::template rebind_alloc<CouponHashSet<A>> chsAlloc;
+    using ChsAlloc = typename std::allocator_traits<A>::template rebind_alloc<CouponHashSet<A>>;
     bool checkGrowOrPromote();
-    void growHashSet(int srcLgCoupArrSize, int tgtLgCoupArrSize);
+    void growHashSet(int tgtLgCoupArrSize);
 };
 
 }
diff --git a/be/src/thirdparty/datasketches/CouponList-internal.hpp b/be/src/thirdparty/datasketches/CouponList-internal.hpp
index 1800a37..fd304c8 100644
--- a/be/src/thirdparty/datasketches/CouponList-internal.hpp
+++ b/be/src/thirdparty/datasketches/CouponList-internal.hpp
@@ -23,6 +23,7 @@
 #include "CouponList.hpp"
 #include "CubicInterpolation.hpp"
 #include "HllUtil.hpp"
+#include "count_zeros.hpp"
 
 #include <algorithm>
 #include <cmath>
@@ -30,74 +31,45 @@
 namespace datasketches {
 
 template<typename A>
-CouponList<A>::CouponList(const int lgConfigK, const target_hll_type tgtHllType, const hll_mode mode)
-  : HllSketchImpl<A>(lgConfigK, tgtHllType, mode, false) {
-    if (mode == hll_mode::LIST) {
-      lgCouponArrInts = HllUtil<A>::LG_INIT_LIST_SIZE;
-    } else { // mode == SET
-      lgCouponArrInts = HllUtil<A>::LG_INIT_SET_SIZE;
-    }
-    oooFlag = false;
-    const int arrayLen = 1 << lgCouponArrInts;
-    typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
-    couponIntArr = intAlloc().allocate(arrayLen);
-    std::fill(couponIntArr, couponIntArr + arrayLen, 0);
-    couponCount = 0;
-}
-
-template<typename A>
-CouponList<A>::CouponList(const CouponList& that)
-  : HllSketchImpl<A>(that.lgConfigK, that.tgtHllType, that.mode, false),
-    lgCouponArrInts(that.lgCouponArrInts),
-    couponCount(that.couponCount),
-    oooFlag(that.oooFlag) {
-
-  const int numItems = 1 << lgCouponArrInts;
-  typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
-  couponIntArr = intAlloc().allocate(numItems);
-  std::copy(that.couponIntArr, that.couponIntArr + numItems, couponIntArr);
-}
-
-template<typename A>
-CouponList<A>::CouponList(const CouponList& that, const target_hll_type tgtHllType)
-  : HllSketchImpl<A>(that.lgConfigK, tgtHllType, that.mode, false),
-    lgCouponArrInts(that.lgCouponArrInts),
-    couponCount(that.couponCount),
-    oooFlag(that.oooFlag) {
-
-  const int numItems = 1 << lgCouponArrInts;
-  typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
-  couponIntArr = intAlloc().allocate(numItems);
-  std::copy(that.couponIntArr, that.couponIntArr + numItems, couponIntArr);
-}
+CouponList<A>::CouponList(const int lgConfigK, const target_hll_type tgtHllType, const hll_mode mode, const A& allocator):
+HllSketchImpl<A>(lgConfigK, tgtHllType, mode, false),
+couponCount(0),
+oooFlag(false),
+coupons(1 << (mode == hll_mode::LIST ? HllUtil<A>::LG_INIT_LIST_SIZE : HllUtil<A>::LG_INIT_SET_SIZE), 0, allocator)
+{}
 
 template<typename A>
-CouponList<A>::~CouponList() {
-  typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
-  intAlloc().deallocate(couponIntArr, 1 << lgCouponArrInts);
-}
+CouponList<A>::CouponList(const CouponList& that, const target_hll_type tgtHllType):
+HllSketchImpl<A>(that.lgConfigK, tgtHllType, that.mode, false),
+couponCount(that.couponCount),
+oooFlag(that.oooFlag),
+coupons(that.coupons)
+{}
 
 template<typename A>
 std::function<void(HllSketchImpl<A>*)> CouponList<A>::get_deleter() const {
   return [](HllSketchImpl<A>* ptr) {
     CouponList<A>* cl = static_cast<CouponList<A>*>(ptr);
+    ClAlloc cla(cl->getAllocator());
     cl->~CouponList();
-    clAlloc().deallocate(cl, 1);
+    cla.deallocate(cl, 1);
   };
 }
 
 template<typename A>
 CouponList<A>* CouponList<A>::copy() const {
-  return new (clAlloc().allocate(1)) CouponList<A>(*this);
+  ClAlloc cla(coupons.get_allocator());
+  return new (cla.allocate(1)) CouponList<A>(*this);
 }
 
 template<typename A>
 CouponList<A>* CouponList<A>::copyAs(target_hll_type tgtHllType) const {
-  return new (clAlloc().allocate(1)) CouponList<A>(*this, tgtHllType);
+  ClAlloc cla(coupons.get_allocator());
+  return new (cla.allocate(1)) CouponList<A>(*this, tgtHllType);
 }
 
 template<typename A>
-CouponList<A>* CouponList<A>::newList(const void* bytes, size_t len) {
+CouponList<A>* CouponList<A>::newList(const void* bytes, size_t len, const A& allocator) {
   if (len < HllUtil<A>::LIST_INT_ARR_START) {
     throw std::out_of_range("Input data length insufficient to hold CouponHashSet");
   }
@@ -115,7 +87,7 @@ CouponList<A>* CouponList<A>::newList(const void* bytes, size_t len) {
 
   hll_mode mode = HllSketchImpl<A>::extractCurMode(data[HllUtil<A>::MODE_BYTE]);
   if (mode != LIST) {
-    throw std::invalid_argument("Calling set construtor with non-list mode data");
+    throw std::invalid_argument("Calling list constructor with non-list mode data");
   }
 
   target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(data[HllUtil<A>::MODE_BYTE]);
@@ -133,20 +105,21 @@ CouponList<A>* CouponList<A>::newList(const void* bytes, size_t len) {
                                 + ", found: " + std::to_string(len));
   }
 
-  CouponList<A>* sketch = new (clAlloc().allocate(1)) CouponList<A>(lgK, tgtHllType, mode);
+  ClAlloc cla(allocator);
+  CouponList<A>* sketch = new (cla.allocate(1)) CouponList<A>(lgK, tgtHllType, mode, allocator);
   sketch->couponCount = couponCount;
   sketch->putOutOfOrderFlag(oooFlag); // should always be false for LIST
 
   if (!emptyFlag) {
     // only need to read valid coupons, unlike in stream case
-    std::memcpy(sketch->couponIntArr, data + HllUtil<A>::LIST_INT_ARR_START, couponCount * sizeof(int));
+    std::memcpy(sketch->coupons.data(), data + HllUtil<A>::LIST_INT_ARR_START, couponCount * sizeof(int));
   }
   
   return sketch;
 }
 
 template<typename A>
-CouponList<A>* CouponList<A>::newList(std::istream& is) {
+CouponList<A>* CouponList<A>::newList(std::istream& is, const A& allocator) {
   uint8_t listHeader[8];
   is.read((char*)listHeader, 8 * sizeof(uint8_t));
 
@@ -162,7 +135,7 @@ CouponList<A>* CouponList<A>::newList(std::istream& is) {
 
   hll_mode mode = HllSketchImpl<A>::extractCurMode(listHeader[HllUtil<A>::MODE_BYTE]);
   if (mode != LIST) {
-    throw std::invalid_argument("Calling list construtor with non-list mode data");
+    throw std::invalid_argument("Calling list constructor with non-list mode data");
   }
 
   const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(listHeader[HllUtil<A>::MODE_BYTE]);
@@ -172,8 +145,9 @@ CouponList<A>* CouponList<A>::newList(std::istream& is) {
   const bool oooFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::OUT_OF_ORDER_FLAG_MASK) ? true : false);
   const bool emptyFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::EMPTY_FLAG_MASK) ? true : false);
 
-  CouponList<A>* sketch = new (clAlloc().allocate(1)) CouponList<A>(lgK, tgtHllType, mode);
-  typedef std::unique_ptr<CouponList<A>, std::function<void(HllSketchImpl<A>*)>> coupon_list_ptr;
+  ClAlloc cla(allocator);
+  CouponList<A>* sketch = new (cla.allocate(1)) CouponList<A>(lgK, tgtHllType, mode, allocator);
+  using coupon_list_ptr = std::unique_ptr<CouponList<A>, std::function<void(HllSketchImpl<A>*)>>;
   coupon_list_ptr ptr(sketch, sketch->get_deleter());
   const int couponCount = listHeader[HllUtil<A>::LIST_COUNT_BYTE];
   sketch->couponCount = couponCount;
@@ -183,8 +157,8 @@ CouponList<A>* CouponList<A>::newList(std::istream& is) {
     // For stream processing, need to read entire number written to stream so read
     // pointer ends up set correctly.
     // If not compact, still need to read empty items even though in order.
-    const int numToRead = (compact ? couponCount : (1 << sketch->lgCouponArrInts));
-    is.read((char*)sketch->couponIntArr, numToRead * sizeof(int));
+    const int numToRead = (compact ? couponCount : sketch->coupons.size());
+    is.read((char*)sketch->coupons.data(), numToRead * sizeof(int));
   }
 
   if (!is.good())
@@ -196,14 +170,14 @@ CouponList<A>* CouponList<A>::newList(std::istream& is) {
 template<typename A>
 vector_u8<A> CouponList<A>::serialize(bool compact, unsigned header_size_bytes) const {
   const size_t sketchSizeBytes = (compact ? getCompactSerializationBytes() : getUpdatableSerializationBytes()) + header_size_bytes;
-  vector_u8<A> byteArr(sketchSizeBytes);
+  vector_u8<A> byteArr(sketchSizeBytes, 0, getAllocator());
   uint8_t* bytes = byteArr.data() + header_size_bytes;
 
   bytes[HllUtil<A>::PREAMBLE_INTS_BYTE] = static_cast<uint8_t>(getPreInts());
   bytes[HllUtil<A>::SER_VER_BYTE] = static_cast<uint8_t>(HllUtil<A>::SER_VER);
   bytes[HllUtil<A>::FAMILY_BYTE] = static_cast<uint8_t>(HllUtil<A>::FAMILY_ID);
   bytes[HllUtil<A>::LG_K_BYTE] = static_cast<uint8_t>(this->lgConfigK);
-  bytes[HllUtil<A>::LG_ARR_BYTE] = static_cast<uint8_t>(lgCouponArrInts);
+  bytes[HllUtil<A>::LG_ARR_BYTE] = count_trailing_zeros_in_u32(coupons.size());
   bytes[HllUtil<A>::FLAGS_BYTE] = this->makeFlagsByte(compact);
   bytes[HllUtil<A>::LIST_COUNT_BYTE] = static_cast<uint8_t>(this->mode == LIST ? couponCount : 0);
   bytes[HllUtil<A>::MODE_BYTE] = this->makeModeByte();
@@ -217,7 +191,7 @@ vector_u8<A> CouponList<A>::serialize(bool compact, unsigned header_size_bytes)
   const int sw = (isCompact() ? 2 : 0) | (compact ? 1 : 0);
   switch (sw) {
     case 0: { // src updatable, dst updatable
-      std::memcpy(bytes + getMemDataStart(), getCouponIntArr(), (1 << lgCouponArrInts) * sizeof(int));
+      std::memcpy(bytes + getMemDataStart(), coupons.data(), coupons.size() * sizeof(int));
       break;
     }
     case 1: { // src updatable, dst compact
@@ -247,7 +221,7 @@ void CouponList<A>::serialize(std::ostream& os, const bool compact) const {
   os.write((char*)&familyId, sizeof(familyId));
   const uint8_t lgKByte((uint8_t) this->lgConfigK);
   os.write((char*)&lgKByte, sizeof(lgKByte));
-  const uint8_t lgArrIntsByte((uint8_t) lgCouponArrInts);
+  const uint8_t lgArrIntsByte(count_trailing_zeros_in_u32(coupons.size()));
   os.write((char*)&lgArrIntsByte, sizeof(lgArrIntsByte));
   const uint8_t flagsByte(this->makeFlagsByte(compact));
   os.write((char*)&flagsByte, sizeof(flagsByte));
@@ -273,7 +247,7 @@ void CouponList<A>::serialize(std::ostream& os, const bool compact) const {
   const int sw = (isCompact() ? 2 : 0) | (compact ? 1 : 0);
   switch (sw) {
     case 0: { // src updatable, dst updatable
-      os.write((char*)getCouponIntArr(), (1 << lgCouponArrInts) * sizeof(int));
+      os.write((char*)coupons.data(), coupons.size() * sizeof(int));
       break;
     }
     case 1: { // src updatable, dst compact
@@ -292,13 +266,12 @@ void CouponList<A>::serialize(std::ostream& os, const bool compact) const {
 
 template<typename A>
 HllSketchImpl<A>* CouponList<A>::couponUpdate(int coupon) {
-  const int len = 1 << lgCouponArrInts;
-  for (int i = 0; i < len; ++i) { // search for empty slot
-    const int couponAtIdx = couponIntArr[i];
+  for (size_t i = 0; i < coupons.size(); ++i) { // search for empty slot
+    const int couponAtIdx = coupons[i];
     if (couponAtIdx == HllUtil<A>::EMPTY) {
-      couponIntArr[i] = coupon; // the actual update
+      coupons[i] = coupon; // the actual update
       ++couponCount;
-      if (couponCount >= len) { // array full
+      if (couponCount == static_cast<int>(coupons.size())) { // array full
         if (this->lgConfigK < 8) {
           return promoteHeapListOrSetToHll(*this);
         }
@@ -348,7 +321,7 @@ bool CouponList<A>::isEmpty() const { return getCouponCount() == 0; }
 
 template<typename A>
 int CouponList<A>::getUpdatableSerializationBytes() const {
-  return getMemDataStart() + (4 << getLgCouponArrInts());
+  return getMemDataStart() + coupons.size() * sizeof(int);
 }
 
 template<typename A>
@@ -383,13 +356,8 @@ void CouponList<A>::putOutOfOrderFlag(bool oooFlag) {
 }
 
 template<typename A>
-int CouponList<A>::getLgCouponArrInts() const {
-  return lgCouponArrInts;
-}
-
-template<typename A>
-int* CouponList<A>::getCouponIntArr() const {
-  return couponIntArr;
+A CouponList<A>::getAllocator() const {
+  return coupons.get_allocator();
 }
 
 template<typename A>
@@ -404,12 +372,12 @@ HllSketchImpl<A>* CouponList<A>::promoteHeapListOrSetToHll(CouponList& src) {
 
 template<typename A>
 coupon_iterator<A> CouponList<A>::begin(bool all) const {
-  return coupon_iterator<A>(couponIntArr, 1 << lgCouponArrInts, 0, all);
+  return coupon_iterator<A>(coupons.data(), coupons.size(), 0, all);
 }
 
 template<typename A>
 coupon_iterator<A> CouponList<A>::end() const {
-  return coupon_iterator<A>(couponIntArr, 1 << lgCouponArrInts, 1 << lgCouponArrInts, false);
+  return coupon_iterator<A>(coupons.data(), coupons.size(), coupons.size(), false);
 }
 
 }
diff --git a/be/src/thirdparty/datasketches/CouponList.hpp b/be/src/thirdparty/datasketches/CouponList.hpp
index 063805b..c19569e 100644
--- a/be/src/thirdparty/datasketches/CouponList.hpp
+++ b/be/src/thirdparty/datasketches/CouponList.hpp
@@ -30,19 +30,18 @@ namespace datasketches {
 template<typename A>
 class HllSketchImplFactory;
 
-template<typename A = std::allocator<char>>
+template<typename A>
 class CouponList : public HllSketchImpl<A> {
   public:
-    explicit CouponList(int lgConfigK, target_hll_type tgtHllType, hll_mode mode);
-    explicit CouponList(const CouponList& that);
-    explicit CouponList(const CouponList& that, target_hll_type tgtHllType);
+    CouponList(int lgConfigK, target_hll_type tgtHllType, hll_mode mode, const A& allocator);
+    CouponList(const CouponList& that, target_hll_type tgtHllType);
 
-    static CouponList* newList(const void* bytes, size_t len);
-    static CouponList* newList(std::istream& is);
+    static CouponList* newList(const void* bytes, size_t len, const A& allocator);
+    static CouponList* newList(std::istream& is, const A& allocator);
     virtual vector_u8<A> serialize(bool compact, unsigned header_size_bytes) const;
     virtual void serialize(std::ostream& os, bool compact) const;
 
-    virtual ~CouponList();
+    virtual ~CouponList() = default;
     virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const;
 
     virtual CouponList* copy() const;
@@ -62,7 +61,9 @@ class CouponList : public HllSketchImpl<A> {
     coupon_iterator<A> end() const;
 
   protected:
-    typedef typename std::allocator_traits<A>::template rebind_alloc<CouponList<A>> clAlloc;
+    using ClAlloc = typename std::allocator_traits<A>::template rebind_alloc<CouponList<A>>;
+
+    using vector_int = std::vector<int, typename std::allocator_traits<A>::template rebind_alloc<int>>;
 
     HllSketchImpl<A>* promoteHeapListToSet(CouponList& list);
     HllSketchImpl<A>* promoteHeapListOrSetToHll(CouponList& src);
@@ -75,13 +76,11 @@ class CouponList : public HllSketchImpl<A> {
     virtual bool isOutOfOrderFlag() const;
     virtual void putOutOfOrderFlag(bool oooFlag);
 
-    virtual int getLgCouponArrInts() const;
-    virtual int* getCouponIntArr() const;
+    virtual A getAllocator() const;
 
-    int lgCouponArrInts;
     int couponCount;
     bool oooFlag;
-    int* couponIntArr;
+    vector_int coupons;
 
     friend class HllSketchImplFactory<A>;
 };
diff --git a/be/src/thirdparty/datasketches/CubicInterpolation.hpp b/be/src/thirdparty/datasketches/CubicInterpolation.hpp
index b9cdfe7..58fb7d7 100644
--- a/be/src/thirdparty/datasketches/CubicInterpolation.hpp
+++ b/be/src/thirdparty/datasketches/CubicInterpolation.hpp
@@ -24,7 +24,7 @@
 
 namespace datasketches {
 
-template<typename A = std::allocator<char>>
+template<typename A = std::allocator<uint8_t>>
 class CubicInterpolation {
   public:
     static double usingXAndYTables(const double xArr[], const double yArr[],
@@ -40,4 +40,4 @@ class CubicInterpolation {
 
 #include "CubicInterpolation-internal.hpp"
 
-#endif /* _CUBICINTERPOLATION_HPP_ */
\ No newline at end of file
+#endif /* _CUBICINTERPOLATION_HPP_ */
diff --git a/be/src/thirdparty/datasketches/HarmonicNumbers.hpp b/be/src/thirdparty/datasketches/HarmonicNumbers.hpp
index 501ce0c..34b830a 100644
--- a/be/src/thirdparty/datasketches/HarmonicNumbers.hpp
+++ b/be/src/thirdparty/datasketches/HarmonicNumbers.hpp
@@ -25,7 +25,7 @@
 
 namespace datasketches {
 
-template<typename A = std::allocator<char>>
+template<typename A = std::allocator<uint8_t>>
 class HarmonicNumbers {
   public:
     /**
@@ -45,4 +45,4 @@ class HarmonicNumbers {
 
 #include "HarmonicNumbers-internal.hpp"
 
-#endif /* _HARMONICNUMBERS_HPP_ */
\ No newline at end of file
+#endif /* _HARMONICNUMBERS_HPP_ */
diff --git a/be/src/thirdparty/datasketches/Hll4Array-internal.hpp b/be/src/thirdparty/datasketches/Hll4Array-internal.hpp
index 8498bb8..f93014a 100644
--- a/be/src/thirdparty/datasketches/Hll4Array-internal.hpp
+++ b/be/src/thirdparty/datasketches/Hll4Array-internal.hpp
@@ -30,13 +30,12 @@
 namespace datasketches {
 
 template<typename A>
-Hll4Array<A>::Hll4Array(const int lgConfigK, const bool startFullSize) :
-    HllArray<A>(lgConfigK, target_hll_type::HLL_4, startFullSize) {
+Hll4Array<A>::Hll4Array(const int lgConfigK, const bool startFullSize, const A& allocator):
+HllArray<A>(lgConfigK, target_hll_type::HLL_4, startFullSize, allocator),
+auxHashMap(nullptr)
+{
   const int numBytes = this->hll4ArrBytes(lgConfigK);
-  typedef typename std::allocator_traits<A>::template rebind_alloc<uint8_t> uint8Alloc;
-  this->hllByteArr = uint8Alloc().allocate(numBytes);
-  std::fill(this->hllByteArr, this->hllByteArr + numBytes, 0);
-  auxHashMap = nullptr;
+  this->hllByteArr.resize(numBytes, 0);
 }
 
 template<typename A>
@@ -63,17 +62,19 @@ Hll4Array<A>::~Hll4Array() {
 template<typename A>
 std::function<void(HllSketchImpl<A>*)> Hll4Array<A>::get_deleter() const {
   return [](HllSketchImpl<A>* ptr) {
-    typedef typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>> hll4Alloc;
     Hll4Array<A>* hll = static_cast<Hll4Array<A>*>(ptr);
+    using Hll4Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>>;
+    Hll4Alloc hll4Alloc(hll->getAllocator());
     hll->~Hll4Array();
-    hll4Alloc().deallocate(hll, 1);
+    hll4Alloc.deallocate(hll, 1);
   };
 }
 
 template<typename A>
 Hll4Array<A>* Hll4Array<A>::copy() const {
-  typedef typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>> hll4Alloc;
-  return new (hll4Alloc().allocate(1)) Hll4Array<A>(*this);
+  using Hll4Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>>;
+  Hll4Alloc hll4Alloc(this->getAllocator());
+  return new (hll4Alloc.allocate(1)) Hll4Array<A>(*this);
 }
 
 template<typename A>
@@ -195,7 +196,7 @@ void Hll4Array<A>::internalHll4Update(const int slotNo, const int newVal) {
           // added to the exception table
           putSlot(slotNo, HllUtil<A>::AUX_TOKEN);
           if (auxHashMap == nullptr) {
-            auxHashMap = AuxHashMap<A>::newAuxHashMap(HllUtil<A>::LG_AUX_ARR_INTS[this->lgConfigK], this->lgConfigK);
+            auxHashMap = AuxHashMap<A>::newAuxHashMap(HllUtil<A>::LG_AUX_ARR_INTS[this->lgConfigK], this->lgConfigK, this->getAllocator());
           }
           auxHashMap->mustAdd(slotNo, newVal);
         }
@@ -285,7 +286,7 @@ void Hll4Array<A>::shiftToBiggerCurMin() {
       } else { //newShiftedVal >= AUX_TOKEN
         // the former exception remains an exception, so must be added to the newAuxMap
         if (newAuxMap == nullptr) {
-          newAuxMap = AuxHashMap<A>::newAuxHashMap(HllUtil<A>::LG_AUX_ARR_INTS[this->lgConfigK], this->lgConfigK);
+          newAuxMap = AuxHashMap<A>::newAuxHashMap(HllUtil<A>::LG_AUX_ARR_INTS[this->lgConfigK], this->lgConfigK, this->getAllocator());
         }
         newAuxMap->mustAdd(slotNum, oldActualVal);
       }
@@ -315,12 +316,12 @@ void Hll4Array<A>::shiftToBiggerCurMin() {
 
 template<typename A>
 typename HllArray<A>::const_iterator Hll4Array<A>::begin(bool all) const {
-  return typename HllArray<A>::const_iterator(this->hllByteArr, 1 << this->lgConfigK, 0, this->tgtHllType, auxHashMap, this->curMin, all);
+  return typename HllArray<A>::const_iterator(this->hllByteArr.data(), 1 << this->lgConfigK, 0, this->tgtHllType, auxHashMap, this->curMin, all);
 }
 
 template<typename A>
 typename HllArray<A>::const_iterator Hll4Array<A>::end() const {
-  return typename HllArray<A>::const_iterator(this->hllByteArr, 1 << this->lgConfigK, 1 << this->lgConfigK, this->tgtHllType, auxHashMap, this->curMin, false);
+  return typename HllArray<A>::const_iterator(this->hllByteArr.data(), 1 << this->lgConfigK, 1 << this->lgConfigK, this->tgtHllType, auxHashMap, this->curMin, false);
 }
 
 template<typename A>
diff --git a/be/src/thirdparty/datasketches/Hll4Array.hpp b/be/src/thirdparty/datasketches/Hll4Array.hpp
index ff56c86..38b2c94 100644
--- a/be/src/thirdparty/datasketches/Hll4Array.hpp
+++ b/be/src/thirdparty/datasketches/Hll4Array.hpp
@@ -31,7 +31,7 @@ class Hll4Iterator;
 template<typename A>
 class Hll4Array final : public HllArray<A> {
   public:
-    explicit Hll4Array(int lgConfigK, bool startFullSize);
+    explicit Hll4Array(int lgConfigK, bool startFullSize, const A& allocator);
     explicit Hll4Array(const Hll4Array<A>& that);
 
     virtual ~Hll4Array();
diff --git a/be/src/thirdparty/datasketches/Hll6Array-internal.hpp b/be/src/thirdparty/datasketches/Hll6Array-internal.hpp
index a318564..e9f6e9f 100644
--- a/be/src/thirdparty/datasketches/Hll6Array-internal.hpp
+++ b/be/src/thirdparty/datasketches/Hll6Array-internal.hpp
@@ -27,40 +27,29 @@
 namespace datasketches {
 
 template<typename A>
-Hll6Array<A>::Hll6Array(const int lgConfigK, const bool startFullSize) :
-    HllArray<A>(lgConfigK, target_hll_type::HLL_6, startFullSize) {
-  const int numBytes = this->hll6ArrBytes(lgConfigK);
-  typedef typename std::allocator_traits<A>::template rebind_alloc<uint8_t> uint8Alloc;
-  this->hllByteArr = uint8Alloc().allocate(numBytes);
-  std::fill(this->hllByteArr, this->hllByteArr + numBytes, 0);
-}
-
-template<typename A>
-Hll6Array<A>::Hll6Array(const Hll6Array<A>& that) :
-  HllArray<A>(that)
+Hll6Array<A>::Hll6Array(const int lgConfigK, const bool startFullSize, const A& allocator):
+HllArray<A>(lgConfigK, target_hll_type::HLL_6, startFullSize, allocator)
 {
-  // can determine hllByteArr size in parent class, no need to allocate here
-}
-
-template<typename A>
-Hll6Array<A>::~Hll6Array() {
-  // hllByteArr deleted in parent
+  const int numBytes = this->hll6ArrBytes(lgConfigK);
+  this->hllByteArr.resize(numBytes, 0);
 }
 
 template<typename A>
 std::function<void(HllSketchImpl<A>*)> Hll6Array<A>::get_deleter() const {
   return [](HllSketchImpl<A>* ptr) {
-    typedef typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>> hll6Alloc;
+    using Hll6Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>>;
     Hll6Array<A>* hll = static_cast<Hll6Array<A>*>(ptr);
+    Hll6Alloc hll6Alloc(hll->getAllocator());
     hll->~Hll6Array();
-    hll6Alloc().deallocate(hll, 1);
+    hll6Alloc.deallocate(hll, 1);
   };
 }
 
 template<typename A>
 Hll6Array<A>* Hll6Array<A>::copy() const {
-  typedef typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>> hll6Alloc;
-  return new (hll6Alloc().allocate(1)) Hll6Array<A>(*this);
+  using Hll6Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>>;
+  Hll6Alloc hll6Alloc(this->getAllocator());
+  return new (hll6Alloc.allocate(1)) Hll6Array<A>(*this);
 }
 
 template<typename A>
diff --git a/be/src/thirdparty/datasketches/Hll6Array.hpp b/be/src/thirdparty/datasketches/Hll6Array.hpp
index 5178de8..03370b2 100644
--- a/be/src/thirdparty/datasketches/Hll6Array.hpp
+++ b/be/src/thirdparty/datasketches/Hll6Array.hpp
@@ -30,10 +30,9 @@ class Hll6Iterator;
 template<typename A>
 class Hll6Array final : public HllArray<A> {
   public:
-    explicit Hll6Array(int lgConfigK, bool startFullSize);
-    explicit Hll6Array(const Hll6Array<A>& that);
+    Hll6Array(int lgConfigK, bool startFullSize, const A& allocator);
 
-    virtual ~Hll6Array();
+    virtual ~Hll6Array() = default;
     virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const;
 
     virtual Hll6Array* copy() const;
diff --git a/be/src/thirdparty/datasketches/Hll8Array-internal.hpp b/be/src/thirdparty/datasketches/Hll8Array-internal.hpp
index cb14a0f..f27a796 100644
--- a/be/src/thirdparty/datasketches/Hll8Array-internal.hpp
+++ b/be/src/thirdparty/datasketches/Hll8Array-internal.hpp
@@ -25,40 +25,29 @@
 namespace datasketches {
 
 template<typename A>
-Hll8Array<A>::Hll8Array(const int lgConfigK, const bool startFullSize) :
-    HllArray<A>(lgConfigK, target_hll_type::HLL_8, startFullSize) {
-  const int numBytes = this->hll8ArrBytes(lgConfigK);
-  typedef typename std::allocator_traits<A>::template rebind_alloc<uint8_t> uint8Alloc;
-  this->hllByteArr = uint8Alloc().allocate(numBytes);
-  std::fill(this->hllByteArr, this->hllByteArr + numBytes, 0);
-}
-
-template<typename A>
-Hll8Array<A>::Hll8Array(const Hll8Array<A>& that) :
-  HllArray<A>(that)
+Hll8Array<A>::Hll8Array(const int lgConfigK, const bool startFullSize, const A& allocator):
+HllArray<A>(lgConfigK, target_hll_type::HLL_8, startFullSize, allocator)
 {
-  // can determine hllByteArr size in parent class, no need to allocate here
-}
-
-template<typename A>
-Hll8Array<A>::~Hll8Array() {
-  // hllByteArr deleted in parent
+  const int numBytes = this->hll8ArrBytes(lgConfigK);
+  this->hllByteArr.resize(numBytes, 0);
 }
 
 template<typename A>
 std::function<void(HllSketchImpl<A>*)> Hll8Array<A>::get_deleter() const {
   return [](HllSketchImpl<A>* ptr) {
-    typedef typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>> hll8Alloc;
     Hll8Array<A>* hll = static_cast<Hll8Array<A>*>(ptr);
+    using Hll8Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>>;
+    Hll8Alloc hll8Alloc(hll->getAllocator());
     hll->~Hll8Array();
-    hll8Alloc().deallocate(hll, 1);
+    hll8Alloc.deallocate(hll, 1);
   };
 }
 
 template<typename A>
 Hll8Array<A>* Hll8Array<A>::copy() const {
-  typedef typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>> hll8Alloc;
-  return new (hll8Alloc().allocate(1)) Hll8Array<A>(*this);
+  using Hll8Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>>;
+  Hll8Alloc hll8Alloc(this->getAllocator());
+  return new (hll8Alloc.allocate(1)) Hll8Array<A>(*this);
 }
 
 template<typename A>
diff --git a/be/src/thirdparty/datasketches/Hll8Array.hpp b/be/src/thirdparty/datasketches/Hll8Array.hpp
index 2b0aefc..ea9a5bd 100644
--- a/be/src/thirdparty/datasketches/Hll8Array.hpp
+++ b/be/src/thirdparty/datasketches/Hll8Array.hpp
@@ -30,10 +30,9 @@ class Hll8Iterator;
 template<typename A>
 class Hll8Array final : public HllArray<A> {
   public:
-    explicit Hll8Array(int lgConfigK, bool startFullSize);
-    explicit Hll8Array(const Hll8Array& that);
+    Hll8Array(int lgConfigK, bool startFullSize, const A& allocator);
 
-    virtual ~Hll8Array();
+    virtual ~Hll8Array() = default;
     virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const;
 
     virtual Hll8Array<A>* copy() const;
diff --git a/be/src/thirdparty/datasketches/HllArray-internal.hpp b/be/src/thirdparty/datasketches/HllArray-internal.hpp
index 0a4bdce..4479417 100644
--- a/be/src/thirdparty/datasketches/HllArray-internal.hpp
+++ b/be/src/thirdparty/datasketches/HllArray-internal.hpp
@@ -35,48 +35,16 @@
 namespace datasketches {
 
 template<typename A>
-HllArray<A>::HllArray(const int lgConfigK, const target_hll_type tgtHllType, bool startFullSize)
-  : HllSketchImpl<A>(lgConfigK, tgtHllType, hll_mode::HLL, startFullSize) {
-  hipAccum = 0.0;
-  kxq0 = 1 << lgConfigK;
-  kxq1 = 0.0;
-  curMin = 0;
-  numAtCurMin = 1 << lgConfigK;
-  oooFlag = false;
-  hllByteArr = nullptr; // allocated in derived class
-}
-
-template<typename A>
-HllArray<A>::HllArray(const HllArray<A>& that):
-HllSketchImpl<A>(that.lgConfigK, that.tgtHllType, hll_mode::HLL, that.startFullSize),
-hipAccum(that.hipAccum),
-kxq0(that.kxq0),
-kxq1(that.kxq1),
-hllByteArr(nullptr),
-curMin(that.curMin),
-numAtCurMin(that.numAtCurMin),
-oooFlag(that.oooFlag)
-{
-  const int arrayLen = that.getHllByteArrBytes();
-  typedef typename std::allocator_traits<A>::template rebind_alloc<uint8_t> uint8Alloc;
-  hllByteArr = uint8Alloc().allocate(arrayLen);
-  std::copy(that.hllByteArr, that.hllByteArr + arrayLen, hllByteArr);
-}
-
-template<typename A>
-HllArray<A>::~HllArray() {
-  // need to determine number of bytes to deallocate
-  int hllArrBytes = 0;
-  if (this->tgtHllType == target_hll_type::HLL_4) {
-    hllArrBytes = hll4ArrBytes(this->lgConfigK);
-  } else if (this->tgtHllType == target_hll_type::HLL_6) {
-    hllArrBytes = hll6ArrBytes(this->lgConfigK);
-  } else { // tgtHllType == HLL_8
-    hllArrBytes = hll8ArrBytes(this->lgConfigK);
-  }
-  typedef typename std::allocator_traits<A>::template rebind_alloc<uint8_t> uint8Alloc;
-  uint8Alloc().deallocate(hllByteArr, hllArrBytes);
-}
+HllArray<A>::HllArray(const int lgConfigK, const target_hll_type tgtHllType, bool startFullSize, const A& allocator):
+HllSketchImpl<A>(lgConfigK, tgtHllType, hll_mode::HLL, startFullSize),
+hipAccum(0.0),
+kxq0(1 << lgConfigK),
+kxq1(0.0),
+hllByteArr(allocator),
+curMin(0),
+numAtCurMin(1 << lgConfigK),
+oooFlag(false)
+{}
 
 template<typename A>
 HllArray<A>* HllArray<A>::copyAs(const target_hll_type tgtHllType) const {
@@ -93,7 +61,7 @@ HllArray<A>* HllArray<A>::copyAs(const target_hll_type tgtHllType) const {
 }
 
 template<typename A>
-HllArray<A>* HllArray<A>::newHll(const void* bytes, size_t len) {
+HllArray<A>* HllArray<A>::newHll(const void* bytes, size_t len, const A& allocator) {
   if (len < HllUtil<A>::HLL_BYTE_ARR_START) {
     throw std::out_of_range("Input data length insufficient to hold HLL array");
   }
@@ -143,11 +111,11 @@ HllArray<A>* HllArray<A>::newHll(const void* bytes, size_t len) {
     int auxLgIntArrSize = (int) data[4];
     const size_t offset = HllUtil<A>::HLL_BYTE_ARR_START + arrayBytes;
     const uint8_t* auxDataStart = data + offset;
-    auxHashMap = AuxHashMap<A>::deserialize(auxDataStart, len - offset, lgK, auxCount, auxLgIntArrSize, comapctFlag);
+    auxHashMap = AuxHashMap<A>::deserialize(auxDataStart, len - offset, lgK, auxCount, auxLgIntArrSize, comapctFlag, allocator);
     aux_ptr = aux_hash_map_ptr(auxHashMap, auxHashMap->make_deleter());
   }
 
-  HllArray<A>* sketch = HllSketchImplFactory<A>::newHll(lgK, tgtHllType, startFullSizeFlag);
+  HllArray<A>* sketch = HllSketchImplFactory<A>::newHll(lgK, tgtHllType, startFullSizeFlag, allocator);
   sketch->putCurMin(curMin);
   sketch->putOutOfOrderFlag(oooFlag);
   if (!oooFlag) sketch->putHipAccum(hip);
@@ -155,7 +123,7 @@ HllArray<A>* HllArray<A>::newHll(const void* bytes, size_t len) {
   sketch->putKxQ1(kxq1);
   sketch->putNumAtCurMin(numAtCurMin);
 
-  std::memcpy(sketch->hllByteArr, data + HllUtil<A>::HLL_BYTE_ARR_START, arrayBytes);
+  std::memcpy(sketch->hllByteArr.data(), data + HllUtil<A>::HLL_BYTE_ARR_START, arrayBytes);
 
   if (auxHashMap != nullptr)
     ((Hll4Array<A>*)sketch)->putAuxHashMap(auxHashMap);
@@ -165,7 +133,7 @@ HllArray<A>* HllArray<A>::newHll(const void* bytes, size_t len) {
 }
 
 template<typename A>
-HllArray<A>* HllArray<A>::newHll(std::istream& is) {
+HllArray<A>* HllArray<A>::newHll(std::istream& is, const A& allocator) {
   uint8_t listHeader[8];
   is.read((char*)listHeader, 8 * sizeof(uint8_t));
 
@@ -192,7 +160,7 @@ HllArray<A>* HllArray<A>::newHll(std::istream& is) {
   const int lgK = (int) listHeader[HllUtil<A>::LG_K_BYTE];
   const int curMin = (int) listHeader[HllUtil<A>::HLL_CUR_MIN_BYTE];
 
-  HllArray* sketch = HllSketchImplFactory<A>::newHll(lgK, tgtHllType, startFullSizeFlag);
+  HllArray* sketch = HllSketchImplFactory<A>::newHll(lgK, tgtHllType, startFullSizeFlag, allocator);
   typedef std::unique_ptr<HllArray<A>, std::function<void(HllSketchImpl<A>*)>> hll_array_ptr;
   hll_array_ptr sketch_ptr(sketch, sketch->get_deleter());
   sketch->putCurMin(curMin);
@@ -211,11 +179,11 @@ HllArray<A>* HllArray<A>::newHll(std::istream& is) {
   is.read((char*)&auxCount, sizeof(auxCount));
   sketch->putNumAtCurMin(numAtCurMin);
   
-  is.read((char*)sketch->hllByteArr, sketch->getHllByteArrBytes());
+  is.read((char*)sketch->hllByteArr.data(), sketch->getHllByteArrBytes());
   
   if (auxCount > 0) { // necessarily TgtHllType == HLL_4
     int auxLgIntArrSize = listHeader[4];
-    AuxHashMap<A>* auxHashMap = AuxHashMap<A>::deserialize(is, lgK, auxCount, auxLgIntArrSize, comapctFlag);
+    AuxHashMap<A>* auxHashMap = AuxHashMap<A>::deserialize(is, lgK, auxCount, auxLgIntArrSize, comapctFlag, allocator);
     ((Hll4Array<A>*)sketch)->putAuxHashMap(auxHashMap);
   }
 
@@ -228,7 +196,7 @@ HllArray<A>* HllArray<A>::newHll(std::istream& is) {
 template<typename A>
 vector_u8<A> HllArray<A>::serialize(bool compact, unsigned header_size_bytes) const {
   const size_t sketchSizeBytes = (compact ? getCompactSerializationBytes() : getUpdatableSerializationBytes()) + header_size_bytes;
-  vector_u8<A> byteArr(sketchSizeBytes);
+  vector_u8<A> byteArr(sketchSizeBytes, 0, getAllocator());
   uint8_t* bytes = byteArr.data() + header_size_bytes;
   AuxHashMap<A>* auxHashMap = getAuxHashMap();
 
@@ -249,7 +217,7 @@ vector_u8<A> HllArray<A>::serialize(bool compact, unsigned header_size_bytes) co
   std::memcpy(bytes + HllUtil<A>::AUX_COUNT_INT, &auxCount, sizeof(int));
 
   const int hllByteArrBytes = getHllByteArrBytes();
-  std::memcpy(bytes + getMemDataStart(), hllByteArr, hllByteArrBytes);
+  std::memcpy(bytes + getMemDataStart(), hllByteArr.data(), hllByteArrBytes);
 
   // aux map if HLL_4
   if (this->tgtHllType == HLL_4) {
@@ -309,7 +277,7 @@ void HllArray<A>::serialize(std::ostream& os, const bool compact) const {
 
   const int auxCount = (auxHashMap == nullptr ? 0 : auxHashMap->getAuxCount());
   os.write((char*)&auxCount, sizeof(auxCount));
-  os.write((char*)hllByteArr, getHllByteArrBytes());
+  os.write((char*)hllByteArr.data(), getHllByteArrBytes());
 
   // aux map if HLL_4
   if (this->tgtHllType == HLL_4) {
@@ -639,12 +607,12 @@ double HllArray<A>::getHllRawEstimate(const int lgConfigK, const double kxqSum)
 
 template<typename A>
 typename HllArray<A>::const_iterator HllArray<A>::begin(bool all) const {
-  return const_iterator(hllByteArr, 1 << this->lgConfigK, 0, this->tgtHllType, nullptr, 0, all);
+  return const_iterator(hllByteArr.data(), 1 << this->lgConfigK, 0, this->tgtHllType, nullptr, 0, all);
 }
 
 template<typename A>
 typename HllArray<A>::const_iterator HllArray<A>::end() const {
-  return const_iterator(hllByteArr, 1 << this->lgConfigK, 1 << this->lgConfigK, this->tgtHllType, nullptr, 0, false);
+  return const_iterator(hllByteArr.data(), 1 << this->lgConfigK, 1 << this->lgConfigK, this->tgtHllType, nullptr, 0, false);
 }
 
 template<typename A>
@@ -701,6 +669,11 @@ uint8_t HllArray<A>::const_iterator::get_value(const uint8_t* array, size_t inde
   return array[index];
 }
 
+template<typename A>
+A HllArray<A>::getAllocator() const {
+  return hllByteArr.get_allocator();
+}
+
 }
 
 #endif // _HLLARRAY_INTERNAL_HPP_
diff --git a/be/src/thirdparty/datasketches/HllArray.hpp b/be/src/thirdparty/datasketches/HllArray.hpp
index 1cc64ea..e7be8c1 100644
--- a/be/src/thirdparty/datasketches/HllArray.hpp
+++ b/be/src/thirdparty/datasketches/HllArray.hpp
@@ -28,19 +28,18 @@ namespace datasketches {
 template<typename A>
 class AuxHashMap;
 
-template<typename A = std::allocator<char>>
+template<typename A>
 class HllArray : public HllSketchImpl<A> {
   public:
-    explicit HllArray(int lgConfigK, target_hll_type tgtHllType, bool startFullSize);
-    explicit HllArray(const HllArray<A>& that);
+    HllArray(int lgConfigK, target_hll_type tgtHllType, bool startFullSize, const A& allocator);
 
-    static HllArray* newHll(const void* bytes, size_t len);
-    static HllArray* newHll(std::istream& is);
+    static HllArray* newHll(const void* bytes, size_t len, const A& allocator);
+    static HllArray* newHll(std::istream& is, const A& allocator);
 
     virtual vector_u8<A> serialize(bool compact, unsigned header_size_bytes) const;
     virtual void serialize(std::ostream& os, bool compact) const;
 
-    virtual ~HllArray();
+    virtual ~HllArray() = default;
     virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const = 0;
 
     virtual HllArray* copy() const = 0;
@@ -95,6 +94,8 @@ class HllArray : public HllSketchImpl<A> {
     virtual const_iterator begin(bool all = false) const;
     virtual const_iterator end() const;
 
+    virtual A getAllocator() const;
+
   protected:
     void hipAndKxQIncrementalUpdate(uint8_t oldValue, uint8_t newValue);
     double getHllBitMapEstimate(int lgConfigK, int curMin, int numAtCurMin) const;
@@ -103,7 +104,7 @@ class HllArray : public HllSketchImpl<A> {
     double hipAccum;
     double kxq0;
     double kxq1;
-    uint8_t* hllByteArr; //init by sub-classes
+    vector_u8<A> hllByteArr; //init by sub-classes
     int curMin; //always zero for Hll6 and Hll8, only tracked by Hll4Array
     int numAtCurMin; //interpreted as num zeros when curMin == 0
     bool oooFlag; //Out-Of-Order Flag
@@ -115,7 +116,6 @@ template<typename A>
 class HllArray<A>::const_iterator: public std::iterator<std::input_iterator_tag, uint32_t> {
 public:
   const_iterator(const uint8_t* array, size_t array_slze, size_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset, bool all);
-  //const_iterator(const uint8_t* array, size_t array_slze, size_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset);
   const_iterator& operator++();
   bool operator!=(const const_iterator& other) const;
   uint32_t operator*() const;
diff --git a/be/src/thirdparty/datasketches/HllSketch-internal.hpp b/be/src/thirdparty/datasketches/HllSketch-internal.hpp
index dd16955..8f7d1f4 100644
--- a/be/src/thirdparty/datasketches/HllSketch-internal.hpp
+++ b/be/src/thirdparty/datasketches/HllSketch-internal.hpp
@@ -42,28 +42,26 @@ typedef union {
 } longDoubleUnion;
 
 template<typename A>
-hll_sketch_alloc<A>::hll_sketch_alloc(int lg_config_k, target_hll_type tgt_type, bool start_full_size) {
+hll_sketch_alloc<A>::hll_sketch_alloc(int lg_config_k, target_hll_type tgt_type, bool start_full_size, const A& allocator) {
   HllUtil<A>::checkLgK(lg_config_k);
   if (start_full_size) {
-    sketch_impl = HllSketchImplFactory<A>::newHll(lg_config_k, tgt_type, start_full_size);
+    sketch_impl = HllSketchImplFactory<A>::newHll(lg_config_k, tgt_type, start_full_size, allocator);
   } else {
     typedef typename std::allocator_traits<A>::template rebind_alloc<CouponList<A>> clAlloc;
-    sketch_impl = new (clAlloc().allocate(1)) CouponList<A>(lg_config_k, tgt_type, hll_mode::LIST);
+    sketch_impl = new (clAlloc(allocator).allocate(1)) CouponList<A>(lg_config_k, tgt_type, hll_mode::LIST, allocator);
   }
 }
 
 template<typename A>
-hll_sketch_alloc<A> hll_sketch_alloc<A>::deserialize(std::istream& is) {
-  HllSketchImpl<A>* impl = HllSketchImplFactory<A>::deserialize(is);
-  hll_sketch_alloc<A> sketch(impl);
-  return sketch;
+hll_sketch_alloc<A> hll_sketch_alloc<A>::deserialize(std::istream& is, const A& allocator) {
+  HllSketchImpl<A>* impl = HllSketchImplFactory<A>::deserialize(is, allocator);
+  return hll_sketch_alloc<A>(impl);
 }
 
 template<typename A>
-hll_sketch_alloc<A> hll_sketch_alloc<A>::deserialize(const void* bytes, size_t len) {
-  HllSketchImpl<A>* impl = HllSketchImplFactory<A>::deserialize(bytes, len);
-  hll_sketch_alloc<A> sketch(impl);
-  return sketch;
+hll_sketch_alloc<A> hll_sketch_alloc<A>::deserialize(const void* bytes, size_t len, const A& allocator) {
+  HllSketchImpl<A>* impl = HllSketchImplFactory<A>::deserialize(bytes, len, allocator);
+  return hll_sketch_alloc<A>(impl);
 }
 
 template<typename A>
diff --git a/be/src/thirdparty/datasketches/HllSketchImpl.hpp b/be/src/thirdparty/datasketches/HllSketchImpl.hpp
index 82180b4..9f53705 100644
--- a/be/src/thirdparty/datasketches/HllSketchImpl.hpp
+++ b/be/src/thirdparty/datasketches/HllSketchImpl.hpp
@@ -27,7 +27,7 @@
 
 namespace datasketches {
 
-template<typename A = std::allocator<char>>
+template<typename A>
 class HllSketchImpl {
   public:
     HllSketchImpl(int lgConfigK, target_hll_type tgtHllType, hll_mode mode, bool startFullSize);
@@ -66,6 +66,7 @@ class HllSketchImpl {
     virtual bool isEmpty() const = 0;
     virtual bool isOutOfOrderFlag() const = 0;
     virtual void putOutOfOrderFlag(bool oooFlag) = 0;
+    virtual A getAllocator() const = 0;
     bool isStartFullSize() const;
 
   protected:
diff --git a/be/src/thirdparty/datasketches/HllSketchImplFactory.hpp b/be/src/thirdparty/datasketches/HllSketchImplFactory.hpp
index eb8dd77..85f9618 100644
--- a/be/src/thirdparty/datasketches/HllSketchImplFactory.hpp
+++ b/be/src/thirdparty/datasketches/HllSketchImplFactory.hpp
@@ -31,15 +31,15 @@
 
 namespace datasketches {
 
-template<typename A = std::allocator<char>>
+template<typename A>
 class HllSketchImplFactory final {
 public:
-  static HllSketchImpl<A>* deserialize(std::istream& os);
-  static HllSketchImpl<A>* deserialize(const void* bytes, size_t len);
+  static HllSketchImpl<A>* deserialize(std::istream& os, const A& allocator);
+  static HllSketchImpl<A>* deserialize(const void* bytes, size_t len, const A& allocator);
 
   static CouponHashSet<A>* promoteListToSet(const CouponList<A>& list);
   static HllArray<A>* promoteListOrSetToHll(const CouponList<A>& list);
-  static HllArray<A>* newHll(int lgConfigK, target_hll_type tgtHllType, bool startFullSize = false);
+  static HllArray<A>* newHll(int lgConfigK, target_hll_type tgtHllType, bool startFullSize, const A& allocator);
   
   // resets the input impl, deleting the input pointer and returning a new pointer
   static HllSketchImpl<A>* reset(HllSketchImpl<A>* impl, bool startFullSize);
@@ -51,8 +51,8 @@ public:
 
 template<typename A>
 CouponHashSet<A>* HllSketchImplFactory<A>::promoteListToSet(const CouponList<A>& list) {
-  typedef typename std::allocator_traits<A>::template rebind_alloc<CouponHashSet<A>> chsAlloc;
-  CouponHashSet<A>* chSet = new (chsAlloc().allocate(1)) CouponHashSet<A>(list.getLgConfigK(), list.getTgtHllType());
+  using ChsAlloc = typename std::allocator_traits<A>::template rebind_alloc<CouponHashSet<A>>;
+  CouponHashSet<A>* chSet = new (ChsAlloc(list.getAllocator()).allocate(1)) CouponHashSet<A>(list.getLgConfigK(), list.getTgtHllType(), list.getAllocator());
   for (auto coupon: list) {
     chSet->couponUpdate(coupon);
   }
@@ -61,7 +61,7 @@ CouponHashSet<A>* HllSketchImplFactory<A>::promoteListToSet(const CouponList<A>&
 
 template<typename A>
 HllArray<A>* HllSketchImplFactory<A>::promoteListOrSetToHll(const CouponList<A>& src) {
-  HllArray<A>* tgtHllArr = HllSketchImplFactory<A>::newHll(src.getLgConfigK(), src.getTgtHllType());
+  HllArray<A>* tgtHllArr = HllSketchImplFactory<A>::newHll(src.getLgConfigK(), src.getTgtHllType(), false, src.getAllocator());
   tgtHllArr->putKxQ0(1 << src.getLgConfigK());
   for (auto coupon: src) {
     tgtHllArr->couponUpdate(coupon);
@@ -72,48 +72,48 @@ HllArray<A>* HllSketchImplFactory<A>::promoteListOrSetToHll(const CouponList<A>&
 }
 
 template<typename A>
-HllSketchImpl<A>* HllSketchImplFactory<A>::deserialize(std::istream& is) {
+HllSketchImpl<A>* HllSketchImplFactory<A>::deserialize(std::istream& is, const A& allocator) {
   // we'll hand off the sketch based on PreInts so we don't need
   // to move the stream pointer back and forth -- perhaps somewhat fragile?
   const int preInts = is.peek();
   if (preInts == HllUtil<A>::HLL_PREINTS) {
-    return HllArray<A>::newHll(is);
+    return HllArray<A>::newHll(is, allocator);
   } else if (preInts == HllUtil<A>::HASH_SET_PREINTS) {
-    return CouponHashSet<A>::newSet(is);
+    return CouponHashSet<A>::newSet(is, allocator);
   } else if (preInts == HllUtil<A>::LIST_PREINTS) {
-    return CouponList<A>::newList(is);
+    return CouponList<A>::newList(is, allocator);
   } else {
     throw std::invalid_argument("Attempt to deserialize unknown object type");
   }
 }
 
 template<typename A>
-HllSketchImpl<A>* HllSketchImplFactory<A>::deserialize(const void* bytes, size_t len) {
+HllSketchImpl<A>* HllSketchImplFactory<A>::deserialize(const void* bytes, size_t len, const A& allocator) {
   // read current mode directly
   const int preInts = static_cast<const uint8_t*>(bytes)[0];
   if (preInts == HllUtil<A>::HLL_PREINTS) {
-    return HllArray<A>::newHll(bytes, len);
+    return HllArray<A>::newHll(bytes, len, allocator);
   } else if (preInts == HllUtil<A>::HASH_SET_PREINTS) {
-    return CouponHashSet<A>::newSet(bytes, len);
+    return CouponHashSet<A>::newSet(bytes, len, allocator);
   } else if (preInts == HllUtil<A>::LIST_PREINTS) {
-    return CouponList<A>::newList(bytes, len);
+    return CouponList<A>::newList(bytes, len, allocator);
   } else {
     throw std::invalid_argument("Attempt to deserialize unknown object type");
   }
 }
 
 template<typename A>
-HllArray<A>* HllSketchImplFactory<A>::newHll(int lgConfigK, target_hll_type tgtHllType, bool startFullSize) {
+HllArray<A>* HllSketchImplFactory<A>::newHll(int lgConfigK, target_hll_type tgtHllType, bool startFullSize, const A& allocator) {
   switch (tgtHllType) {
     case HLL_8:
-      typedef typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>> hll8Alloc;
-      return new (hll8Alloc().allocate(1)) Hll8Array<A>(lgConfigK, startFullSize);
+      using Hll8Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>>;
+      return new (Hll8Alloc(allocator).allocate(1)) Hll8Array<A>(lgConfigK, startFullSize, allocator);
     case HLL_6:
-      typedef typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>> hll6Alloc;
-      return new (hll6Alloc().allocate(1)) Hll6Array<A>(lgConfigK, startFullSize);
+      using Hll6Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>>;
+      return new (Hll6Alloc(allocator).allocate(1)) Hll6Array<A>(lgConfigK, startFullSize, allocator);
     case HLL_4:
-      typedef typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>> hll4Alloc;
-      return new (hll4Alloc().allocate(1)) Hll4Array<A>(lgConfigK, startFullSize);
+      using Hll4Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>>;
+      return new (Hll4Alloc(allocator).allocate(1)) Hll4Array<A>(lgConfigK, startFullSize, allocator);
   }
   throw std::logic_error("Invalid target_hll_type");
 }
@@ -121,12 +121,12 @@ HllArray<A>* HllSketchImplFactory<A>::newHll(int lgConfigK, target_hll_type tgtH
 template<typename A>
 HllSketchImpl<A>* HllSketchImplFactory<A>::reset(HllSketchImpl<A>* impl, bool startFullSize) {
   if (startFullSize) {
-    HllArray<A>* hll = newHll(impl->getLgConfigK(), impl->getTgtHllType(), startFullSize);
+    HllArray<A>* hll = newHll(impl->getLgConfigK(), impl->getTgtHllType(), startFullSize, impl->getAllocator());
     impl->get_deleter()(impl);
     return hll;
   } else {
-    typedef typename std::allocator_traits<A>::template rebind_alloc<CouponList<A>> clAlloc;
-    CouponList<A>* cl = new (clAlloc().allocate(1)) CouponList<A>(impl->getLgConfigK(), impl->getTgtHllType(), hll_mode::LIST);
+    using ClAlloc = typename std::allocator_traits<A>::template rebind_alloc<CouponList<A>>;
+    CouponList<A>* cl = new (ClAlloc(impl->getAllocator()).allocate(1)) CouponList<A>(impl->getLgConfigK(), impl->getTgtHllType(), hll_mode::LIST, impl->getAllocator());
     impl->get_deleter()(impl);
     return cl;
   }
@@ -135,8 +135,9 @@ HllSketchImpl<A>* HllSketchImplFactory<A>::reset(HllSketchImpl<A>* impl, bool st
 template<typename A>
 Hll4Array<A>* HllSketchImplFactory<A>::convertToHll4(const HllArray<A>& srcHllArr) {
   const int lgConfigK = srcHllArr.getLgConfigK();
-  typedef typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>> hll4Alloc;
-  Hll4Array<A>* hll4Array = new (hll4Alloc().allocate(1)) Hll4Array<A>(lgConfigK, srcHllArr.isStartFullSize());
+  using Hll4Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>>;
+  Hll4Array<A>* hll4Array = new (Hll4Alloc(srcHllArr.getAllocator()).allocate(1))
+      Hll4Array<A>(lgConfigK, srcHllArr.isStartFullSize(), srcHllArr.getAllocator());
   hll4Array->putOutOfOrderFlag(srcHllArr.isOutOfOrderFlag());
   hll4Array->mergeHll(srcHllArr);
   hll4Array->putHipAccum(srcHllArr.getHipAccum());
@@ -146,8 +147,9 @@ Hll4Array<A>* HllSketchImplFactory<A>::convertToHll4(const HllArray<A>& srcHllAr
 template<typename A>
 Hll6Array<A>* HllSketchImplFactory<A>::convertToHll6(const HllArray<A>& srcHllArr) {
   const int lgConfigK = srcHllArr.getLgConfigK();
-  typedef typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>> hll6Alloc;
-  Hll6Array<A>* hll6Array = new (hll6Alloc().allocate(1)) Hll6Array<A>(lgConfigK, srcHllArr.isStartFullSize());
+  using Hll6Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>>;
+  Hll6Array<A>* hll6Array = new (Hll6Alloc(srcHllArr.getAllocator()).allocate(1))
+      Hll6Array<A>(lgConfigK, srcHllArr.isStartFullSize(), srcHllArr.getAllocator());
   hll6Array->putOutOfOrderFlag(srcHllArr.isOutOfOrderFlag());
   hll6Array->mergeHll(srcHllArr);
   hll6Array->putHipAccum(srcHllArr.getHipAccum());
@@ -157,8 +159,9 @@ Hll6Array<A>* HllSketchImplFactory<A>::convertToHll6(const HllArray<A>& srcHllAr
 template<typename A>
 Hll8Array<A>* HllSketchImplFactory<A>::convertToHll8(const HllArray<A>& srcHllArr) {
   const int lgConfigK = srcHllArr.getLgConfigK();
-  typedef typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>> hll8Alloc;
-  Hll8Array<A>* hll8Array = new (hll8Alloc().allocate(1)) Hll8Array<A>(lgConfigK, srcHllArr.isStartFullSize());
+  using Hll8Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>>;
+  Hll8Array<A>* hll8Array = new (Hll8Alloc(srcHllArr.getAllocator()).allocate(1))
+      Hll8Array<A>(lgConfigK, srcHllArr.isStartFullSize(), srcHllArr.getAllocator());
   hll8Array->putOutOfOrderFlag(srcHllArr.isOutOfOrderFlag());
   hll8Array->mergeHll(srcHllArr);
   hll8Array->putHipAccum(srcHllArr.getHipAccum());
diff --git a/be/src/thirdparty/datasketches/HllUnion-internal.hpp b/be/src/thirdparty/datasketches/HllUnion-internal.hpp
index 0d12fd3..716fab6 100644
--- a/be/src/thirdparty/datasketches/HllUnion-internal.hpp
+++ b/be/src/thirdparty/datasketches/HllUnion-internal.hpp
@@ -32,9 +32,9 @@
 namespace datasketches {
 
 template<typename A>
-hll_union_alloc<A>::hll_union_alloc(const int lg_max_k):
+hll_union_alloc<A>::hll_union_alloc(const int lg_max_k, const A& allocator):
   lg_max_k(HllUtil<A>::checkLgK(lg_max_k)),
-  gadget(lg_max_k, target_hll_type::HLL_8)
+  gadget(lg_max_k, target_hll_type::HLL_8, false, allocator)
 {}
 
 template<typename A>
@@ -150,16 +150,6 @@ double hll_union_alloc<A>::get_upper_bound(const int num_std_dev) const {
 }
 
 template<typename A>
-int hll_union_alloc<A>::get_compact_serialization_bytes() const {
-  return gadget.get_compact_serialization_bytes();
-}
-
-template<typename A>
-int hll_union_alloc<A>::get_updatable_serialization_bytes() const {
-  return gadget.get_updatable_serialization_bytes();
-}
-
-template<typename A>
 int hll_union_alloc<A>::get_lg_config_k() const {
   return gadget.get_lg_config_k();
 }
@@ -170,11 +160,6 @@ void hll_union_alloc<A>::reset() {
 }
 
 template<typename A>
-bool hll_union_alloc<A>::is_compact() const {
-  return gadget.is_compact();
-}
-
-template<typename A>
 bool hll_union_alloc<A>::is_empty() const {
   return gadget.is_empty();
 }
@@ -195,21 +180,11 @@ bool hll_union_alloc<A>::is_estimation_mode() const {
 }
 
 template<typename A>
-int hll_union_alloc<A>::get_serialization_version() const {
-  return HllUtil<A>::SER_VER;
-}
-
-template<typename A>
 target_hll_type hll_union_alloc<A>::get_target_type() const {
   return target_hll_type::HLL_8;
 }
 
 template<typename A>
-int hll_union_alloc<A>::get_max_serialization_bytes(const int lg_k) {
-  return hll_sketch_alloc<A>::get_max_updatable_serialization_bytes(lg_k, target_hll_type::HLL_8);
-}
-
-template<typename A>
 double hll_union_alloc<A>::get_rel_err(const bool upper_bound, const bool unioned,
                            const int lg_config_k, const int num_std_dev) {
   return HllUtil<A>::getRelErr(upper_bound, unioned, lg_config_k, num_std_dev);
@@ -226,7 +201,7 @@ HllSketchImpl<A>* hll_union_alloc<A>::copy_or_downsample(const HllSketchImpl<A>*
     return src->copyAs(HLL_8);
   }
   typedef typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>> hll8Alloc;
-  Hll8Array<A>* tgtHllArr = new (hll8Alloc().allocate(1)) Hll8Array<A>(tgt_lg_k, false);
+  Hll8Array<A>* tgtHllArr = new (hll8Alloc(src->getAllocator()).allocate(1)) Hll8Array<A>(tgt_lg_k, false, src->getAllocator());
   tgtHllArr->mergeHll(*src);
   //both of these are required for isomorphism
   tgtHllArr->putHipAccum(src->getHipAccum());
diff --git a/be/src/thirdparty/datasketches/HllUtil.hpp b/be/src/thirdparty/datasketches/HllUtil.hpp
index ec0ddf2..3a1ebe2 100644
--- a/be/src/thirdparty/datasketches/HllUtil.hpp
+++ b/be/src/thirdparty/datasketches/HllUtil.hpp
@@ -36,7 +36,7 @@ enum hll_mode { LIST = 0, SET, HLL };
 
 // template provides internal consistency and allows static float values
 // but we don't use the template parameter anywhere
-template<typename A = std::allocator<char> >
+template<typename A = std::allocator<uint8_t> >
 class HllUtil final {
 public:
   // preamble stuff
diff --git a/be/src/thirdparty/datasketches/MurmurHash3.h b/be/src/thirdparty/datasketches/MurmurHash3.h
index b438c7d..c1cbeab 100644
--- a/be/src/thirdparty/datasketches/MurmurHash3.h
+++ b/be/src/thirdparty/datasketches/MurmurHash3.h
@@ -3,6 +3,7 @@
 //  * Changed input seed in MurmurHash3_x64_128 to uint64_t
 //  * Define and use HashState reference to return result
 //  * Made entire hash function defined inline
+//  * Added compute_seed_hash
 //-----------------------------------------------------------------------------
 // MurmurHash3 was written by Austin Appleby, and is placed in the public
 // domain. The author hereby disclaims copyright to this source code.
@@ -170,4 +171,10 @@ FORCE_INLINE void MurmurHash3_x64_128(const void* key, int lenBytes, uint64_t se
 
 //-----------------------------------------------------------------------------
 
+FORCE_INLINE uint16_t compute_seed_hash(uint64_t seed) {
+  HashState hashes;
+  MurmurHash3_x64_128(&seed, sizeof(seed), 0, hashes);
+  return static_cast<uint16_t>(hashes.h1 & 0xffff);
+}
+
 #endif // _MURMURHASH3_H_
diff --git a/be/src/thirdparty/datasketches/README.md b/be/src/thirdparty/datasketches/README.md
index 1c0433d..adecb97 100644
--- a/be/src/thirdparty/datasketches/README.md
+++ b/be/src/thirdparty/datasketches/README.md
@@ -10,8 +10,8 @@ changed during this process as originally the following folders were affected:
 I copied the content of these folders into the same directory so that Impala
 can compile them without rewriting the include paths in the files themselves.
 
-The git branch of the snapshot I used as a source for the files:
-The hash: b2f749ed5ce6ba650f4259602b133c310c3a5ee4
+The git branch of the snapshot I used as a source for the files: 3.0.0
+The hash: 45885c0c8c0807bb9480886d60ca7042000a4c43
 
 Browse the source files here:
-https://github.com/apache/datasketches-cpp/tree/b2f749ed5ce6ba650f4259602b133c310c3a5ee4
+https://github.com/apache/datasketches-cpp/tree/3.0.0
\ No newline at end of file
diff --git a/be/src/thirdparty/datasketches/RelativeErrorTables.hpp b/be/src/thirdparty/datasketches/RelativeErrorTables.hpp
index da8bebf..5e0a3c7 100644
--- a/be/src/thirdparty/datasketches/RelativeErrorTables.hpp
+++ b/be/src/thirdparty/datasketches/RelativeErrorTables.hpp
@@ -24,7 +24,7 @@
 
 namespace datasketches {
 
-template<typename A = std::allocator<char>>
+template<typename A = std::allocator<uint8_t>>
 class RelativeErrorTables {
   public:
     /**
diff --git a/be/src/thirdparty/datasketches/bounds_on_ratios_in_sampled_sets.hpp b/be/src/thirdparty/datasketches/bounds_on_ratios_in_sampled_sets.hpp
new file mode 100644
index 0000000..e2c5433
--- /dev/null
+++ b/be/src/thirdparty/datasketches/bounds_on_ratios_in_sampled_sets.hpp
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef BOUNDS_ON_RATIOS_IN_SAMPLED_SETS_HPP_
+#define BOUNDS_ON_RATIOS_IN_SAMPLED_SETS_HPP_
+
+#include <cstdint>
+#include <string>
+
+#include "bounds_binomial_proportions.hpp"
+
+namespace datasketches {
+
+/**
+ * This class is used to compute the bounds on the estimate of the ratio <i>|B| / |A|</i>, where:
+ * <ul>
+ * <li><i>|A|</i> is the unknown size of a set <i>A</i> of unique identifiers.</li>
+ * <li><i>|B|</i> is the unknown size of a subset <i>B</i> of <i>A</i>.</li>
+ * <li><i>a</i> = <i>|S<sub>A</sub>|</i> is the observed size of a sample of <i>A</i>
+ * that was obtained by Bernoulli sampling with a known inclusion probability <i>f</i>.</li>
+ * <li><i>b</i> = <i>|S<sub>A</sub> &cap; B|</i> is the observed size of a subset
+ * of <i>S<sub>A</sub></i>.</li>
+ * </ul>
+ */
+class bounds_on_ratios_in_sampled_sets {
+public:
+  static constexpr double NUM_STD_DEVS = 2.0;
+
+  /**
+   * Return the approximate lower bound based on a 95% confidence interval
+   * @param a See class javadoc
+   * @param b See class javadoc
+   * @param f the inclusion probability used to produce the set with size <i>a</i> and should
+   * generally be less than 0.5. Above this value, the results may not be reliable.
+   * When <i>f</i> = 1.0 this returns the estimate.
+   * @return the approximate lower bound
+   */
+  static double lower_bound_for_b_over_a(uint64_t a, uint64_t b, double f) {
+    check_inputs(a, b, f);
+    if (a == 0) return 0.0;
+    if (f == 1.0) return static_cast<double>(b) / static_cast<double>(a);
+    return bounds_binomial_proportions::approximate_lower_bound_on_p(a, b, NUM_STD_DEVS * hacky_adjuster(f));
+  }
+
+  /**
+   * Return the approximate upper bound based on a 95% confidence interval
+   * @param a See class javadoc
+   * @param b See class javadoc
+   * @param f the inclusion probability used to produce the set with size <i>a</i>.
+   * @return the approximate upper bound
+   */
+  static double upper_bound_for_b_over_a(uint64_t a, uint64_t b, double f) {
+    check_inputs(a, b, f);
+    if (a == 0) return 1.0;
+    if (f == 1.0) return static_cast<double>(b) / static_cast<double>(a);
+    return bounds_binomial_proportions::approximate_upper_bound_on_p(a, b, NUM_STD_DEVS * hacky_adjuster(f));
+  }
+
+  /**
+   * Return the estimate of b over a
+   * @param a See class javadoc
+   * @param b See class javadoc
+   * @return the estimate of b over a
+   */
+  static double get_estimate_of_b_over_a(uint64_t a, uint64_t b) {
+    check_inputs(a, b, 0.3);
+    if (a == 0) return 0.5;
+    return static_cast<double>(b) / static_cast<double>(a);
+  }
+
+  /**
+   * Return the estimate of |A| (the sample size <i>a</i> divided by <i>f</i>). See class javadoc.
+   * @param a See class javadoc; check_inputs rejects a = 0 with std::invalid_argument.
+   * @param f the inclusion probability used to produce the set with size <i>a</i>; must be in (0, 1].
+   * @return the estimate of |A|
+   */
+  static double estimate_of_a(uint64_t a, double f) {
+    check_inputs(a, 1, f);
+    return static_cast<double>(a) / f;
+  }
+
+  /**
+   * Return the estimate of B. See class javadoc.
+   * @param b See class javadoc
+   * @param f the inclusion probability used to produce the set with size <i>b</i>.
+   * @return the estimate of B
+   */
+  static double estimate_of_b(uint64_t b, double f) {
+    check_inputs(b + 1, b, f);
+    return b / f;
+  }
+
+private:
+  /**
+   * This hackyAdjuster is tightly coupled with the width of the confidence interval normally
+   * specified with number of standard deviations. To simplify this interface the number of
+   * standard deviations has been fixed to 2.0, which corresponds to a confidence interval of
+   * 95%.
+   * @param f the inclusion probability used to produce the set with size <i>a</i>.
+   * @return the hacky Adjuster
+   */
+  static double hacky_adjuster(double f) {
+    const double tmp = sqrt(1.0 - f);
+    return (f <= 0.5) ? tmp : tmp + (0.01 * (f - 0.5));
+  }
+
+  static void check_inputs(uint64_t a, uint64_t b, double f) {
+    if (a < b) {
+      throw std::invalid_argument("a must be >= b: a = " + std::to_string(a) + ", b = " + std::to_string(b));
+    }
+    if ((f > 1.0) || (f <= 0.0)) {
+      throw std::invalid_argument("Required: ((f <= 1.0) && (f > 0.0)): " + std::to_string(f));
+    }
+  }
+
+};
+
+} /* namespace datasketches */
+
+# endif
diff --git a/be/src/thirdparty/datasketches/bounds_on_ratios_in_theta_sketched_sets.hpp b/be/src/thirdparty/datasketches/bounds_on_ratios_in_theta_sketched_sets.hpp
new file mode 100644
index 0000000..1779ec1
--- /dev/null
+++ b/be/src/thirdparty/datasketches/bounds_on_ratios_in_theta_sketched_sets.hpp
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef BOUNDS_ON_RATIOS_IN_THETA_SKETCHED_SETS_HPP_
+#define BOUNDS_ON_RATIOS_IN_THETA_SKETCHED_SETS_HPP_
+
+#include <cstdint>
+#include <stdexcept>
+
+#include "bounds_on_ratios_in_sampled_sets.hpp"
+
+namespace datasketches {
+
+/**
+ * This is to compute the bounds on the estimate of the ratio <i>B / A</i>, where:
+ * <ul>
+ * <li><i>A</i> is a Theta Sketch of population <i>PopA</i>.</li>
+ * <li><i>B</i> is a Theta Sketch of population <i>PopB</i> that is a subset of <i>A</i>,
+ * obtained by an intersection of <i>A</i> with some other Theta Sketch <i>C</i>,
+ * which acts like a predicate or selection clause.</li>
+ * <li>The estimate of the ratio <i>PopB/PopA</i> is
+ * estimate_of_b_over_a(<i>A, B</i>).</li>
+ * <li>The Upper Bound estimate on the ratio PopB/PopA is
+ * upper_bound_for_b_over_a(<i>A, B</i>).</li>
+ * <li>The Lower Bound estimate on the ratio PopB/PopA is
+ * lower_bound_for_b_over_a(<i>A, B</i>).</li>
+ * </ul>
+ * Note: The theta of <i>B</i> cannot be greater than the theta of <i>A</i>.
+ * If <i>B</i> is formed as an intersection of <i>A</i> and some other set <i>C</i>,
+ * then the theta of <i>B</i> is guaranteed to be less than or equal to the theta of <i>A</i>.
+ */
+template<typename ExtractKey>
+class bounds_on_ratios_in_theta_sketched_sets {
+public:
+  /**
+   * Gets the approximate lower bound for B over A based on a 95% confidence interval
+   * @param sketch_a the sketch A
+   * @param sketch_b the sketch B
+   * @return the approximate lower bound for B over A
+   */
+  template<typename SketchA, typename SketchB>
+  static double lower_bound_for_b_over_a(const SketchA& sketch_a, const SketchB& sketch_b) {
+    const uint64_t theta64_a = sketch_a.get_theta64();
+    const uint64_t theta64_b = sketch_b.get_theta64();
+    check_thetas(theta64_a, theta64_b);
+
+    const uint64_t count_b = sketch_b.get_num_retained();
+    const uint64_t count_a = theta64_a == theta64_b
+        ? sketch_a.get_num_retained()
+        : count_less_than_theta64(sketch_a, theta64_b);
+
+    if (count_a == 0) return 0;
+    const double f = sketch_b.get_theta();
+    return bounds_on_ratios_in_sampled_sets::lower_bound_for_b_over_a(count_a, count_b, f);
+  }
+
+  /**
+   * Gets the approximate upper bound for B over A based on a 95% confidence interval
+   * @param sketch_a the sketch A
+   * @param sketch_b the sketch B
+   * @return the approximate upper bound for B over A
+   */
+  template<typename SketchA, typename SketchB>
+  static double upper_bound_for_b_over_a(const SketchA& sketch_a, const SketchB& sketch_b) {
+    const uint64_t theta64_a = sketch_a.get_theta64();
+    const uint64_t theta64_b = sketch_b.get_theta64();
+    check_thetas(theta64_a, theta64_b);
+
+    const uint64_t count_b = sketch_b.get_num_retained();
+    const uint64_t count_a = (theta64_a == theta64_b)
+        ? sketch_a.get_num_retained()
+        : count_less_than_theta64(sketch_a, theta64_b);
+
+    if (count_a == 0) return 1;
+    const double f = sketch_b.get_theta();
+    return bounds_on_ratios_in_sampled_sets::upper_bound_for_b_over_a(count_a, count_b, f);
+  }
+
+  /**
+   * Gets the estimate for B over A
+   * @param sketch_a the sketch A
+   * @param sketch_b the sketch B
+   * @return the estimate for B over A
+   */
+  template<typename SketchA, typename SketchB>
+  static double estimate_of_b_over_a(const SketchA& sketch_a, const SketchB& sketch_b) {
+    const uint64_t theta64_a = sketch_a.get_theta64();
+    const uint64_t theta64_b = sketch_b.get_theta64();
+    check_thetas(theta64_a, theta64_b);
+
+    const uint64_t count_b = sketch_b.get_num_retained();
+    const uint64_t count_a = (theta64_a == theta64_b)
+        ? sketch_a.get_num_retained()
+        : count_less_than_theta64(sketch_a, theta64_b);
+
+    if (count_a == 0) return 0.5;
+    return static_cast<double>(count_b) / static_cast<double>(count_a);
+  }
+
+private:
+  // Rejects inputs where theta_b exceeds theta_a by throwing std::invalid_argument.
+  static inline void check_thetas(uint64_t theta_a, uint64_t theta_b) {
+    if (theta_b > theta_a) {
+      throw std::invalid_argument("theta_b must be <= theta_a");
+    }
+  }
+
+  template<typename Sketch>
+  static uint64_t count_less_than_theta64(const Sketch& sketch, uint64_t theta) {
+    uint64_t count = 0;
+    for (const auto& entry: sketch) if (ExtractKey()(entry) < theta) ++count;
+    return count;
+  }
+
+};
+
+} /* namespace datasketches */
+
+# endif
diff --git a/be/src/thirdparty/datasketches/cpc_common.hpp b/be/src/thirdparty/datasketches/cpc_common.hpp
index 9a766b8..cde110f 100644
--- a/be/src/thirdparty/datasketches/cpc_common.hpp
+++ b/be/src/thirdparty/datasketches/cpc_common.hpp
@@ -44,6 +44,8 @@ template<typename A> class u32_table;
 
 template<typename A>
 struct compressed_state {
+  explicit compressed_state(const A& allocator): table_data(allocator), table_data_words(0), table_num_entries(0),
+      window_data(allocator), window_data_words(0) {}
   vector_u32<A> table_data;
   uint32_t table_data_words;
   uint32_t table_num_entries; // can be different from the number of entries in the sketch in hybrid mode
@@ -53,6 +55,7 @@ struct compressed_state {
 
 template<typename A>
 struct uncompressed_state {
+  explicit uncompressed_state(const A& allocator): table(allocator), window(allocator) {}
   u32_table<A> table;
   vector_u8<A> window;
 };
diff --git a/be/src/thirdparty/datasketches/cpc_compressor.hpp b/be/src/thirdparty/datasketches/cpc_compressor.hpp
index 55fa3b8..73db797 100644
--- a/be/src/thirdparty/datasketches/cpc_compressor.hpp
+++ b/be/src/thirdparty/datasketches/cpc_compressor.hpp
@@ -129,14 +129,14 @@ private:
   void compress_surprising_values(const vector_u32<A>& pairs, uint8_t lg_k, compressed_state<A>& result) const;
   void compress_sliding_window(const uint8_t* window, uint8_t lg_k, uint32_t num_coupons, compressed_state<A>& target) const;
 
-  vector_u32<A> uncompress_surprising_values(const uint32_t* data, size_t data_words, size_t num_pairs, uint8_t lg_k) const;
+  vector_u32<A> uncompress_surprising_values(const uint32_t* data, size_t data_words, size_t num_pairs, uint8_t lg_k, const A& allocator) const;
   void uncompress_sliding_window(const uint32_t* data, size_t data_words, vector_u8<A>& window, uint8_t lg_k, uint32_t num_coupons) const;
 
   static size_t safe_length_for_compressed_pair_buf(uint64_t k, size_t num_pairs, size_t num_base_bits);
   static size_t safe_length_for_compressed_window_buf(uint64_t k);
   static uint8_t determine_pseudo_phase(uint8_t lg_k, uint64_t c);
 
-  static inline vector_u32<A> tricky_get_pairs_from_window(const uint8_t* window, uint32_t k, uint32_t num_pairs_to_get, uint32_t empty_space);
+  static inline vector_u32<A> tricky_get_pairs_from_window(const uint8_t* window, uint32_t k, uint32_t num_pairs_to_get, uint32_t empty_space, const A& allocator);
   static inline uint64_t golomb_choose_number_of_base_bits(uint64_t k, uint64_t count);
 };
 
diff --git a/be/src/thirdparty/datasketches/cpc_compressor_impl.hpp b/be/src/thirdparty/datasketches/cpc_compressor_impl.hpp
index b951b05..e3398c8 100644
--- a/be/src/thirdparty/datasketches/cpc_compressor_impl.hpp
+++ b/be/src/thirdparty/datasketches/cpc_compressor_impl.hpp
@@ -160,7 +160,7 @@ template<typename A>
 void cpc_compressor<A>::uncompress(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint64_t num_coupons) const {
   switch (cpc_sketch_alloc<A>::determine_flavor(lg_k, num_coupons)) {
     case cpc_sketch_alloc<A>::flavor::EMPTY:
-      target.table = u32_table<A>(2, 6 + lg_k);
+      target.table = u32_table<A>(2, 6 + lg_k, source.table_data.get_allocator());
       break;
     case cpc_sketch_alloc<A>::flavor::SPARSE:
       uncompress_sparse_flavor(source, target, lg_k);
@@ -191,8 +191,9 @@ template<typename A>
 void cpc_compressor<A>::uncompress_sparse_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k) const {
   if (source.window_data.size() > 0) throw std::logic_error("unexpected sliding window");
   if (source.table_data.size() == 0) throw std::logic_error("table is expected");
-  vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, source.table_num_entries, lg_k);
-  target.table = u32_table<A>::make_from_pairs(pairs.data(), source.table_num_entries, lg_k);
+  vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, source.table_num_entries,
+      lg_k, source.table_data.get_allocator());
+  target.table = u32_table<A>::make_from_pairs(pairs.data(), source.table_num_entries, lg_k, pairs.get_allocator());
 }
 
 // This is complicated because it effectively builds a Sparse version
@@ -206,7 +207,7 @@ void cpc_compressor<A>::compress_hybrid_flavor(const cpc_sketch_alloc<A>& source
   if (pairs_from_table.size() > 0) u32_table<A>::introspective_insertion_sort(pairs_from_table.data(), 0, pairs_from_table.size());
   const size_t num_pairs_from_window = source.get_num_coupons() - pairs_from_table.size(); // because the window offset is zero
 
-  vector_u32<A> all_pairs = tricky_get_pairs_from_window(source.sliding_window.data(), k, num_pairs_from_window, pairs_from_table.size());
+  vector_u32<A> all_pairs = tricky_get_pairs_from_window(source.sliding_window.data(), k, num_pairs_from_window, pairs_from_table.size(), source.get_allocator());
 
   u32_table<A>::merge(
       pairs_from_table.data(), 0, pairs_from_table.size(),
@@ -221,7 +222,8 @@ template<typename A>
 void cpc_compressor<A>::uncompress_hybrid_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k) const {
   if (source.window_data.size() > 0) throw std::logic_error("window is not expected");
   if (source.table_data.size() == 0) throw std::logic_error("table is expected");
-  vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, source.table_num_entries, lg_k);
+  vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, source.table_num_entries,
+      lg_k, source.table_data.get_allocator());
 
   // In the hybrid flavor, some of these pairs actually
   // belong in the window, so we will separate them out,
@@ -240,7 +242,7 @@ void cpc_compressor<A>::uncompress_hybrid_flavor(const compressed_state<A>& sour
       pairs[next_true_pair++] = row_col; // move true pair down
     }
   }
-  target.table = u32_table<A>::make_from_pairs(pairs.data(), next_true_pair, lg_k);
+  target.table = u32_table<A>::make_from_pairs(pairs.data(), next_true_pair, lg_k, pairs.get_allocator());
 }
 
 template<typename A>
@@ -264,21 +266,23 @@ void cpc_compressor<A>::compress_pinned_flavor(const cpc_sketch_alloc<A>& source
 }
 
 template<typename A>
-void cpc_compressor<A>::uncompress_pinned_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint32_t num_coupons) const {
+void cpc_compressor<A>::uncompress_pinned_flavor(const compressed_state<A>& source, uncompressed_state<A>& target,
+    uint8_t lg_k, uint32_t num_coupons) const {
   if (source.window_data.size() == 0) throw std::logic_error("window is expected");
   uncompress_sliding_window(source.window_data.data(), source.window_data_words, target.window, lg_k, num_coupons);
   const size_t num_pairs = source.table_num_entries;
   if (num_pairs == 0) {
-    target.table = u32_table<A>(2, 6 + lg_k);
+    target.table = u32_table<A>(2, 6 + lg_k, source.table_data.get_allocator());
   } else {
     if (source.table_data.size() == 0) throw std::logic_error("table is expected");
-    vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, num_pairs, lg_k);
+    vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, num_pairs,
+        lg_k, source.table_data.get_allocator());
     // undo the compressor's 8-column shift
     for (size_t i = 0; i < num_pairs; i++) {
       if ((pairs[i] & 63) >= 56) throw std::logic_error("(pairs[i] & 63) >= 56");
       pairs[i] += 8;
     }
-    target.table = u32_table<A>::make_from_pairs(pairs.data(), num_pairs, lg_k);
+    target.table = u32_table<A>::make_from_pairs(pairs.data(), num_pairs, lg_k, pairs.get_allocator());
   }
 }
 
@@ -314,15 +318,17 @@ void cpc_compressor<A>::compress_sliding_flavor(const cpc_sketch_alloc<A>& sourc
 }
 
 template<typename A>
-void cpc_compressor<A>::uncompress_sliding_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint32_t num_coupons) const {
+void cpc_compressor<A>::uncompress_sliding_flavor(const compressed_state<A>& source, uncompressed_state<A>& target,
+    uint8_t lg_k, uint32_t num_coupons) const {
   if (source.window_data.size() == 0) throw std::logic_error("window is expected");
   uncompress_sliding_window(source.window_data.data(), source.window_data_words, target.window, lg_k, num_coupons);
   const size_t num_pairs = source.table_num_entries;
   if (num_pairs == 0) {
-    target.table = u32_table<A>(2, 6 + lg_k);
+    target.table = u32_table<A>(2, 6 + lg_k, source.table_data.get_allocator());
   } else {
     if (source.table_data.size() == 0) throw std::logic_error("table is expected");
-    vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, num_pairs, lg_k);
+    vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, num_pairs,
+        lg_k, source.table_data.get_allocator());
 
     const uint8_t pseudo_phase = determine_pseudo_phase(lg_k, num_coupons);
     if (pseudo_phase >= 16) throw std::logic_error("pseudo phase >= 16");
@@ -342,7 +348,7 @@ void cpc_compressor<A>::uncompress_sliding_flavor(const compressed_state<A>& sou
       pairs[i] = (row << 6) | col;
     }
 
-    target.table = u32_table<A>::make_from_pairs(pairs.data(), num_pairs, lg_k);
+    target.table = u32_table<A>::make_from_pairs(pairs.data(), num_pairs, lg_k, pairs.get_allocator());
   }
 }
 
@@ -364,9 +370,10 @@ void cpc_compressor<A>::compress_surprising_values(const vector_u32<A>& pairs, u
 }
 
 template<typename A>
-vector_u32<A> cpc_compressor<A>::uncompress_surprising_values(const uint32_t* data, size_t data_words, size_t num_pairs, uint8_t lg_k) const {
+vector_u32<A> cpc_compressor<A>::uncompress_surprising_values(const uint32_t* data, size_t data_words, size_t num_pairs,
+    uint8_t lg_k, const A& allocator) const {
   const size_t k = 1 << lg_k;
-  vector_u32<A> pairs(num_pairs);
+  vector_u32<A> pairs(num_pairs, 0, allocator);
   const uint8_t num_base_bits = golomb_choose_number_of_base_bits(k + num_pairs, num_pairs);
   low_level_uncompress_pairs(pairs.data(), num_pairs, num_base_bits, data, data_words);
   return pairs;
@@ -388,7 +395,8 @@ void cpc_compressor<A>::compress_sliding_window(const uint8_t* window, uint8_t l
 }
 
 template<typename A>
-void cpc_compressor<A>::uncompress_sliding_window(const uint32_t* data, size_t data_words, vector_u8<A>& window, uint8_t lg_k, uint32_t num_coupons) const {
+void cpc_compressor<A>::uncompress_sliding_window(const uint32_t* data, size_t data_words, vector_u8<A>& window,
+    uint8_t lg_k, uint32_t num_coupons) const {
   const size_t k = 1 << lg_k;
   window.resize(k); // zeroing not needed here (unlike the Hybrid Flavor)
   const uint8_t pseudo_phase = determine_pseudo_phase(lg_k, num_coupons);
@@ -710,9 +718,10 @@ void write_unary(
 // The empty space that this leaves at the beginning of the output array
 // will be filled in later by the caller.
 template<typename A>
-vector_u32<A> cpc_compressor<A>::tricky_get_pairs_from_window(const uint8_t* window, uint32_t k, uint32_t num_pairs_to_get, uint32_t empty_space) {
+vector_u32<A> cpc_compressor<A>::tricky_get_pairs_from_window(const uint8_t* window, uint32_t k, uint32_t num_pairs_to_get,
+    uint32_t empty_space, const A& allocator) {
   const size_t output_length = empty_space + num_pairs_to_get;
-  vector_u32<A> pairs(output_length);
+  vector_u32<A> pairs(output_length, 0, allocator);
   size_t pair_index = empty_space;
   for (unsigned row_index = 0; row_index < k; row_index++) {
     uint8_t byte = window[row_index];
diff --git a/be/src/thirdparty/datasketches/cpc_sketch.hpp b/be/src/thirdparty/datasketches/cpc_sketch.hpp
index 9aba16f..a4bf8f6 100644
--- a/be/src/thirdparty/datasketches/cpc_sketch.hpp
+++ b/be/src/thirdparty/datasketches/cpc_sketch.hpp
@@ -49,7 +49,7 @@ template<typename A> class cpc_sketch_alloc;
 template<typename A> class cpc_union_alloc;
 
 // alias with default allocator for convenience
-typedef cpc_sketch_alloc<std::allocator<void>> cpc_sketch;
+using cpc_sketch = cpc_sketch_alloc<std::allocator<uint8_t>>;
 
 // allocation and initialization of global decompression (decoding) tables
 // call this before anything else if you want to control the initialization time
@@ -67,7 +67,10 @@ public:
    * @param lg_k base 2 logarithm of the number of bins in the sketch
    * @param seed for hash function
    */
-  explicit cpc_sketch_alloc(uint8_t lg_k = CPC_DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED);
+  explicit cpc_sketch_alloc(uint8_t lg_k = CPC_DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED, const A& allocator = A());
+
+  using allocator_type = A;
+  A get_allocator() const;
 
   /**
    * @return configured lg_k of this sketch
@@ -204,7 +207,7 @@ public:
 
   // This is a convenience alias for users
   // The type returned by the following serialize method
-  typedef vector_u8<A> vector_bytes;
+  using vector_bytes = vector_u8<A>;
 
   /**
    * This method serializes the sketch as a vector of bytes.
@@ -221,7 +224,7 @@ public:
    * @param seed the seed for the hash function that was used to create the sketch
    * @return an instance of a sketch
    */
-  static cpc_sketch_alloc<A> deserialize(std::istream& is, uint64_t seed = DEFAULT_SEED);
+  static cpc_sketch_alloc<A> deserialize(std::istream& is, uint64_t seed = DEFAULT_SEED, const A& allocator = A());
 
   /**
    * This method deserializes a sketch from a given array of bytes.
@@ -230,7 +233,7 @@ public:
    * @param seed the seed for the hash function that was used to create the sketch
    * @return an instance of the sketch
    */
-  static cpc_sketch_alloc<A> deserialize(const void* bytes, size_t size, uint64_t seed = DEFAULT_SEED);
+  static cpc_sketch_alloc<A> deserialize(const void* bytes, size_t size, uint64_t seed = DEFAULT_SEED, const A& allocator = A());
 
   // for internal use
   uint32_t get_num_coupons() const;
diff --git a/be/src/thirdparty/datasketches/cpc_sketch_impl.hpp b/be/src/thirdparty/datasketches/cpc_sketch_impl.hpp
index e6bc010..a314de8 100644
--- a/be/src/thirdparty/datasketches/cpc_sketch_impl.hpp
+++ b/be/src/thirdparty/datasketches/cpc_sketch_impl.hpp
@@ -41,13 +41,13 @@ void cpc_init() {
 }
 
 template<typename A>
-cpc_sketch_alloc<A>::cpc_sketch_alloc(uint8_t lg_k, uint64_t seed):
+cpc_sketch_alloc<A>::cpc_sketch_alloc(uint8_t lg_k, uint64_t seed, const A& allocator):
 lg_k(lg_k),
 seed(seed),
 was_merged(false),
 num_coupons(0),
-surprising_value_table(2, 6 + lg_k),
-sliding_window(),
+surprising_value_table(2, 6 + lg_k, allocator),
+sliding_window(allocator),
 window_offset(0),
 first_interesting_column(0),
 kxp(1 << lg_k),
@@ -59,6 +59,11 @@ hip_est_accum(0)
 }
 
 template<typename A>
+A cpc_sketch_alloc<A>::get_allocator() const {
+  return sliding_window.get_allocator();
+}
+
+template<typename A>
 uint8_t cpc_sketch_alloc<A>::get_lg_k() const {
   return lg_k;
 }
@@ -277,7 +282,7 @@ void cpc_sketch_alloc<A>::promote_sparse_to_windowed() {
 
   sliding_window.resize(k, 0); // zero the memory (because we will be OR'ing into it)
 
-  u32_table<A> new_table(2, 6 + lg_k);
+  u32_table<A> new_table(2, 6 + lg_k, sliding_window.get_allocator());
 
   const uint32_t* old_slots = surprising_value_table.get_slots();
   const size_t old_num_slots = 1 << surprising_value_table.get_lg_size();
@@ -401,7 +406,7 @@ string<A> cpc_sketch_alloc<A>::to_string() const {
 
 template<typename A>
 void cpc_sketch_alloc<A>::serialize(std::ostream& os) const {
-  compressed_state<A> compressed;
+  compressed_state<A> compressed(A(sliding_window.get_allocator()));
   compressed.table_data_words = 0;
   compressed.table_num_entries = 0;
   compressed.window_data_words = 0;
@@ -454,7 +459,7 @@ void cpc_sketch_alloc<A>::serialize(std::ostream& os) const {
 
 template<typename A>
 vector_u8<A> cpc_sketch_alloc<A>::serialize(unsigned header_size_bytes) const {
-  compressed_state<A> compressed;
+  compressed_state<A> compressed(sliding_window.get_allocator());
   compressed.table_data_words = 0;
   compressed.table_num_entries = 0;
   compressed.window_data_words = 0;
@@ -464,7 +469,7 @@ vector_u8<A> cpc_sketch_alloc<A>::serialize(unsigned header_size_bytes) const {
   const bool has_window = compressed.window_data.size() > 0;
   const uint8_t preamble_ints = get_preamble_ints(num_coupons, has_hip, has_table, has_window);
   const size_t size = header_size_bytes + (preamble_ints + compressed.table_data_words + compressed.window_data_words) * sizeof(uint32_t);
-  vector_u8<A> bytes(size);
+  vector_u8<A> bytes(size, 0, sliding_window.get_allocator());
   uint8_t* ptr = bytes.data() + header_size_bytes;
   ptr += copy_to_mem(&preamble_ints, ptr, sizeof(preamble_ints));
   const uint8_t serial_version = SERIAL_VERSION;
@@ -511,7 +516,7 @@ vector_u8<A> cpc_sketch_alloc<A>::serialize(unsigned header_size_bytes) const {
 }
 
 template<typename A>
-cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed) {
+cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed, const A& allocator) {
   uint8_t preamble_ints;
   is.read((char*)&preamble_ints, sizeof(preamble_ints));
   uint8_t serial_version;
@@ -529,7 +534,7 @@ cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(std::istream& is, uint64_t
   const bool has_hip = flags_byte & (1 << flags::HAS_HIP);
   const bool has_table = flags_byte & (1 << flags::HAS_TABLE);
   const bool has_window = flags_byte & (1 << flags::HAS_WINDOW);
-  compressed_state<A> compressed;
+  compressed_state<A> compressed(allocator);
   compressed.table_data_words = 0;
   compressed.table_num_entries = 0;
   compressed.window_data_words = 0;
@@ -583,7 +588,7 @@ cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(std::istream& is, uint64_t
     throw std::invalid_argument("Incompatible seed hashes: " + std::to_string(seed_hash) + ", "
         + std::to_string(compute_seed_hash(seed)));
   }
-  uncompressed_state<A> uncompressed;
+  uncompressed_state<A> uncompressed(allocator);
   get_compressor<A>().uncompress(compressed, uncompressed, lg_k, num_coupons);
   if (!is.good())
     throw std::runtime_error("error reading from std::istream"); 
@@ -592,7 +597,7 @@ cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(std::istream& is, uint64_t
 }
 
 template<typename A>
-cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed) {
+cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed, const A& allocator) {
   ensure_minimum_memory(size, 8);
   const char* ptr = static_cast<const char*>(bytes);
   const char* base = static_cast<const char*>(bytes);
@@ -614,7 +619,7 @@ cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(const void* bytes, size_t s
   const bool has_table = flags_byte & (1 << flags::HAS_TABLE);
   const bool has_window = flags_byte & (1 << flags::HAS_WINDOW);
   ensure_minimum_memory(size, preamble_ints << 2);
-  compressed_state<A> compressed;
+  compressed_state<A> compressed(allocator);
   compressed.table_data_words = 0;
   compressed.table_num_entries = 0;
   compressed.window_data_words = 0;
@@ -677,7 +682,7 @@ cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(const void* bytes, size_t s
     throw std::invalid_argument("Incompatible seed hashes: " + std::to_string(seed_hash) + ", "
         + std::to_string(compute_seed_hash(seed)));
   }
-  uncompressed_state<A> uncompressed;
+  uncompressed_state<A> uncompressed(allocator);
   get_compressor<A>().uncompress(compressed, uncompressed, lg_k, num_coupons);
   return cpc_sketch_alloc(lg_k, num_coupons, first_interesting_column, std::move(uncompressed.table),
       std::move(uncompressed.window), has_hip, kxp, hip_est_accum, seed);
@@ -766,7 +771,7 @@ vector_u64<A> cpc_sketch_alloc<A>::build_bit_matrix() const {
   // Fill the matrix with default rows in which the "early zone" is filled with ones.
   // This is essential for the routine's O(k) time cost (as opposed to O(C)).
   const uint64_t default_row = (static_cast<uint64_t>(1) << window_offset) - 1;
-  vector_u64<A> matrix(k, default_row);
+  vector_u64<A> matrix(k, default_row, sliding_window.get_allocator());
 
   if (num_coupons == 0) return matrix;
 
diff --git a/be/src/thirdparty/datasketches/cpc_union.hpp b/be/src/thirdparty/datasketches/cpc_union.hpp
index e56aa72..dd59abc 100644
--- a/be/src/thirdparty/datasketches/cpc_union.hpp
+++ b/be/src/thirdparty/datasketches/cpc_union.hpp
@@ -35,7 +35,7 @@ namespace datasketches {
  */
 
 // alias with default allocator for convenience
-typedef cpc_union_alloc<std::allocator<void>> cpc_union;
+using cpc_union = cpc_union_alloc<std::allocator<uint8_t>>;
 
 template<typename A>
 class cpc_union_alloc {
@@ -45,7 +45,7 @@ public:
    * @param lg_k base 2 logarithm of the number of bins in the sketch
    * @param seed for hash function
    */
-  explicit cpc_union_alloc(uint8_t lg_k = CPC_DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED);
+  explicit cpc_union_alloc(uint8_t lg_k = CPC_DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED, const A& allocator = A());
 
   cpc_union_alloc(const cpc_union_alloc<A>& other);
   cpc_union_alloc(cpc_union_alloc<A>&& other) noexcept;
diff --git a/be/src/thirdparty/datasketches/cpc_union_impl.hpp b/be/src/thirdparty/datasketches/cpc_union_impl.hpp
index 65d933c..5acfe5f 100644
--- a/be/src/thirdparty/datasketches/cpc_union_impl.hpp
+++ b/be/src/thirdparty/datasketches/cpc_union_impl.hpp
@@ -25,16 +25,16 @@
 namespace datasketches {
 
 template<typename A>
-cpc_union_alloc<A>::cpc_union_alloc(uint8_t lg_k, uint64_t seed):
+cpc_union_alloc<A>::cpc_union_alloc(uint8_t lg_k, uint64_t seed, const A& allocator):
 lg_k(lg_k),
 seed(seed),
 accumulator(nullptr),
-bit_matrix()
+bit_matrix(allocator)
 {
   if (lg_k < CPC_MIN_LG_K || lg_k > CPC_MAX_LG_K) {
     throw std::invalid_argument("lg_k must be >= " + std::to_string(CPC_MIN_LG_K) + " and <= " + std::to_string(CPC_MAX_LG_K) + ": " + std::to_string(lg_k));
   }
-  accumulator = new (AllocCpc().allocate(1)) cpc_sketch_alloc<A>(lg_k, seed);
+  accumulator = new (AllocCpc().allocate(1)) cpc_sketch_alloc<A>(lg_k, seed, allocator);
 }
 
 template<typename A>
@@ -200,13 +200,13 @@ cpc_sketch_alloc<A> cpc_union_alloc<A>::get_result_from_bit_matrix() const {
 
   const uint8_t offset = cpc_sketch_alloc<A>::determine_correct_offset(lg_k, num_coupons);
 
-  vector_u8<A> sliding_window(k);
+  vector_u8<A> sliding_window(k, 0, bit_matrix.get_allocator());
   // don't need to zero the window's memory
 
   // dynamically growing caused snowplow effect
   uint8_t table_lg_size = lg_k - 4; // K/16; in some cases this will end up being oversized
   if (table_lg_size < 2) table_lg_size = 2;
-  u32_table<A> table(table_lg_size, 6 + lg_k);
+  u32_table<A> table(table_lg_size, 6 + lg_k, bit_matrix.get_allocator());
 
   // the following should work even when the offset is zero
   const uint64_t mask_for_clearing_window = (static_cast<uint64_t>(0xff) << offset) ^ UINT64_MAX;
@@ -314,7 +314,7 @@ void cpc_union_alloc<A>::reduce_k(uint8_t new_lg_k) {
     vector_u64<A> old_matrix = std::move(bit_matrix);
     const uint8_t old_lg_k = lg_k;
     const size_t new_k = 1 << new_lg_k;
-    bit_matrix = vector_u64<A>(new_k, 0);
+    bit_matrix = vector_u64<A>(new_k, 0, old_matrix.get_allocator());
     lg_k = new_lg_k;
     or_matrix_into_matrix(old_matrix, old_lg_k);
     return;
diff --git a/be/src/thirdparty/datasketches/cpc_util.hpp b/be/src/thirdparty/datasketches/cpc_util.hpp
index b63f26f..1a33b3a 100644
--- a/be/src/thirdparty/datasketches/cpc_util.hpp
+++ b/be/src/thirdparty/datasketches/cpc_util.hpp
@@ -24,12 +24,6 @@
 
 namespace datasketches {
 
-static inline uint16_t compute_seed_hash(uint64_t seed) {
-  HashState hashes;
-  MurmurHash3_x64_128(&seed, sizeof(seed), 0, hashes);
-  return hashes.h1 & 0xffff;
-}
-
 static inline uint64_t divide_longs_rounding_up(uint64_t x, uint64_t y) {
   if (y == 0) throw std::invalid_argument("divide_longs_rounding_up: bad argument");
   const uint64_t quotient = x / y;
diff --git a/be/src/thirdparty/datasketches/hll.hpp b/be/src/thirdparty/datasketches/hll.hpp
index 3898dda..a65b945 100644
--- a/be/src/thirdparty/datasketches/hll.hpp
+++ b/be/src/thirdparty/datasketches/hll.hpp
@@ -108,7 +108,7 @@ class hll_union_alloc;
 template<typename A> using AllocU8 = typename std::allocator_traits<A>::template rebind_alloc<uint8_t>;
 template<typename A> using vector_u8 = std::vector<uint8_t, AllocU8<A>>;
 
-template<typename A = std::allocator<char> >
+template<typename A = std::allocator<uint8_t> >
 class hll_sketch_alloc final {
   public:
     /**
@@ -119,7 +119,7 @@ class hll_sketch_alloc final {
      *        keeping memory use constant (if HLL_6 or HLL_8) at the cost of
      *        starting out using much more memory
      */
-    explicit hll_sketch_alloc(int lg_config_k, target_hll_type tgt_type = HLL_4, bool start_full_size = false);
+    explicit hll_sketch_alloc(int lg_config_k, target_hll_type tgt_type = HLL_4, bool start_full_size = false, const A& allocator = A());
 
     /**
      * Copy constructor
@@ -140,14 +140,14 @@ class hll_sketch_alloc final {
      * Reconstructs a sketch from a serialized image on a stream.
      * @param is An input stream with a binary image of a sketch
      */
-    static hll_sketch_alloc deserialize(std::istream& is);
+    static hll_sketch_alloc deserialize(std::istream& is, const A& allocator = A());
 
     /**
      * Reconstructs a sketch from a serialized image in a byte array.
      * @param is bytes An input array with a binary image of a sketch
      * @param len Length of the input array, in bytes
      */
-    static hll_sketch_alloc deserialize(const void* bytes, size_t len);
+    static hll_sketch_alloc deserialize(const void* bytes, size_t len, const A& allocator = A());
 
     //! Class destructor
     virtual ~hll_sketch_alloc();
@@ -423,7 +423,7 @@ class hll_sketch_alloc final {
  * author Kevin Lang
  */
  
-template<typename A = std::allocator<char> >
+template<typename A = std::allocator<uint8_t> >
 class hll_union_alloc {
   public:
     /**
@@ -431,7 +431,7 @@ class hll_union_alloc {
      * @param lg_max_k The maximum size, in log2, of k. The value must
      * be between 7 and 21, inclusive.
      */
-    explicit hll_union_alloc(int lg_max_k);
+    explicit hll_union_alloc(int lg_max_k, const A& allocator = A());
 
     /**
      * Returns the current cardinality estimate
@@ -469,18 +469,6 @@ class hll_union_alloc {
     double get_upper_bound(int num_std_dev) const;
 
     /**
-     * Returns the size of the union serialized in compact form.
-     * @return Size of the union serialized in compact form, in bytes.
-     */
-    int get_compact_serialization_bytes() const;
-
-    /**
-     * Returns the size of the union serialized without compaction.
-     * @return Size of the union serialized without compaction, in bytes.
-     */
-    int get_updatable_serialization_bytes() const;
-
-    /**
      * Returns union's configured lg_k value.
      * @return Configured lg_k value.
      */
@@ -493,12 +481,6 @@ class hll_union_alloc {
     target_hll_type get_target_type() const;
 
     /**
-     * Indicates if the union is currently stored compacted.
-     * @return True if the union is stored in compact form.
-     */
-    bool is_compact() const;
-
-    /**
      * Indicates if the union is currently empty.
      * @return True if the union is empty.
      */
@@ -606,15 +588,6 @@ class hll_union_alloc {
     void update(const void* data, size_t length_bytes);
 
     /**
-     * Returns the maximum size in bytes that this union operator can grow to given a lg_k.
-     *
-     * @param lg_k The maximum Log2 of k for this union operator. This value must be
-     * between 4 and 21 inclusively.
-     * @return the maximum size in bytes that this union operator can grow to.
-     */
-    static int get_max_serialization_bytes(int lg_k);
-
-    /**
      * Gets the current (approximate) Relative Error (RE) asymptotic values given several
      * parameters. This is used primarily for testing.
      * @param upper_bound return the RE for the Upper Bound, otherwise for the Lower Bound.
@@ -645,7 +618,6 @@ class hll_union_alloc {
     void coupon_update(int coupon);
 
     hll_mode get_current_mode() const;
-    int get_serialization_version() const;
     bool is_out_of_order_flag() const;
     bool is_estimation_mode() const;
 
diff --git a/be/src/thirdparty/datasketches/icon_estimator.hpp b/be/src/thirdparty/datasketches/icon_estimator.hpp
index 27d76ca..4a9daea 100644
--- a/be/src/thirdparty/datasketches/icon_estimator.hpp
+++ b/be/src/thirdparty/datasketches/icon_estimator.hpp
@@ -231,7 +231,7 @@ static const double ICON_POLYNOMIAL_COEFFICIENTS[ICON_TABLE_SIZE] = {
 #endif
 };
 
-static double evaluate_polynomial(const double* coefficients, int start, int num, double x) {
+static inline double evaluate_polynomial(const double* coefficients, int start, int num, double x) {
   const int final = start + num - 1;
   double total = coefficients[final];
   for (int j = final - 1; j >= start; j--) {
@@ -241,11 +241,11 @@ static double evaluate_polynomial(const double* coefficients, int start, int num
   return total;
 }
 
-static double icon_exponential_approximation(double k, double c) {
+static inline double icon_exponential_approximation(double k, double c) {
   return (0.7940236163830469 * k * pow(2.0, c / k));
 }
 
-static double compute_icon_estimate(uint8_t lg_k, uint64_t c) {
+static inline double compute_icon_estimate(uint8_t lg_k, uint64_t c) {
   if (lg_k < ICON_MIN_LOG_K || lg_k > ICON_MAX_LOG_K) throw std::out_of_range("lg_k out of range");
   if (c < 2) return ((c == 0) ? 0.0 : 1.0);
   const size_t k = 1 << lg_k;
diff --git a/be/src/thirdparty/datasketches/kll_quantile_calculator.hpp b/be/src/thirdparty/datasketches/kll_quantile_calculator.hpp
index bc60f26..5114399 100644
--- a/be/src/thirdparty/datasketches/kll_quantile_calculator.hpp
+++ b/be/src/thirdparty/datasketches/kll_quantile_calculator.hpp
@@ -28,7 +28,7 @@ template <typename T, typename C, typename A>
 class kll_quantile_calculator {
   public:
     // assumes that all levels are sorted including level 0
-    kll_quantile_calculator(const T* items, const uint32_t* levels, uint8_t num_levels, uint64_t n);
+    kll_quantile_calculator(const T* items, const uint32_t* levels, uint8_t num_levels, uint64_t n, const A& allocator);
     T get_quantile(double fraction) const;
 
   private:
diff --git a/be/src/thirdparty/datasketches/kll_quantile_calculator_impl.hpp b/be/src/thirdparty/datasketches/kll_quantile_calculator_impl.hpp
index f580819..23efa4d 100644
--- a/be/src/thirdparty/datasketches/kll_quantile_calculator_impl.hpp
+++ b/be/src/thirdparty/datasketches/kll_quantile_calculator_impl.hpp
@@ -29,8 +29,8 @@
 namespace datasketches {
 
 template <typename T, typename C, typename A>
-kll_quantile_calculator<T, C, A>::kll_quantile_calculator(const T* items, const uint32_t* levels, uint8_t num_levels, uint64_t n):
-n_(n), levels_(num_levels + 1)
+kll_quantile_calculator<T, C, A>::kll_quantile_calculator(const T* items, const uint32_t* levels, uint8_t num_levels, uint64_t n, const A& allocator):
+n_(n), levels_(num_levels + 1, 0, allocator), entries_(allocator)
 {
   const uint32_t num_items = levels[num_levels] - levels[0];
   entries_.reserve(num_items);
@@ -116,7 +116,7 @@ uint32_t kll_quantile_calculator<T, C, A>::search_for_chunk_containing_pos(uint6
 template <typename T, typename C, typename A>
 void kll_quantile_calculator<T, C, A>::merge_sorted_blocks(Container& entries, const uint32_t* levels, uint8_t num_levels, uint32_t num_items) {
   if (num_levels == 1) return;
-  Container temporary;
+  Container temporary(entries.get_allocator());
   temporary.reserve(num_items);
   merge_sorted_blocks_direct(entries, temporary, levels, 0, num_levels);
 }
diff --git a/be/src/thirdparty/datasketches/kll_sketch.hpp b/be/src/thirdparty/datasketches/kll_sketch.hpp
index a4530c9..bbca76f 100644
--- a/be/src/thirdparty/datasketches/kll_sketch.hpp
+++ b/be/src/thirdparty/datasketches/kll_sketch.hpp
@@ -161,7 +161,7 @@ class kll_sketch {
     static const uint16_t MIN_K = DEFAULT_M;
     static const uint16_t MAX_K = (1 << 16) - 1;
 
-    explicit kll_sketch(uint16_t k = DEFAULT_K);
+    explicit kll_sketch(uint16_t k = DEFAULT_K, const A& allocator = A());
     kll_sketch(const kll_sketch& other);
     kll_sketch(kll_sketch&& other) noexcept;
     ~kll_sketch();
@@ -203,6 +203,12 @@ class kll_sketch {
     bool is_empty() const;
 
     /**
+     * Returns configured parameter k
+     * @return parameter k
+     */
+    uint16_t get_k() const;
+
+    /**
      * Returns the length of the input stream.
      * @return stream length
      */
@@ -401,7 +407,7 @@ class kll_sketch {
      * @param is input stream
      * @return an instance of a sketch
      */
-    static kll_sketch deserialize(std::istream& is);
+    static kll_sketch<T, C, S, A> deserialize(std::istream& is, const A& allocator = A());
 
     /**
      * This method deserializes a sketch from a given array of bytes.
@@ -409,7 +415,7 @@ class kll_sketch {
      * @param size the size of the array
      * @return an instance of a sketch
      */
-    static kll_sketch deserialize(const void* bytes, size_t size);
+    static kll_sketch<T, C, S, A> deserialize(const void* bytes, size_t size, const A& allocator = A());
 
     /*
      * Gets the normalized rank error given k and pmf.
@@ -461,6 +467,7 @@ class kll_sketch {
     static const uint8_t PREAMBLE_INTS_SHORT = 2; // for empty and single item
     static const uint8_t PREAMBLE_INTS_FULL = 5;
 
+    A allocator_;
     uint16_t k_;
     uint8_t m_; // minimum buffer "width"
     uint16_t min_k_; // for error estimation after merging with different k
diff --git a/be/src/thirdparty/datasketches/kll_sketch_impl.hpp b/be/src/thirdparty/datasketches/kll_sketch_impl.hpp
index f0c5ff3..0e0ef87 100644
--- a/be/src/thirdparty/datasketches/kll_sketch_impl.hpp
+++ b/be/src/thirdparty/datasketches/kll_sketch_impl.hpp
@@ -30,13 +30,14 @@
 namespace datasketches {
 
 template<typename T, typename C, typename S, typename A>
-kll_sketch<T, C, S, A>::kll_sketch(uint16_t k):
+kll_sketch<T, C, S, A>::kll_sketch(uint16_t k, const A& allocator):
+allocator_(allocator),
 k_(k),
 m_(DEFAULT_M),
 min_k_(k),
 n_(0),
 num_levels_(1),
-levels_(2),
+levels_(2, 0, allocator),
 items_(nullptr),
 items_size_(k_),
 min_value_(nullptr),
@@ -47,11 +48,12 @@ is_level_zero_sorted_(false)
     throw std::invalid_argument("K must be >= " + std::to_string(MIN_K) + " and <= " + std::to_string(MAX_K) + ": " + std::to_string(k));
   }
   levels_[0] = levels_[1] = k;
-  items_ = A().allocate(items_size_);
+  items_ = allocator_.allocate(items_size_);
 }
 
 template<typename T, typename C, typename S, typename A>
 kll_sketch<T, C, S, A>::kll_sketch(const kll_sketch& other):
+allocator_(other.allocator_),
 k_(other.k_),
 m_(other.m_),
 min_k_(other.min_k_),
@@ -64,14 +66,15 @@ min_value_(nullptr),
 max_value_(nullptr),
 is_level_zero_sorted_(other.is_level_zero_sorted_)
 {
-  items_ = A().allocate(items_size_);
+  items_ = allocator_.allocate(items_size_);
   std::copy(&other.items_[levels_[0]], &other.items_[levels_[num_levels_]], &items_[levels_[0]]);
-  if (other.min_value_ != nullptr) min_value_ = new (A().allocate(1)) T(*other.min_value_);
-  if (other.max_value_ != nullptr) max_value_ = new (A().allocate(1)) T(*other.max_value_);
+  if (other.min_value_ != nullptr) min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
+  if (other.max_value_ != nullptr) max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
 }
 
 template<typename T, typename C, typename S, typename A>
 kll_sketch<T, C, S, A>::kll_sketch(kll_sketch&& other) noexcept:
+allocator_(std::move(other.allocator_)),
 k_(other.k_),
 m_(other.m_),
 min_k_(other.min_k_),
@@ -91,7 +94,8 @@ is_level_zero_sorted_(other.is_level_zero_sorted_)
 
 template<typename T, typename C, typename S, typename A>
 kll_sketch<T, C, S, A>& kll_sketch<T, C, S, A>::operator=(const kll_sketch& other) {
-  kll_sketch copy(other);
+  kll_sketch<T, C, S, A> copy(other);
+  std::swap(allocator_, copy.allocator_);
   std::swap(k_, copy.k_);
   std::swap(m_, copy.m_);
   std::swap(min_k_, copy.min_k_);
@@ -108,6 +112,7 @@ kll_sketch<T, C, S, A>& kll_sketch<T, C, S, A>::operator=(const kll_sketch& othe
 
 template<typename T, typename C, typename S, typename A>
 kll_sketch<T, C, S, A>& kll_sketch<T, C, S, A>::operator=(kll_sketch&& other) {
+  std::swap(allocator_, other.allocator_);
   std::swap(k_, other.k_);
   std::swap(m_, other.m_);
   std::swap(min_k_, other.min_k_);
@@ -128,15 +133,15 @@ kll_sketch<T, C, S, A>::~kll_sketch() {
     const uint32_t begin = levels_[0];
     const uint32_t end = levels_[num_levels_];
     for (uint32_t i = begin; i < end; i++) items_[i].~T();
-    A().deallocate(items_, items_size_);
+    allocator_.deallocate(items_, items_size_);
   }
   if (min_value_ != nullptr) {
     min_value_->~T();
-    A().deallocate(min_value_, 1);
+    allocator_.deallocate(min_value_, 1);
   }
   if (max_value_ != nullptr) {
     max_value_->~T();
-    A().deallocate(max_value_, 1);
+    allocator_.deallocate(max_value_, 1);
   }
 }
 
@@ -159,8 +164,8 @@ void kll_sketch<T, C, S, A>::update(T&& value) {
 template<typename T, typename C, typename S, typename A>
 void kll_sketch<T, C, S, A>::update_min_max(const T& value) {
   if (is_empty()) {
-    min_value_ = new (A().allocate(1)) T(value);
-    max_value_ = new (A().allocate(1)) T(value);
+    min_value_ = new (allocator_.allocate(1)) T(value);
+    max_value_ = new (allocator_.allocate(1)) T(value);
   } else {
     if (C()(value, *min_value_)) *min_value_ = value;
     if (C()(*max_value_, value)) *max_value_ = value;
@@ -182,8 +187,8 @@ void kll_sketch<T, C, S, A>::merge(const kll_sketch& other) {
     throw std::invalid_argument("incompatible M: " + std::to_string(m_) + " and " + std::to_string(other.m_));
   }
   if (is_empty()) {
-    min_value_ = new (A().allocate(1)) T(*other.min_value_);
-    max_value_ = new (A().allocate(1)) T(*other.max_value_);
+    min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
+    max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
   } else {
     if (C()(*other.min_value_, *min_value_)) *min_value_ = *other.min_value_;
     if (C()(*max_value_, *other.max_value_)) *max_value_ = *other.max_value_;
@@ -206,8 +211,8 @@ void kll_sketch<T, C, S, A>::merge(kll_sketch&& other) {
     throw std::invalid_argument("incompatible M: " + std::to_string(m_) + " and " + std::to_string(other.m_));
   }
   if (is_empty()) {
-    min_value_ = new (A().allocate(1)) T(std::move(*other.min_value_));
-    max_value_ = new (A().allocate(1)) T(std::move(*other.max_value_));
+    min_value_ = new (allocator_.allocate(1)) T(std::move(*other.min_value_));
+    max_value_ = new (allocator_.allocate(1)) T(std::move(*other.max_value_));
   } else {
     if (C()(*other.min_value_, *min_value_)) *min_value_ = std::move(*other.min_value_);
     if (C()(*max_value_, *other.max_value_)) *max_value_ = std::move(*other.max_value_);
@@ -229,6 +234,11 @@ bool kll_sketch<T, C, S, A>::is_empty() const {
 }
 
 template<typename T, typename C, typename S, typename A>
+uint16_t kll_sketch<T, C, S, A>::get_k() const {
+  return k_;
+}
+
+template<typename T, typename C, typename S, typename A>
 uint64_t kll_sketch<T, C, S, A>::get_n() const {
   return n_;
 }
@@ -270,8 +280,7 @@ T kll_sketch<T, C, S, A>::get_quantile(double fraction) const {
 
 template<typename T, typename C, typename S, typename A>
 std::vector<T, A> kll_sketch<T, C, S, A>::get_quantiles(const double* fractions, uint32_t size) const {
-  std::vector<T, A> quantiles;
-  quantiles.reserve(size);
+  std::vector<T, A> quantiles(allocator_);
   if (is_empty()) return quantiles;
   std::unique_ptr<kll_quantile_calculator<T, C, A>, std::function<void(kll_quantile_calculator<T, C, A>*)>> quantile_calculator;
   quantiles.reserve(size);
@@ -295,11 +304,11 @@ std::vector<T, A> kll_sketch<T, C, S, A>::get_quantiles(const double* fractions,
 
 template<typename T, typename C, typename S, typename A>
 std::vector<T, A> kll_sketch<T, C, S, A>::get_quantiles(size_t num) const {
-  if (is_empty()) return std::vector<T, A>();
+  if (is_empty()) return std::vector<T, A>(allocator_);
   if (num == 0) {
     throw std::invalid_argument("num must be > 0");
   }
-  std::vector<double> fractions(num);
+  vector_d<A> fractions(num, 0, allocator_);
   fractions[0] = 0.0;
   for (size_t i = 1; i < num; i++) {
     fractions[i] = static_cast<double>(i) / (num - 1);
@@ -411,7 +420,7 @@ template<typename T, typename C, typename S, typename A>
 vector_u8<A> kll_sketch<T, C, S, A>::serialize(unsigned header_size_bytes) const {
   const bool is_single_item = n_ == 1;
   const size_t size = header_size_bytes + get_serialized_size_bytes();
-  vector_u8<A> bytes(size);
+  vector_u8<A> bytes(size, 0, allocator_);
   uint8_t* ptr = bytes.data() + header_size_bytes;
   const uint8_t* end_ptr = ptr + size;
   const uint8_t preamble_ints(is_empty() || is_single_item ? PREAMBLE_INTS_SHORT : PREAMBLE_INTS_FULL);
@@ -449,7 +458,7 @@ vector_u8<A> kll_sketch<T, C, S, A>::serialize(unsigned header_size_bytes) const
 }
 
 template<typename T, typename C, typename S, typename A>
-kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is) {
+kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is, const A& allocator) {
   uint8_t preamble_ints;
   is.read((char*)&preamble_ints, sizeof(preamble_ints));
   uint8_t serial_version;
@@ -472,7 +481,7 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is) {
 
   if (!is.good()) throw std::runtime_error("error reading from std::istream");
   const bool is_empty(flags_byte & (1 << flags::IS_EMPTY));
-  if (is_empty) return kll_sketch(k);
+  if (is_empty) return kll_sketch(k, allocator);
 
   uint64_t n;
   uint16_t min_k;
@@ -488,7 +497,7 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is) {
     is.read((char*)&num_levels, sizeof(num_levels));
     is.read((char*)&unused, sizeof(unused));
   }
-  vector_u32<A> levels(num_levels + 1);
+  vector_u32<A> levels(num_levels + 1, 0, allocator);
   const uint32_t capacity(kll_helper::compute_total_capacity(k, m, num_levels));
   if (is_single_item) {
     levels[0] = capacity - 1;
@@ -497,41 +506,43 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is) {
     is.read((char*)levels.data(), sizeof(levels[0]) * num_levels);
   }
   levels[num_levels] = capacity;
-  auto item_buffer_deleter = [](T* ptr) { A().deallocate(ptr, 1); };
-  std::unique_ptr<T, decltype(item_buffer_deleter)> min_value_buffer(A().allocate(1), item_buffer_deleter);
-  std::unique_ptr<T, decltype(item_buffer_deleter)> max_value_buffer(A().allocate(1), item_buffer_deleter);
-  std::unique_ptr<T, item_deleter> min_value;
-  std::unique_ptr<T, item_deleter> max_value;
+  A alloc(allocator);
+  auto item_buffer_deleter = [&alloc](T* ptr) { alloc.deallocate(ptr, 1); };
+  std::unique_ptr<T, decltype(item_buffer_deleter)> min_value_buffer(alloc.allocate(1), item_buffer_deleter);
+  std::unique_ptr<T, decltype(item_buffer_deleter)> max_value_buffer(alloc.allocate(1), item_buffer_deleter);
+  std::unique_ptr<T, item_deleter> min_value(nullptr, item_deleter(allocator));
+  std::unique_ptr<T, item_deleter> max_value(nullptr, item_deleter(allocator));
   if (!is_single_item) {
     S().deserialize(is, min_value_buffer.get(), 1);
     // serde call did not throw, repackage with destrtuctor
-    min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter());
+    min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
     S().deserialize(is, max_value_buffer.get(), 1);
     // serde call did not throw, repackage with destrtuctor
-    max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter());
+    max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
   }
-  auto items_buffer_deleter = [capacity](T* ptr) { A().deallocate(ptr, capacity); };
-  std::unique_ptr<T, decltype(items_buffer_deleter)> items_buffer(A().allocate(capacity), items_buffer_deleter);
+  auto items_buffer_deleter = [capacity, &alloc](T* ptr) { alloc.deallocate(ptr, capacity); };
+  std::unique_ptr<T, decltype(items_buffer_deleter)> items_buffer(alloc.allocate(capacity), items_buffer_deleter);
   const auto num_items = levels[num_levels] - levels[0];
   S().deserialize(is, &items_buffer.get()[levels[0]], num_items);
   // serde call did not throw, repackage with destrtuctors
-  std::unique_ptr<T, items_deleter> items(items_buffer.release(), items_deleter(levels[0], capacity));
+  std::unique_ptr<T, items_deleter> items(items_buffer.release(), items_deleter(levels[0], capacity, allocator));
   const bool is_level_zero_sorted = (flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED)) > 0;
   if (is_single_item) {
     new (min_value_buffer.get()) T(items.get()[levels[0]]);
     // copy did not throw, repackage with destrtuctor
-    min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter());
+    min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
     new (max_value_buffer.get()) T(items.get()[levels[0]]);
     // copy did not throw, repackage with destrtuctor
-    max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter());
+    max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
   }
-  if (!is.good()) throw std::runtime_error("error reading from std::istream");
+  if (!is.good())
+    throw std::runtime_error("error reading from std::istream");
   return kll_sketch(k, min_k, n, num_levels, std::move(levels), std::move(items), capacity,
       std::move(min_value), std::move(max_value), is_level_zero_sorted);
 }
 
 template<typename T, typename C, typename S, typename A>
-kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, size_t size) {
+kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, size_t size, const A& allocator) {
   ensure_minimum_memory(size, 8);
   const char* ptr = static_cast<const char*>(bytes);
   uint8_t preamble_ints;
@@ -555,7 +566,7 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, si
   ensure_minimum_memory(size, 1 << preamble_ints);
 
   const bool is_empty(flags_byte & (1 << flags::IS_EMPTY));
-  if (is_empty) return kll_sketch<T, C, S, A>(k);
+  if (is_empty) return kll_sketch<T, C, S, A>(k, allocator);
 
   uint64_t n;
   uint16_t min_k;
@@ -572,7 +583,7 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, si
     ptr += copy_from_mem(ptr, &num_levels, sizeof(num_levels));
     ptr++; // skip unused byte
   }
-  vector_u32<A> levels(num_levels + 1);
+  vector_u32<A> levels(num_levels + 1, 0, allocator);
   const uint32_t capacity(kll_helper::compute_total_capacity(k, m, num_levels));
   if (is_single_item) {
     levels[0] = capacity - 1;
@@ -581,35 +592,36 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, si
     ptr += copy_from_mem(ptr, levels.data(), sizeof(levels[0]) * num_levels);
   }
   levels[num_levels] = capacity;
-  auto item_buffer_deleter = [](T* ptr) { A().deallocate(ptr, 1); };
-  std::unique_ptr<T, decltype(item_buffer_deleter)> min_value_buffer(A().allocate(1), item_buffer_deleter);
-  std::unique_ptr<T, decltype(item_buffer_deleter)> max_value_buffer(A().allocate(1), item_buffer_deleter);
-  std::unique_ptr<T, item_deleter> min_value;
-  std::unique_ptr<T, item_deleter> max_value;
+  A alloc(allocator);
+  auto item_buffer_deleter = [&alloc](T* ptr) { alloc.deallocate(ptr, 1); };
+  std::unique_ptr<T, decltype(item_buffer_deleter)> min_value_buffer(alloc.allocate(1), item_buffer_deleter);
+  std::unique_ptr<T, decltype(item_buffer_deleter)> max_value_buffer(alloc.allocate(1), item_buffer_deleter);
+  std::unique_ptr<T, item_deleter> min_value(nullptr, item_deleter(allocator));
+  std::unique_ptr<T, item_deleter> max_value(nullptr, item_deleter(allocator));
   if (!is_single_item) {
     ptr += S().deserialize(ptr, end_ptr - ptr, min_value_buffer.get(), 1);
     // serde call did not throw, repackage with destrtuctor
-    min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter());
+    min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
     ptr += S().deserialize(ptr, end_ptr - ptr, max_value_buffer.get(), 1);
     // serde call did not throw, repackage with destrtuctor
-    max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter());
+    max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
   }
-  auto items_buffer_deleter = [capacity](T* ptr) { A().deallocate(ptr, capacity); };
-  std::unique_ptr<T, decltype(items_buffer_deleter)> items_buffer(A().allocate(capacity), items_buffer_deleter);
+  auto items_buffer_deleter = [capacity, &alloc](T* ptr) { alloc.deallocate(ptr, capacity); };
+  std::unique_ptr<T, decltype(items_buffer_deleter)> items_buffer(alloc.allocate(capacity), items_buffer_deleter);
   const auto num_items = levels[num_levels] - levels[0];
   ptr += S().deserialize(ptr, end_ptr - ptr, &items_buffer.get()[levels[0]], num_items);
   // serde call did not throw, repackage with destrtuctors
-  std::unique_ptr<T, items_deleter> items(items_buffer.release(), items_deleter(levels[0], capacity));
+  std::unique_ptr<T, items_deleter> items(items_buffer.release(), items_deleter(levels[0], capacity, allocator));
   const size_t delta = ptr - static_cast<const char*>(bytes);
   if (delta != size) throw std::logic_error("deserialized size mismatch: " + std::to_string(delta) + " != " + std::to_string(size));
   const bool is_level_zero_sorted = (flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED)) > 0;
   if (is_single_item) {
     new (min_value_buffer.get()) T(items.get()[levels[0]]);
     // copy did not throw, repackage with destrtuctor
-    min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter());
+    min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
     new (max_value_buffer.get()) T(items.get()[levels[0]]);
     // copy did not throw, repackage with destrtuctor
-    max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter());
+    max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
   }
   return kll_sketch(k, min_k, n, num_levels, std::move(levels), std::move(items), capacity,
       std::move(min_value), std::move(max_value), is_level_zero_sorted);
@@ -634,6 +646,7 @@ template<typename T, typename C, typename S, typename A>
 kll_sketch<T, C, S, A>::kll_sketch(uint16_t k, uint16_t min_k, uint64_t n, uint8_t num_levels, vector_u32<A>&& levels,
     std::unique_ptr<T, items_deleter> items, uint32_t items_size, std::unique_ptr<T, item_deleter> min_value,
     std::unique_ptr<T, item_deleter> max_value, bool is_level_zero_sorted):
+allocator_(levels.get_allocator()),
 k_(k),
 m_(DEFAULT_M),
 min_k_(min_k),
@@ -735,9 +748,9 @@ void kll_sketch<T, C, S, A>::add_empty_top_level_to_completely_full_sketch() {
   const uint32_t new_total_cap = cur_total_cap + delta_cap;
 
   // move (and shift) the current data into the new buffer
-  T* new_buf = A().allocate(new_total_cap);
+  T* new_buf = allocator_.allocate(new_total_cap);
   kll_helper::move_construct<T>(items_, 0, cur_total_cap, new_buf, delta_cap, true);
-  A().deallocate(items_, items_size_);
+  allocator_.deallocate(items_, items_size_);
   items_ = new_buf;
   items_size_ = new_total_cap;
 
@@ -763,19 +776,20 @@ void kll_sketch<T, C, S, A>::sort_level_zero() {
 template<typename T, typename C, typename S, typename A>
 std::unique_ptr<kll_quantile_calculator<T, C, A>, std::function<void(kll_quantile_calculator<T, C, A>*)>> kll_sketch<T, C, S, A>::get_quantile_calculator() {
   sort_level_zero();
-  typedef typename std::allocator_traits<A>::template rebind_alloc<kll_quantile_calculator<T, C, A>> AllocCalc;
+  using AllocCalc = typename std::allocator_traits<A>::template rebind_alloc<kll_quantile_calculator<T, C, A>>;
+  AllocCalc alloc(allocator_);
   std::unique_ptr<kll_quantile_calculator<T, C, A>, std::function<void(kll_quantile_calculator<T, C, A>*)>> quantile_calculator(
-    new (AllocCalc().allocate(1)) kll_quantile_calculator<T, C, A>(items_, levels_.data(), num_levels_, n_),
-    [](kll_quantile_calculator<T, C, A>* ptr){ ptr->~kll_quantile_calculator<T, C, A>(); AllocCalc().deallocate(ptr, 1); }
+    new (alloc.allocate(1)) kll_quantile_calculator<T, C, A>(items_, levels_.data(), num_levels_, n_, allocator_),
+    [alloc](kll_quantile_calculator<T, C, A>* ptr) mutable { ptr->~kll_quantile_calculator<T, C, A>(); alloc.deallocate(ptr, 1); } // by-value capture: the deleter outlives this frame
   );
   return quantile_calculator;
 }
 
 template<typename T, typename C, typename S, typename A>
 vector_d<A> kll_sketch<T, C, S, A>::get_PMF_or_CDF(const T* split_points, uint32_t size, bool is_CDF) const {
-  if (is_empty()) return vector_d<A>();
+  if (is_empty()) return vector_d<A>(allocator_);
   kll_helper::validate_values<T, C>(split_points, size);
-  vector_d<A> buckets(size + 1, 0);
+  vector_d<A> buckets(size + 1, 0, allocator_);
   uint8_t level = 0;
   uint64_t weight = 1;
   while (level < num_levels_) {
@@ -845,12 +859,13 @@ template<typename T, typename C, typename S, typename A>
 template<typename O>
 void kll_sketch<T, C, S, A>::merge_higher_levels(O&& other, uint64_t final_n) {
   const uint32_t tmp_num_items = get_num_retained() + other.get_num_retained_above_level_zero();
-  auto tmp_items_deleter = [tmp_num_items](T* ptr) { A().deallocate(ptr, tmp_num_items); }; // no destructor needed
-  const std::unique_ptr<T, decltype(tmp_items_deleter)> workbuf(A().allocate(tmp_num_items), tmp_items_deleter);
+  A alloc(allocator_);
+  auto tmp_items_deleter = [tmp_num_items, &alloc](T* ptr) { alloc.deallocate(ptr, tmp_num_items); }; // no destructor needed
+  const std::unique_ptr<T, decltype(tmp_items_deleter)> workbuf(allocator_.allocate(tmp_num_items), tmp_items_deleter);
   const uint8_t ub = kll_helper::ub_on_num_levels(final_n);
   const size_t work_levels_size = ub + 2; // ub+1 does not work
-  vector_u32<A> worklevels(work_levels_size);
-  vector_u32<A> outlevels(work_levels_size);
+  vector_u32<A> worklevels(work_levels_size, 0, allocator_);
+  vector_u32<A> outlevels(work_levels_size, 0, allocator_);
 
   const uint8_t provisional_num_levels = std::max(num_levels_, other.num_levels_);
 
@@ -864,9 +879,9 @@ void kll_sketch<T, C, S, A>::merge_higher_levels(O&& other, uint64_t final_n) {
 
   // now we need to transfer the results back into "this" sketch
   if (result.final_capacity != items_size_) {
-    A().deallocate(items_, items_size_);
+    allocator_.deallocate(items_, items_size_);
     items_size_ = result.final_capacity;
-    items_ = A().allocate(items_size_);
+    items_ = allocator_.allocate(items_size_);
   }
   const uint32_t free_space_at_bottom = result.final_capacity - result.final_num_items;
   kll_helper::move_construct<T>(workbuf.get(), outlevels[0], outlevels[0] + result.final_num_items, items_, free_space_at_bottom, true);
@@ -1101,29 +1116,32 @@ const std::pair<const T&, const uint64_t> kll_sketch<T, C, S, A>::const_iterator
 template<typename T, typename C, typename S, typename A>
 class kll_sketch<T, C, S, A>::item_deleter {
   public:
-  void operator() (T* ptr) const {
+  item_deleter(const A& allocator): allocator_(allocator) {}
+  void operator() (T* ptr) {
     if (ptr != nullptr) {
       ptr->~T();
-      A().deallocate(ptr, 1);
+      allocator_.deallocate(ptr, 1);
     }
   }
+  private:
+  A allocator_;
 };
 
 template<typename T, typename C, typename S, typename A>
 class kll_sketch<T, C, S, A>::items_deleter {
   public:
-  items_deleter(uint32_t start, uint32_t num): start(start), num(num) {}
-  void operator() (T* ptr) const {
+  items_deleter(uint32_t start, uint32_t num, const A& allocator):
+    allocator_(allocator), start_(start), num_(num) {}
+  void operator() (T* ptr) {
     if (ptr != nullptr) {
-      for (uint32_t i = start; i < num; ++i) {
-        ptr[i].~T();
-      }
-      A().deallocate(ptr, num);
+      for (uint32_t i = start_; i < num_; ++i) ptr[i].~T();
+      allocator_.deallocate(ptr, num_);
     }
   }
   private:
-  uint32_t start;
-  uint32_t num;
+  A allocator_;
+  uint32_t start_;
+  uint32_t num_;
 };
 
 } /* namespace datasketches */
diff --git a/be/src/thirdparty/datasketches/memory_operations.hpp b/be/src/thirdparty/datasketches/memory_operations.hpp
index 80dc3a3..986b2b0 100644
--- a/be/src/thirdparty/datasketches/memory_operations.hpp
+++ b/be/src/thirdparty/datasketches/memory_operations.hpp
@@ -52,6 +52,18 @@ static inline size_t copy_to_mem(const void* src, void* dst, size_t size) {
   return size;
 }
 
+template<typename T>
+static inline size_t copy_to_mem(const T& item, void* dst) {
+  memcpy(dst, &item, sizeof(T));
+  return sizeof(T);
+}
+
+template<typename T>
+static inline size_t copy_from_mem(const void* src, T& item) {
+  memcpy(&item, src, sizeof(T));
+  return sizeof(T);
+}
+
 } // namespace
 
 #endif // _MEMORY_OPERATIONS_HPP_
diff --git a/be/src/thirdparty/datasketches/theta_a_not_b.hpp b/be/src/thirdparty/datasketches/theta_a_not_b.hpp
index db66ac7..4beef60 100644
--- a/be/src/thirdparty/datasketches/theta_a_not_b.hpp
+++ b/be/src/thirdparty/datasketches/theta_a_not_b.hpp
@@ -20,51 +20,34 @@
 #ifndef THETA_A_NOT_B_HPP_
 #define THETA_A_NOT_B_HPP_
 
-#include <memory>
-#include <functional>
-#include <climits>
-
 #include "theta_sketch.hpp"
-#include "common_defs.hpp"
+#include "theta_set_difference_base.hpp"
 
 namespace datasketches {
 
-/*
- * author Alexander Saydakov
- * author Lee Rhodes
- * author Kevin Lang
- */
-
-template<typename A>
+template<typename Allocator = std::allocator<uint64_t>>
 class theta_a_not_b_alloc {
 public:
-  /**
-   * Creates an instance of the a-not-b operation (set difference) with a given has seed.
-   * @param seed hash seed
-   */
-  explicit theta_a_not_b_alloc(uint64_t seed = DEFAULT_SEED);
+  using Entry = uint64_t;
+  using ExtractKey = trivial_extract_key;
+  using CompactSketch = compact_theta_sketch_alloc<Allocator>;
+  using State = theta_set_difference_base<Entry, ExtractKey, CompactSketch, Allocator>;
+
+  explicit theta_a_not_b_alloc(uint64_t seed = DEFAULT_SEED, const Allocator& allocator = Allocator());
 
   /**
    * Computes the a-not-b set operation given two sketches.
    * @return the result of a-not-b
    */
-  compact_theta_sketch_alloc<A> compute(const theta_sketch_alloc<A>& a, const theta_sketch_alloc<A>& b, bool ordered = true) const;
+  template<typename FwdSketch, typename Sketch>
+  CompactSketch compute(FwdSketch&& a, const Sketch& b, bool ordered = true) const;
 
 private:
-  typedef typename std::allocator_traits<A>::template rebind_alloc<uint64_t> AllocU64;
-  uint16_t seed_hash_;
-
-  class less_than {
-  public:
-    explicit less_than(uint64_t value): value(value) {}
-    bool operator()(uint64_t value) const { return value < this->value; }
-  private:
-    uint64_t value;
-  };
+  State state_;
 };
 
 // alias with default allocator for convenience
-typedef theta_a_not_b_alloc<std::allocator<void>> theta_a_not_b;
+using theta_a_not_b = theta_a_not_b_alloc<std::allocator<uint64_t>>;
 
 } /* namespace datasketches */
 
diff --git a/be/src/thirdparty/datasketches/theta_a_not_b_impl.hpp b/be/src/thirdparty/datasketches/theta_a_not_b_impl.hpp
index 4343ee3..4c17bbf 100644
--- a/be/src/thirdparty/datasketches/theta_a_not_b_impl.hpp
+++ b/be/src/thirdparty/datasketches/theta_a_not_b_impl.hpp
@@ -26,56 +26,15 @@
 
 namespace datasketches {
 
-/*
- * author Alexander Saydakov
- * author Lee Rhodes
- * author Kevin Lang
- */
-
 template<typename A>
-theta_a_not_b_alloc<A>::theta_a_not_b_alloc(uint64_t seed):
-seed_hash_(theta_sketch_alloc<A>::get_seed_hash(seed))
+theta_a_not_b_alloc<A>::theta_a_not_b_alloc(uint64_t seed, const A& allocator):
+state_(seed, allocator)
 {}
 
 template<typename A>
-compact_theta_sketch_alloc<A> theta_a_not_b_alloc<A>::compute(const theta_sketch_alloc<A>& a, const theta_sketch_alloc<A>& b, bool ordered) const {
-  if (a.is_empty() || a.get_num_retained() == 0 || b.is_empty()) return compact_theta_sketch_alloc<A>(a, ordered);
-  if (a.get_seed_hash() != seed_hash_) throw std::invalid_argument("A seed hash mismatch");
-  if (b.get_seed_hash() != seed_hash_) throw std::invalid_argument("B seed hash mismatch");
-
-  const uint64_t theta = std::min(a.get_theta64(), b.get_theta64());
-  vector_u64<A> keys;
-  bool is_empty = a.is_empty();
-
-  if (b.get_num_retained() == 0) {
-    std::copy_if(a.begin(), a.end(), std::back_inserter(keys), less_than(theta));
-  } else {
-    if (a.is_ordered() && b.is_ordered()) { // sort-based
-      std::set_difference(a.begin(), a.end(), b.begin(), b.end(), conditional_back_inserter(keys, less_than(theta)));
-    } else { // hash-based
-      const uint8_t lg_size = lg_size_from_count(b.get_num_retained(), update_theta_sketch_alloc<A>::REBUILD_THRESHOLD);
-      vector_u64<A> b_hash_table(1 << lg_size, 0);
-      for (auto key: b) {
-        if (key < theta) {
-          update_theta_sketch_alloc<A>::hash_search_or_insert(key, b_hash_table.data(), lg_size);
-        } else if (b.is_ordered()) {
-          break; // early stop
-        }
-      }
-
-      // scan A lookup B
-      for (auto key: a) {
-        if (key < theta) {
-          if (!update_theta_sketch_alloc<A>::hash_search(key, b_hash_table.data(), lg_size)) keys.push_back(key);
-        } else if (a.is_ordered()) {
-          break; // early stop
-        }
-      }
-    }
-  }
-  if (keys.empty() && theta == theta_sketch_alloc<A>::MAX_THETA) is_empty = true;
-  if (ordered && !a.is_ordered()) std::sort(keys.begin(), keys.end());
-  return compact_theta_sketch_alloc<A>(is_empty, theta, std::move(keys), seed_hash_, a.is_ordered() || ordered);
+template<typename FwdSketch, typename Sketch>
+auto theta_a_not_b_alloc<A>::compute(FwdSketch&& a, const Sketch& b, bool ordered) const -> CompactSketch {
+  return state_.compute(std::forward<FwdSketch>(a), b, ordered);
 }
 
 } /* namespace datasketches */
diff --git a/be/src/thirdparty/datasketches/CubicInterpolation.hpp b/be/src/thirdparty/datasketches/theta_comparators.hpp
similarity index 57%
copy from be/src/thirdparty/datasketches/CubicInterpolation.hpp
copy to be/src/thirdparty/datasketches/theta_comparators.hpp
index b9cdfe7..e8a39b7 100644
--- a/be/src/thirdparty/datasketches/CubicInterpolation.hpp
+++ b/be/src/thirdparty/datasketches/theta_comparators.hpp
@@ -17,27 +17,32 @@
  * under the License.
  */
 
-#ifndef _CUBICINTERPOLATION_HPP_
-#define _CUBICINTERPOLATION_HPP_
-
-#include <memory>
+#ifndef THETA_COMPARATORS_HPP_
+#define THETA_COMPARATORS_HPP_
 
 namespace datasketches {
 
-template<typename A = std::allocator<char>>
-class CubicInterpolation {
-  public:
-    static double usingXAndYTables(const double xArr[], const double yArr[],
-                                   int len, double x);
-
-    static double usingXAndYTables(double x);
-
-    static double usingXArrAndYStride(const double xArr[], const int xArrLen,
-                                      double yStride, double x);
+template<typename ExtractKey>
+struct compare_by_key {
+  template<typename Entry1, typename Entry2>
+  bool operator()(Entry1&& a, Entry2&& b) const {
+    return ExtractKey()(std::forward<Entry1>(a)) < ExtractKey()(std::forward<Entry2>(b));
+  }
 };
 
-}
+// less than
+
+template<typename Key, typename Entry, typename ExtractKey>
+class key_less_than {
+public:
+  explicit key_less_than(const Key& key): key(key) {}
+  bool operator()(const Entry& entry) const {
+    return ExtractKey()(entry) < this->key;
+  }
+private:
+  Key key;
+};
 
-#include "CubicInterpolation-internal.hpp"
+} /* namespace datasketches */
 
-#endif /* _CUBICINTERPOLATION_HPP_ */
\ No newline at end of file
+#endif
diff --git a/be/src/thirdparty/datasketches/CompositeInterpolationXTable.hpp b/be/src/thirdparty/datasketches/theta_constants.hpp
similarity index 66%
copy from be/src/thirdparty/datasketches/CompositeInterpolationXTable.hpp
copy to be/src/thirdparty/datasketches/theta_constants.hpp
index 8baecbe..d5d6fd9 100644
--- a/be/src/thirdparty/datasketches/CompositeInterpolationXTable.hpp
+++ b/be/src/thirdparty/datasketches/theta_constants.hpp
@@ -17,24 +17,20 @@
  * under the License.
  */
 
-#ifndef _COMPOSITEINTERPOLATIONXTABLE_HPP_
-#define _COMPOSITEINTERPOLATIONXTABLE_HPP_
+#ifndef THETA_CONSTANTS_HPP_
+#define THETA_CONSTANTS_HPP_
 
-#include <memory>
+#include <climits>
 
 namespace datasketches {
 
-template<typename A = std::allocator<char>>
-class CompositeInterpolationXTable {
-  public:
-    static int get_y_stride(int logK);
-
-    static const double* get_x_arr(int logK);
-    static int get_x_arr_length();
-};
-
+namespace theta_constants {
+  enum resize_factor { X1, X2, X4, X8 };
+  static const uint64_t MAX_THETA = LLONG_MAX; // signed max for compatibility with Java
+  static const uint8_t MIN_LG_K = 5;
+  static const uint8_t MAX_LG_K = 26;
 }
 
-#include "CompositeInterpolationXTable-internal.hpp"
+} /* namespace datasketches */
 
-#endif /* _COMPOSITEINTERPOLATIONXTABLE_HPP_ */
\ No newline at end of file
+#endif
diff --git a/be/src/thirdparty/datasketches/theta_helpers.hpp b/be/src/thirdparty/datasketches/theta_helpers.hpp
new file mode 100644
index 0000000..6852590
--- /dev/null
+++ b/be/src/thirdparty/datasketches/theta_helpers.hpp
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THETA_HELPERS_HPP_
+#define THETA_HELPERS_HPP_
+
+#include <cstdint>
+#include <string>
+#include <stdexcept>
+
+namespace datasketches {
+
+// Throws std::invalid_argument whose message names the description and both values
+// when actual differs from expected. T must be a type accepted by std::to_string.
+template<typename T>
+static void check_value(T actual, T expected, const char* description) {
+  if (actual != expected) {
+    throw std::invalid_argument(std::string(description) + " mismatch: expected " + std::to_string(expected) + ", actual " + std::to_string(actual));
+  }
+}
+
+// Named wrappers around check_value, one per checked field. The bool template
+// parameter is not used by any member (presumably only keeps the class header-only).
+template<bool dummy>
+class checker {
+public:
+  static void check_serial_version(uint8_t actual, uint8_t expected) {
+    check_value(actual, expected, "serial version");
+  }
+  static void check_sketch_family(uint8_t actual, uint8_t expected) {
+    check_value(actual, expected, "sketch family");
+  }
+  static void check_sketch_type(uint8_t actual, uint8_t expected) {
+    check_value(actual, expected, "sketch type");
+  }
+  static void check_seed_hash(uint16_t actual, uint16_t expected) {
+    check_value(actual, expected, "seed hash");
+  }
+};
+
+} /* namespace datasketches */
+
+#endif
diff --git a/be/src/thirdparty/datasketches/theta_intersection.hpp b/be/src/thirdparty/datasketches/theta_intersection.hpp
index 5945c52..98a8bf1 100644
--- a/be/src/thirdparty/datasketches/theta_intersection.hpp
+++ b/be/src/thirdparty/datasketches/theta_intersection.hpp
@@ -20,29 +20,28 @@
 #ifndef THETA_INTERSECTION_HPP_
 #define THETA_INTERSECTION_HPP_
 
-#include <memory>
-#include <functional>
-#include <climits>
-
 #include "theta_sketch.hpp"
-#include "common_defs.hpp"
+#include "theta_intersection_base.hpp"
 
 namespace datasketches {
 
-/*
- * author Alexander Saydakov
- * author Lee Rhodes
- * author Kevin Lang
- */
-
-template<typename A>
+template<typename Allocator = std::allocator<uint64_t>>
 class theta_intersection_alloc {
 public:
-  /**
-   * Creates an instance of the intersection with a given hash seed.
-   * @param seed hash seed
-   */
-  explicit theta_intersection_alloc(uint64_t seed = DEFAULT_SEED);
+  using Entry = uint64_t;
+  using ExtractKey = trivial_extract_key;
+  using Sketch = theta_sketch_alloc<Allocator>;
+  using CompactSketch = compact_theta_sketch_alloc<Allocator>;
+
+  struct pass_through_policy {
+    uint64_t operator()(uint64_t internal_entry, uint64_t incoming_entry) const {
+      unused(incoming_entry);
+      return internal_entry;
+    }
+  };
+  using State = theta_intersection_base<Entry, ExtractKey, pass_through_policy, Sketch, CompactSketch, Allocator>;
+
+  explicit theta_intersection_alloc(uint64_t seed = DEFAULT_SEED, const Allocator& allocator = Allocator());
 
   /**
    * Updates the intersection with a given sketch.
@@ -50,7 +49,8 @@ public:
    * can reduce the current set to leave the overlapping subset only.
    * @param sketch represents input set for the intersection
    */
-  void update(const theta_sketch_alloc<A>& sketch);
+  template<typename FwdSketch>
+  void update(FwdSketch&& sketch);
 
   /**
    * Produces a copy of the current state of the intersection.
@@ -59,7 +59,7 @@ public:
    * @param ordered optional flag to specify if ordered sketch should be produced
    * @return the result of the intersection
    */
-  compact_theta_sketch_alloc<A> get_result(bool ordered = true) const;
+  CompactSketch get_result(bool ordered = true) const;
 
   /**
    * Returns true if the state of the intersection is defined (not infinite "universe").
@@ -68,21 +68,14 @@ public:
   bool has_result() const;
 
 private:
-  typedef typename std::allocator_traits<A>::template rebind_alloc<uint64_t> AllocU64;
-  bool is_valid_;
-  bool is_empty_;
-  uint64_t theta_;
-  uint8_t lg_size_;
-  vector_u64<A> keys_;
-  uint32_t num_keys_;
-  uint16_t seed_hash_;
+  State state_;
 };
 
 // alias with default allocator for convenience
-typedef theta_intersection_alloc<std::allocator<void>> theta_intersection;
+using theta_intersection = theta_intersection_alloc<std::allocator<uint64_t>>;
 
 } /* namespace datasketches */
 
 #include "theta_intersection_impl.hpp"
 
-# endif
+#endif
diff --git a/be/src/thirdparty/datasketches/theta_intersection_base.hpp b/be/src/thirdparty/datasketches/theta_intersection_base.hpp
new file mode 100644
index 0000000..c034590
--- /dev/null
+++ b/be/src/thirdparty/datasketches/theta_intersection_base.hpp
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THETA_INTERSECTION_BASE_HPP_
+#define THETA_INTERSECTION_BASE_HPP_
+
+namespace datasketches {
+
+template<
+  typename Entry,
+  typename ExtractKey,
+  typename Policy,
+  typename Sketch,
+  typename CompactSketch,
+  typename Allocator
+>
+class theta_intersection_base {
+public:
+  using hash_table = theta_update_sketch_base<Entry, ExtractKey, Allocator>;
+  using resize_factor = typename hash_table::resize_factor;
+  using comparator = compare_by_key<ExtractKey>;
+  theta_intersection_base(uint64_t seed, const Policy& policy, const Allocator& allocator);
+
+  template<typename FwdSketch>
+  void update(FwdSketch&& sketch);
+
+  CompactSketch get_result(bool ordered = true) const;
+
+  bool has_result() const;
+
+  const Policy& get_policy() const;
+
+private:
+  Policy policy_;
+  bool is_valid_;
+  hash_table table_;
+};
+
+} /* namespace datasketches */
+
+#include "theta_intersection_base_impl.hpp"
+
+#endif
diff --git a/be/src/thirdparty/datasketches/theta_intersection_base_impl.hpp b/be/src/thirdparty/datasketches/theta_intersection_base_impl.hpp
new file mode 100644
index 0000000..286f0ca
--- /dev/null
+++ b/be/src/thirdparty/datasketches/theta_intersection_base_impl.hpp
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <iostream>
+#include <sstream>
+#include <algorithm>
+
+#include "conditional_forward.hpp"
+
+namespace datasketches {
+
+// Creates an intersection that has no result yet: the policy is copied, the state is
+// marked invalid, and the table starts with zero lg sizes, theta at MAX_THETA,
+// the given seed and allocator, and the empty flag cleared.
+template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
+theta_intersection_base<EN, EK, P, S, CS, A>::theta_intersection_base(uint64_t seed, const P& policy, const A& allocator)
+  : policy_(policy)
+  , is_valid_(false)
+  , table_(0, 0, resize_factor::X1, theta_constants::MAX_THETA, seed, allocator, false)
+{
+}
+
+// Folds one more sketch into the intersection state.
+// - If the state is already flagged empty, the call is a no-op.
+// - Theta becomes the minimum of the current theta and the incoming sketch's theta.
+// - The first effective call fills the table from the incoming entries (copied or moved
+//   through conditional_forward); later calls keep only the entries whose keys are found
+//   in the table, after applying policy_ to each matching pair.
+// Throws std::invalid_argument on seed hash mismatch and when the entries seen during
+// iteration disagree with what the sketch reports (treated as possible corruption).
+template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
+template<typename SS>
+void theta_intersection_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
+  // the state already represents the empty set: no further input can change it
+  if (table_.is_empty_) return;
+  // the seed hash is only compared for non-empty input
+  if (!sketch.is_empty() && sketch.get_seed_hash() != compute_seed_hash(table_.seed_)) throw std::invalid_argument("seed hash mismatch");
+  table_.is_empty_ |= sketch.is_empty();
+  table_.theta_ = std::min(table_.theta_, sketch.get_theta64());
+  // a result with zero entries was already established: nothing is left to match
+  if (is_valid_ && table_.num_entries_ == 0) return;
+  if (sketch.get_num_retained() == 0) {
+    // incoming sketch retains nothing: switch to a zero-size table,
+    // carrying over theta, seed, allocator and the empty flag
+    is_valid_ = true;
+    table_ = hash_table(0, 0, resize_factor::X1, table_.theta_, table_.seed_, table_.allocator_, table_.is_empty_);
+    return;
+  }
+  if (!is_valid_) { // first update, copy or move incoming sketch
+    is_valid_ = true;
+    const uint8_t lg_size = lg_size_from_count(sketch.get_num_retained(), theta_update_sketch_base<EN, EK, A>::REBUILD_THRESHOLD);
+    table_ = hash_table(lg_size, lg_size, resize_factor::X1, table_.theta_, table_.seed_, table_.allocator_, table_.is_empty_);
+    for (auto& entry: sketch) {
+      // find() reports in .second whether the key is already present
+      auto result = table_.find(EK()(entry));
+      if (result.second) {
+        throw std::invalid_argument("duplicate key, possibly corrupted input sketch");
+      }
+      table_.insert(result.first, conditional_forward<SS>(entry));
+    }
+    if (table_.num_entries_ != sketch.get_num_retained()) throw std::invalid_argument("num entries mismatch, possibly corrupted input sketch");
+  } else { // intersection
+    // there cannot be more matches than the smaller of the two entry counts
+    const uint32_t max_matches = std::min(table_.num_entries_, sketch.get_num_retained());
+    std::vector<EN, A> matched_entries(table_.allocator_);
+    matched_entries.reserve(max_matches);
+    uint32_t match_count = 0;
+    uint32_t count = 0; // entries visited before any early stop
+    for (auto& entry: sketch) {
+      // only keys below the already lowered theta are candidates
+      if (EK()(entry) < table_.theta_) {
+        auto result = table_.find(EK()(entry));
+        if (result.second) {
+          if (match_count == max_matches) throw std::invalid_argument("max matches exceeded, possibly corrupted input sketch");
+          // combine the stored entry with the incoming one, then move the stored entry out;
+          // the table is rebuilt below, so the moved-from slot is not read again
+          policy_(*result.first, conditional_forward<SS>(entry));
+          matched_entries.push_back(std::move(*result.first));
+          ++match_count;
+        }
+      } else if (sketch.is_ordered()) {
+        break; // early stop
+      }
+      ++count;
+    }
+    // an ordered sketch may have been cut short by the early stop,
+    // so the "fewer" check applies to unordered input only
+    if (count > sketch.get_num_retained()) {
+      throw std::invalid_argument(" more keys than expected, possibly corrupted input sketch");
+    } else if (!sketch.is_ordered() && count < sketch.get_num_retained()) {
+      throw std::invalid_argument(" fewer keys than expected, possibly corrupted input sketch");
+    }
+    if (match_count == 0) {
+      table_ = hash_table(0, 0, resize_factor::X1, table_.theta_, table_.seed_, table_.allocator_, table_.is_empty_);
+      // nothing matched and theta is still MAX_THETA: flag the result as empty
+      if (table_.theta_ == theta_constants::MAX_THETA) table_.is_empty_ = true;
+    } else {
+      // rebuild a table sized for the matches and re-insert them
+      const uint8_t lg_size = lg_size_from_count(match_count, theta_update_sketch_base<EN, EK, A>::REBUILD_THRESHOLD);
+      table_ = hash_table(lg_size, lg_size, resize_factor::X1, table_.theta_, table_.seed_, table_.allocator_, table_.is_empty_);
+      for (uint32_t i = 0; i < match_count; i++) {
+        auto result = table_.find(EK()(matched_entries[i]));
+        table_.insert(result.first, std::move(matched_entries[i]));
+      }
+    }
+  }
+}
+
+// Builds a compact sketch from the current intersection state.
+// Entries accepted by key_not_zero are copied out of the table and, when ordered is true,
+// sorted with the class comparator.
+// Throws std::invalid_argument if no update() has established a result yet.
+template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
+CS theta_intersection_base<EN, EK, P, S, CS, A>::get_result(bool ordered) const {
+  if (!is_valid_) {
+    throw std::invalid_argument("calling get_result() before calling update() is undefined");
+  }
+
+  std::vector<EN, A> retained(table_.allocator_);
+  const bool has_entries = table_.num_entries_ > 0;
+  if (has_entries) {
+    retained.reserve(table_.num_entries_);
+    // key_not_zero drops slots whose key is zero (presumably the unused ones)
+    std::copy_if(table_.begin(), table_.end(), std::back_inserter(retained), key_not_zero<EN, EK>());
+    if (ordered) {
+      std::sort(retained.begin(), retained.end(), comparator());
+    }
+  }
+
+  return CS(table_.is_empty_, ordered, compute_seed_hash(table_.seed_), table_.theta_, std::move(retained));
+}
+
+// Reports whether a result is available, i.e. the value of is_valid_,
+// which update() sets to true.
+template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
+bool theta_intersection_base<EN, EK, P, S, CS, A>::has_result() const {
+  return is_valid_;
+}
+
+// Returns a const reference to the policy held by this object.
+template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
+const P& theta_intersection_base<EN, EK, P, S, CS, A>::get_policy() const {
+  return policy_;
+}
+
+} /* namespace datasketches */
diff --git a/be/src/thirdparty/datasketches/theta_intersection_impl.hpp b/be/src/thirdparty/datasketches/theta_intersection_impl.hpp
index d090b3a..a0c4291 100644
--- a/be/src/thirdparty/datasketches/theta_intersection_impl.hpp
+++ b/be/src/thirdparty/datasketches/theta_intersection_impl.hpp
@@ -20,109 +20,27 @@
 #ifndef THETA_INTERSECTION_IMPL_HPP_
 #define THETA_INTERSECTION_IMPL_HPP_
 
-#include <algorithm>
-
 namespace datasketches {
 
-/*
- * author Alexander Saydakov
- * author Lee Rhodes
- * author Kevin Lang
- */
-
 template<typename A>
-theta_intersection_alloc<A>::theta_intersection_alloc(uint64_t seed):
-is_valid_(false),
-is_empty_(false),
-theta_(theta_sketch_alloc<A>::MAX_THETA),
-lg_size_(0),
-keys_(),
-num_keys_(0),
-seed_hash_(theta_sketch_alloc<A>::get_seed_hash(seed))
+theta_intersection_alloc<A>::theta_intersection_alloc(uint64_t seed, const A& allocator):
+state_(seed, pass_through_policy(), allocator)
 {}
 
 template<typename A>
-void theta_intersection_alloc<A>::update(const theta_sketch_alloc<A>& sketch) {
-  if (is_empty_) return;
-  if (!sketch.is_empty() && sketch.get_seed_hash() != seed_hash_) throw std::invalid_argument("seed hash mismatch");
-  is_empty_ |= sketch.is_empty();
-  theta_ = std::min(theta_, sketch.get_theta64());
-  if (is_valid_ && num_keys_ == 0) return;
-  if (sketch.get_num_retained() == 0) {
-    is_valid_ = true;
-    if (keys_.size() > 0) {
-      keys_.resize(0);
-      lg_size_ = 0;
-      num_keys_ = 0;
-    }
-    return;
-  }
-  if (!is_valid_) { // first update, clone incoming sketch
-    is_valid_ = true;
-    lg_size_ = lg_size_from_count(sketch.get_num_retained(), update_theta_sketch_alloc<A>::REBUILD_THRESHOLD);
-    keys_.resize(1 << lg_size_, 0);
-    for (auto key: sketch) {
-      if (!update_theta_sketch_alloc<A>::hash_search_or_insert(key, keys_.data(), lg_size_)) {
-        throw std::invalid_argument("duplicate key, possibly corrupted input sketch");
-      }
-      ++num_keys_;
-    }
-    if (num_keys_ != sketch.get_num_retained()) throw std::invalid_argument("num keys mismatch, possibly corrupted input sketch");
-  } else { // intersection
-    const uint32_t max_matches = std::min(num_keys_, sketch.get_num_retained());
-    vector_u64<A> matched_keys(max_matches);
-    uint32_t match_count = 0;
-    uint32_t count = 0;
-    for (auto key: sketch) {
-      if (key < theta_) {
-        if (update_theta_sketch_alloc<A>::hash_search(key, keys_.data(), lg_size_)) {
-          if (match_count == max_matches) throw std::invalid_argument("max matches exceeded, possibly corrupted input sketch");
-          matched_keys[match_count++] = key;
-        }
-      } else if (sketch.is_ordered()) {
-        break; // early stop
-      }
-      ++count;
-    }
-    if (count > sketch.get_num_retained()) {
-      throw std::invalid_argument(" more keys then expected, possibly corrupted input sketch");
-    } else if (!sketch.is_ordered() && count < sketch.get_num_retained()) {
-      throw std::invalid_argument(" fewer keys then expected, possibly corrupted input sketch");
-    }
-    if (match_count == 0) {
-      keys_.resize(0);
-      lg_size_ = 0;
-      num_keys_ = 0;
-      if (theta_ == theta_sketch_alloc<A>::MAX_THETA) is_empty_ = true;
-    } else {
-      const uint8_t lg_size = lg_size_from_count(match_count, update_theta_sketch_alloc<A>::REBUILD_THRESHOLD);
-      if (lg_size != lg_size_) {
-        lg_size_ = lg_size;
-        keys_.resize(1 << lg_size_);
-      }
-      std::fill(keys_.begin(), keys_.end(), 0);
-      for (uint32_t i = 0; i < match_count; i++) {
-        update_theta_sketch_alloc<A>::hash_search_or_insert(matched_keys[i], keys_.data(), lg_size_);
-      }
-      num_keys_ = match_count;
-    }
-  }
+template<typename SS>
+void theta_intersection_alloc<A>::update(SS&& sketch) {
+  state_.update(std::forward<SS>(sketch));
 }
 
 template<typename A>
-compact_theta_sketch_alloc<A> theta_intersection_alloc<A>::get_result(bool ordered) const {
-  if (!is_valid_) throw std::invalid_argument("calling get_result() before calling update() is undefined");
-  vector_u64<A> keys(num_keys_);
-  if (num_keys_ > 0) {
-    std::copy_if(keys_.begin(), keys_.end(), keys.begin(), [](uint64_t key) { return key != 0; });
-    if (ordered) std::sort(keys.begin(), keys.end());
-  }
-  return compact_theta_sketch_alloc<A>(is_empty_, theta_, std::move(keys), seed_hash_, ordered);
+auto theta_intersection_alloc<A>::get_result(bool ordered) const -> CompactSketch {
+  return state_.get_result(ordered);
 }
 
 template<typename A>
 bool theta_intersection_alloc<A>::has_result() const {
-  return is_valid_;
+  return state_.has_result();
 }
 
 } /* namespace datasketches */
diff --git a/be/src/thirdparty/datasketches/CompositeInterpolationXTable.hpp b/be/src/thirdparty/datasketches/theta_jaccard_similarity.hpp
similarity index 59%
copy from be/src/thirdparty/datasketches/CompositeInterpolationXTable.hpp
copy to be/src/thirdparty/datasketches/theta_jaccard_similarity.hpp
index 8baecbe..417ed54 100644
--- a/be/src/thirdparty/datasketches/CompositeInterpolationXTable.hpp
+++ b/be/src/thirdparty/datasketches/theta_jaccard_similarity.hpp
@@ -17,24 +17,21 @@
  * under the License.
  */
 
-#ifndef _COMPOSITEINTERPOLATIONXTABLE_HPP_
-#define _COMPOSITEINTERPOLATIONXTABLE_HPP_
+#ifndef THETA_JACCARD_SIMILARITY_HPP_
+#define THETA_JACCARD_SIMILARITY_HPP_
 
-#include <memory>
+#include "theta_jaccard_similarity_base.hpp"
+#include "theta_union.hpp"
+#include "theta_intersection.hpp"
 
 namespace datasketches {
 
-template<typename A = std::allocator<char>>
-class CompositeInterpolationXTable {
-  public:
-    static int get_y_stride(int logK);
+template<typename Allocator = std::allocator<uint64_t>>
+using theta_jaccard_similarity_alloc = jaccard_similarity_base<theta_union_alloc<Allocator>, theta_intersection_alloc<Allocator>, trivial_extract_key>;
 
-    static const double* get_x_arr(int logK);
-    static int get_x_arr_length();
-};
+// alias with default allocator for convenience
+using theta_jaccard_similarity = theta_jaccard_similarity_alloc<std::allocator<uint64_t>>;
 
-}
+} /* namespace datasketches */
 
-#include "CompositeInterpolationXTable-internal.hpp"
-
-#endif /* _COMPOSITEINTERPOLATIONXTABLE_HPP_ */
\ No newline at end of file
+# endif
diff --git a/be/src/thirdparty/datasketches/theta_jaccard_similarity_base.hpp b/be/src/thirdparty/datasketches/theta_jaccard_similarity_base.hpp
new file mode 100644
index 0000000..cb18601
--- /dev/null
+++ b/be/src/thirdparty/datasketches/theta_jaccard_similarity_base.hpp
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THETA_JACCARD_SIMILARITY_BASE_HPP_
+#define THETA_JACCARD_SIMILARITY_BASE_HPP_
+
+#include <memory>
+#include <array>
+
+#include "theta_constants.hpp"
+#include "bounds_on_ratios_in_theta_sketched_sets.hpp"
+#include "ceiling_power_of_2.hpp"
+#include "common_defs.hpp"
+
+namespace datasketches {
+
+template<typename Union, typename Intersection, typename ExtractKey>
+class jaccard_similarity_base {
+public:
+
+  /**
+   * Estimates the Jaccard similarity index <i>J(A,B) = (A ^ B)/(A U B)</i> of two sketches
+   * and reports it together with its lower and upper bounds.
+   * J = 1.0 means the sketches are considered equal, J = 0 means they are disjoint,
+   * and, for example, J = .95 means the overlap is 95% of the union of the two sets.
+   *
+   * <p>Note: For very large pairs of sketches, where the configured nominal entries of the sketches
+   * are 2^25 or 2^26, this method may produce unpredictable results.
+   *
+   * @param sketch_a given sketch A
+   * @param sketch_b given sketch B
+   * @return a double array {LowerBound, Estimate, UpperBound} of the Jaccard index.
+   * The Upper and Lower bounds are for a confidence interval of 95.4% or +/- 2 standard deviations.
+   */
+  template<typename SketchA, typename SketchB>
+  static std::array<double, 3> jaccard(const SketchA& sketch_a, const SketchB& sketch_b) {
+    if (same_object(sketch_a, sketch_b)) return {1.0, 1.0, 1.0};
+    if (sketch_a.is_empty()) {
+      // two empty sets are treated as equal, an empty and a non-empty one as disjoint
+      if (sketch_b.is_empty()) return {1.0, 1.0, 1.0};
+      return {0.0, 0.0, 0.0};
+    }
+    if (sketch_b.is_empty()) return {0.0, 0.0, 0.0};
+
+    auto union_ab = compute_union(sketch_a, sketch_b);
+    if (identical_sets(sketch_a, sketch_b, union_ab)) return {1.0, 1.0, 1.0};
+
+    // intersect A, B and their union; the last step ensures that
+    // the intersection is a subset of the union
+    Intersection inter;
+    inter.update(sketch_a);
+    inter.update(sketch_b);
+    inter.update(union_ab);
+    auto inter_abu = inter.get_result(false);
+
+    using bounds = bounds_on_ratios_in_theta_sketched_sets<ExtractKey>;
+    return {
+      bounds::lower_bound_for_b_over_a(union_ab, inter_abu),
+      bounds::estimate_of_b_over_a(union_ab, inter_abu),
+      bounds::upper_bound_for_b_over_a(union_ab, inter_abu)
+    };
+  }
+
+  /**
+   * Checks whether the two given sketches are equivalent.
+   * @param sketch_a the given sketch A
+   * @param sketch_b the given sketch B
+   * @return true if the two given sketches are exactly equal
+   */
+  template<typename SketchA, typename SketchB>
+  static bool exactly_equal(const SketchA& sketch_a, const SketchB& sketch_b) {
+    if (same_object(sketch_a, sketch_b)) return true;
+    if (sketch_a.is_empty()) return sketch_b.is_empty();
+    if (sketch_b.is_empty()) return false;
+
+    return identical_sets(sketch_a, sketch_b, compute_union(sketch_a, sketch_b));
+  }
+
+  /**
+   * Tests similarity of an actual sketch against an expected sketch using the lower bound
+   * <i>J<sub>LB</sub></i> of their Jaccard index.
+   * If <i>J<sub>LB</sub> &ge; threshold</i>, the sketches are considered to be
+   * similar with a confidence of 97.7%.
+   *
+   * @param actual the sketch to be tested
+   * @param expected the reference sketch that is considered to be correct
+   * @param threshold a real value between zero and one
+   * @return true if the lower bound of the Jaccard index is at least the given threshold
+   */
+  template<typename SketchA, typename SketchB>
+  static bool similarity_test(const SketchA& actual, const SketchB& expected, double threshold) {
+    return jaccard(actual, expected)[0] >= threshold;
+  }
+
+  /**
+   * Tests dissimilarity of an actual sketch against an expected sketch using the upper bound
+   * <i>J<sub>UB</sub></i> of their Jaccard index.
+   * If <i>J<sub>UB</sub> &le; threshold</i>, the sketches are considered to be
+   * dissimilar with a confidence of 97.7%.
+   *
+   * @param actual the sketch to be tested
+   * @param expected the reference sketch that is considered to be correct
+   * @param threshold a real value between zero and one
+   * @return true if the upper bound of the Jaccard index is at most the given threshold
+   */
+  template<typename SketchA, typename SketchB>
+  static bool dissimilarity_test(const SketchA& actual, const SketchB& expected, double threshold) {
+    return jaccard(actual, expected)[2] <= threshold;
+  }
+
+private:
+
+  // true when both references designate the very same object
+  template<typename SketchA, typename SketchB>
+  static bool same_object(const SketchA& sketch_a, const SketchB& sketch_b) {
+    return static_cast<const void*>(&sketch_a) == static_cast<const void*>(&sketch_b);
+  }
+
+  // Unions the two sketches with lg_k derived from the combined retained counts,
+  // clamped to [MIN_LG_K, MAX_LG_K]; the result is requested unordered.
+  template<typename SketchA, typename SketchB>
+  static typename Union::CompactSketch compute_union(const SketchA& sketch_a, const SketchB& sketch_b) {
+    const unsigned retained_a = sketch_a.get_num_retained();
+    const unsigned retained_b = sketch_b.get_num_retained();
+    const unsigned lg_k = std::min(
+        std::max(log2(ceiling_power_of_2(retained_a + retained_b)), theta_constants::MIN_LG_K),
+        theta_constants::MAX_LG_K);
+    auto merger = typename Union::builder().set_lg_k(lg_k).build();
+    merger.update(sketch_a);
+    merger.update(sketch_b);
+    return merger.get_result(false);
+  }
+
+  // The sets are taken as identical when the union has the same retained count
+  // and the same theta as each of the two inputs.
+  template<typename SketchA, typename SketchB, typename UnionAB>
+  static bool identical_sets(const SketchA& sketch_a, const SketchB& sketch_b, const UnionAB& union_ab) {
+    return union_ab.get_num_retained() == sketch_a.get_num_retained()
+        && union_ab.get_num_retained() == sketch_b.get_num_retained()
+        && union_ab.get_theta64() == sketch_a.get_theta64()
+        && union_ab.get_theta64() == sketch_b.get_theta64();
+  }
+
+};
+
+} /* namespace datasketches */
+
+# endif
diff --git a/be/src/thirdparty/datasketches/theta_set_difference_base.hpp b/be/src/thirdparty/datasketches/theta_set_difference_base.hpp
new file mode 100644
index 0000000..5cc601f
--- /dev/null
+++ b/be/src/thirdparty/datasketches/theta_set_difference_base.hpp
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THETA_SET_DIFFERENCE_BASE_HPP_
+#define THETA_SET_DIFFERENCE_BASE_HPP_
+
+#include "theta_comparators.hpp"
+#include "theta_update_sketch_base.hpp"
+
+namespace datasketches {
+
+/*
+ * Computes the set difference of two sketches (entries of A that are not in B).
+ * Holds only an allocator and the hash of the seed; compute() is const and
+ * keeps no state between calls.
+ */
+template<
+  typename Entry,
+  typename ExtractKey,
+  typename CompactSketch,
+  typename Allocator
+>
+class theta_set_difference_base {
+public:
+  using comparator = compare_by_key<ExtractKey>;
+  // the temporary lookup table stores bare 64-bit keys, hence the allocator rebound to uint64_t
+  using AllocU64 = typename std::allocator_traits<Allocator>::template rebind_alloc<uint64_t>;
+  using hash_table = theta_update_sketch_base<uint64_t, trivial_extract_key, AllocU64>;
+
+  // Stores the allocator and the seed hash computed from the given seed.
+  theta_set_difference_base(uint64_t seed, const Allocator& allocator = Allocator());
+
+  // Returns a compact sketch with the entries of a whose keys are not found in b.
+  // a may be an lvalue or an rvalue; ordered requests a result sorted by key.
+  // Throws std::invalid_argument on seed hash mismatch.
+  template<typename FwdSketch, typename Sketch>
+  CompactSketch compute(FwdSketch&& a, const Sketch& b, bool ordered) const;
+
+private:
+  Allocator allocator_;
+  uint16_t seed_hash_;
+};
+
+} /* namespace datasketches */
+
+#include "theta_set_difference_base_impl.hpp"
+
+#endif
diff --git a/be/src/thirdparty/datasketches/theta_set_difference_base_impl.hpp b/be/src/thirdparty/datasketches/theta_set_difference_base_impl.hpp
new file mode 100644
index 0000000..4ab98a8
--- /dev/null
+++ b/be/src/thirdparty/datasketches/theta_set_difference_base_impl.hpp
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THETA_A_SET_DIFFERENCE_BASE_IMPL_HPP_
+#define THETA_A_SET_DIFFERENCE_BASE_IMPL_HPP_
+
+#include <algorithm>
+
+#include "conditional_back_inserter.hpp"
+#include "conditional_forward.hpp"
+
+namespace datasketches {
+
+// Keeps a copy of the allocator and the hash of the given seed,
+// which compute() compares against the seed hashes of its inputs.
+template<typename EN, typename EK, typename CS, typename A>
+theta_set_difference_base<EN, EK, CS, A>::theta_set_difference_base(uint64_t seed, const A& allocator)
+  : allocator_(allocator)
+  , seed_hash_(compute_seed_hash(seed))
+{
+}
+
+// Computes the entries of a that are not present in b, limited to keys below
+// min(theta of a, theta of b).
+// Strategy:
+// - a empty, a without retained entries, or b empty: the result is a compact form of a
+// - b without retained entries: entries of a below theta are kept
+// - both inputs ordered: std::set_difference over the two sequences
+// - otherwise: keys of b go into a temporary hash table, then a is scanned against it
+// Throws std::invalid_argument if the seed hash of a or b differs from the one held here
+// (checked only when the early return above is not taken).
+template<typename EN, typename EK, typename CS, typename A>
+template<typename FwdSketch, typename Sketch>
+CS theta_set_difference_base<EN, EK, CS, A>::compute(FwdSketch&& a, const Sketch& b, bool ordered) const {
+  if (a.is_empty() || a.get_num_retained() == 0 || b.is_empty()) return CS(a, ordered);
+  if (a.get_seed_hash() != seed_hash_) throw std::invalid_argument("A seed hash mismatch");
+  if (b.get_seed_hash() != seed_hash_) throw std::invalid_argument("B seed hash mismatch");
+
+  const uint64_t theta = std::min(a.get_theta64(), b.get_theta64());
+  std::vector<EN, A> entries(allocator_);
+  bool is_empty = a.is_empty();
+
+  if (b.get_num_retained() == 0) {
+    // nothing to subtract: keep the entries of a that are below the common theta
+    std::copy_if(forward_begin(std::forward<FwdSketch>(a)), forward_end(std::forward<FwdSketch>(a)), std::back_inserter(entries),
+        key_less_than<uint64_t, EN, EK>(theta));
+  } else {
+    if (a.is_ordered() && b.is_ordered()) { // sort-based
+      // the inserter only accepts entries whose key is below theta
+      std::set_difference(forward_begin(std::forward<FwdSketch>(a)), forward_end(std::forward<FwdSketch>(a)), b.begin(), b.end(),
+          conditional_back_inserter(entries, key_less_than<uint64_t, EN, EK>(theta)), comparator());
+    } else { // hash-based
+      const uint8_t lg_size = lg_size_from_count(b.get_num_retained(), hash_table::REBUILD_THRESHOLD);
+      hash_table table(lg_size, lg_size, hash_table::resize_factor::X1, 0, 0, allocator_); // theta and seed are not used here
+      // load the keys of b that are below theta
+      for (const auto& entry: b) {
+        const uint64_t hash = EK()(entry);
+        if (hash < theta) {
+          table.insert(table.find(hash).first, hash);
+        } else if (b.is_ordered()) {
+          break; // early stop
+        }
+      }
+
+      // scan A lookup B
+      for (auto& entry: a) {
+        const uint64_t hash = EK()(entry);
+        if (hash < theta) {
+          auto result = table.find(hash);
+          // .second tells whether the key was found in b; only entries not found are kept
+          if (!result.second) entries.push_back(conditional_forward<FwdSketch>(entry));
+        } else if (a.is_ordered()) {
+          break; // early stop
+        }
+      }
+    }
+  }
+  // no surviving entries and theta still at MAX_THETA: flag the result as empty
+  if (entries.empty() && theta == theta_constants::MAX_THETA) is_empty = true;
+  // sorting is only needed when a was not ordered
+  if (ordered && !a.is_ordered()) std::sort(entries.begin(), entries.end(), comparator());
+  return CS(is_empty, a.is_ordered() || ordered, seed_hash_, theta, std::move(entries));
+}
+
+} /* namespace datasketches */
+
+#endif
diff --git a/be/src/thirdparty/datasketches/theta_sketch.hpp b/be/src/thirdparty/datasketches/theta_sketch.hpp
index b809f71..2e24168 100644
--- a/be/src/thirdparty/datasketches/theta_sketch.hpp
+++ b/be/src/thirdparty/datasketches/theta_sketch.hpp
@@ -20,45 +20,29 @@
 #ifndef THETA_SKETCH_HPP_
 #define THETA_SKETCH_HPP_
 
-#include <memory>
-#include <functional>
-#include <climits>
-#include <vector>
-
-#include "common_defs.hpp"
+#include "theta_update_sketch_base.hpp"
 
 namespace datasketches {
 
-/*
- * author Alexander Saydakov
- * author Lee Rhodes
- * author Kevin Lang
- */
-
-// forward-declarations
-template<typename A> class theta_sketch_alloc;
-template<typename A> class update_theta_sketch_alloc;
-template<typename A> class compact_theta_sketch_alloc;
-template<typename A> class theta_union_alloc;
-template<typename A> class theta_intersection_alloc;
-template<typename A> class theta_a_not_b_alloc;
-
-// for serialization as raw bytes
-template<typename A> using AllocU8 = typename std::allocator_traits<A>::template rebind_alloc<uint8_t>;
-template<typename A> using vector_u8 = std::vector<uint8_t, AllocU8<A>>;
-
-template<typename A>
+template<typename Allocator = std::allocator<uint64_t>>
 class theta_sketch_alloc {
 public:
-  static const uint64_t MAX_THETA = LLONG_MAX; // signed max for compatibility with Java
-  static const uint8_t SERIAL_VERSION = 3;
+  using Entry = uint64_t;
+  using ExtractKey = trivial_extract_key;
+  using iterator = theta_iterator<Entry, ExtractKey>;
+  using const_iterator = theta_const_iterator<Entry, ExtractKey>;
 
   virtual ~theta_sketch_alloc() = default;
 
   /**
+   * @return allocator
+   */
+  virtual Allocator get_allocator() const = 0;
+
+  /**
    * @return true if this sketch represents an empty set (not the same as no retained entries!)
    */
-  bool is_empty() const;
+  virtual bool is_empty() const = 0;
 
   /**
    * @return estimate of the distinct count of the input stream
@@ -96,13 +80,16 @@ public:
   /**
    * @return theta as a positive integer between 0 and LLONG_MAX
    */
-  uint64_t get_theta64() const;
+  virtual uint64_t get_theta64() const = 0;
 
   /**
    * @return the number of retained entries in the sketch
    */
   virtual uint32_t get_num_retained() const = 0;
 
+  /**
+   * @return hash of the seed that was used to hash the input
+   */
   virtual uint16_t get_seed_hash() const = 0;
 
   /**
@@ -111,109 +98,82 @@ public:
   virtual bool is_ordered() const = 0;
 
   /**
-   * Writes a human-readable summary of this sketch to a given stream
+   * Provides a human-readable summary of this sketch as a string
    * @param print_items if true include the list of items retained by the sketch
+   * @return sketch summary as a string
    */
-  virtual string<A> to_string(bool print_items = false) const = 0;
-
-  /**
-   * This method serializes the sketch into a given stream in a binary form
-   * @param os output stream
-   */
-  virtual void serialize(std::ostream& os) const = 0;
-
-  // This is a convenience alias for users
-  // The type returned by the following serialize method
-  typedef vector_u8<A> vector_bytes;
+  virtual string<Allocator> to_string(bool print_items = false) const;
 
   /**
-   * This method serializes the sketch as a vector of bytes.
-   * An optional header can be reserved in front of the sketch.
-   * It is an uninitialized space of a given size.
-   * This header is used in Datasketches PostgreSQL extension.
-   * @param header_size_bytes space to reserve in front of the sketch
-   */
-  virtual vector_bytes serialize(unsigned header_size_bytes = 0) const = 0;
-
-  // This is a convenience alias for users
-  // The type returned by the following deserialize methods
-  // It is not possible to return instances of an abstract type, so this has to be a pointer
-  typedef std::unique_ptr<theta_sketch_alloc<A>, std::function<void(theta_sketch_alloc<A>*)>> unique_ptr;
-
-  /**
-   * This method deserializes a sketch from a given stream.
-   * @param is input stream
-   * @param seed the seed for the hash function that was used to create the sketch
-   * @return an instance of a sketch as a unique_ptr
+   * Iterator over hash values in this sketch.
+   * @return begin iterator
    */
-  static unique_ptr deserialize(std::istream& is, uint64_t seed = DEFAULT_SEED);
+  virtual iterator begin() = 0;
 
   /**
-   * This method deserializes a sketch from a given array of bytes.
-   * @param bytes pointer to the array of bytes
-   * @param size the size of the array
-   * @param seed the seed for the hash function that was used to create the sketch
-   * @return an instance of the sketch
+   * Iterator pointing past the valid range.
+   * Not to be incremented or dereferenced.
+   * @return end iterator
    */
-  static unique_ptr deserialize(const void* bytes, size_t size, uint64_t seed = DEFAULT_SEED);
-
-  class const_iterator;
+  virtual iterator end() = 0;
 
   /**
-   * Iterator over hash values in this sketch.
+   * Const iterator over hash values in this sketch.
    * @return begin iterator
    */
   virtual const_iterator begin() const = 0;
 
   /**
-   * Iterator pointing past the valid range.
+   * Const iterator pointing past the valid range.
    * Not to be incremented or dereferenced.
    * @return end iterator
    */
   virtual const_iterator end() const = 0;
 
 protected:
-  enum flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDERED };
-
-  bool is_empty_;
-  uint64_t theta_;
-
-  theta_sketch_alloc(bool is_empty, uint64_t theta);
-
-  static uint16_t get_seed_hash(uint64_t seed);
-
-  static void check_sketch_type(uint8_t actual, uint8_t expected);
-  static void check_serial_version(uint8_t actual, uint8_t expected);
-  static void check_seed_hash(uint16_t actual, uint16_t expected);
-
-  friend theta_intersection_alloc<A>;
-  friend theta_a_not_b_alloc<A>;
+  using ostrstream = std::basic_ostringstream<char, std::char_traits<char>, AllocChar<Allocator>>;
+  virtual void print_specifics(ostrstream& os) const = 0;
 };
 
-// update sketch
-
-template<typename A> using AllocU64 = typename std::allocator_traits<A>::template rebind_alloc<uint64_t>;
-template<typename A> using vector_u64 = std::vector<uint64_t, AllocU64<A>>;
+// forward declaration
+template<typename A> class compact_theta_sketch_alloc;
 
-template<typename A>
-class update_theta_sketch_alloc: public theta_sketch_alloc<A> {
+template<typename Allocator = std::allocator<uint64_t>>
+class update_theta_sketch_alloc: public theta_sketch_alloc<Allocator> {
 public:
-  class builder;
-  enum resize_factor { X1, X2, X4, X8 };
-  static const uint8_t SKETCH_TYPE = 2;
+  using Base = theta_sketch_alloc<Allocator>;
+  using Entry = typename Base::Entry;
+  using ExtractKey = typename Base::ExtractKey;
+  using iterator = typename Base::iterator;
+  using const_iterator = typename Base::const_iterator;
+  using theta_table = theta_update_sketch_base<Entry, ExtractKey, Allocator>;
+  using resize_factor = typename theta_table::resize_factor;
 
   // No constructor here. Use builder instead.
+  class builder;
 
+  update_theta_sketch_alloc(const update_theta_sketch_alloc&) = default;
+  update_theta_sketch_alloc(update_theta_sketch_alloc&&) noexcept = default;
   virtual ~update_theta_sketch_alloc() = default;
+  update_theta_sketch_alloc& operator=(const update_theta_sketch_alloc&) = default;
+  update_theta_sketch_alloc& operator=(update_theta_sketch_alloc&&) = default;
 
-  virtual uint32_t get_num_retained() const;
-  virtual uint16_t get_seed_hash() const;
+  virtual Allocator get_allocator() const;
+  virtual bool is_empty() const;
   virtual bool is_ordered() const;
-  virtual string<A> to_string(bool print_items = false) const;
-  virtual void serialize(std::ostream& os) const;
-  typedef vector_u8<A> vector_bytes; // alias for users
-  // header space is reserved, but not initialized
-  virtual vector_bytes serialize(unsigned header_size_bytes = 0) const;
+  virtual uint16_t get_seed_hash() const;
+  virtual uint64_t get_theta64() const;
+  virtual uint32_t get_num_retained() const;
+
+  /**
+   * @return log2 of the configured nominal number of entries in the sketch
+   */
+  uint8_t get_lg_k() const;
+
+  /**
+   * @return configured resize factor of the sketch
+   */
+  resize_factor get_rf() const;
 
   /**
    * Update this sketch with a given string.
@@ -302,7 +262,7 @@ public:
    * @param data pointer to the data
    * @param length of the data in bytes
    */
-  void update(const void* data, unsigned length);
+  void update(const void* data, size_t length);
 
   /**
    * Remove retained entries in excess of the nominal size k (if any)
@@ -314,105 +274,85 @@ public:
    * @param ordered optional flag to specify if ordered sketch should be produced
    * @return compact sketch
    */
-  compact_theta_sketch_alloc<A> compact(bool ordered = true) const;
-
-  virtual typename theta_sketch_alloc<A>::const_iterator begin() const;
-  virtual typename theta_sketch_alloc<A>::const_iterator end() const;
-
-  /**
-   * This method deserializes a sketch from a given stream.
-   * @param is input stream
-   * @param seed the seed for the hash function that was used to create the sketch
-   * @return an instance of a sketch
-   */
-  static update_theta_sketch_alloc<A> deserialize(std::istream& is, uint64_t seed = DEFAULT_SEED);
+  compact_theta_sketch_alloc<Allocator> compact(bool ordered = true) const;
 
-  /**
-   * This method deserializes a sketch from a given array of bytes.
-   * @param bytes pointer to the array of bytes
-   * @param size the size of the array
-   * @param seed the seed for the hash function that was used to create the sketch
-   * @return an instance of the sketch
-   */
-  static update_theta_sketch_alloc<A> deserialize(const void* bytes, size_t size, uint64_t seed = DEFAULT_SEED);
+  virtual iterator begin();
+  virtual iterator end();
+  virtual const_iterator begin() const;
+  virtual const_iterator end() const;
 
 private:
-  // resize threshold = 0.5 tuned for speed
-  static constexpr double RESIZE_THRESHOLD = 0.5;
-  // hash table rebuild threshold = 15/16
-  static constexpr double REBUILD_THRESHOLD = 15.0 / 16.0;
-
-  static constexpr uint8_t STRIDE_HASH_BITS = 7;
-  static constexpr uint32_t STRIDE_MASK = (1 << STRIDE_HASH_BITS) - 1;
-
-  uint8_t lg_cur_size_;
-  uint8_t lg_nom_size_;
-  vector_u64<A> keys_;
-  uint32_t num_keys_;
-  resize_factor rf_;
-  float p_;
-  uint64_t seed_;
-  uint32_t capacity_;
+  theta_table table_;
 
   // for builder
-  update_theta_sketch_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed);
-
-  // for deserialize
-  update_theta_sketch_alloc(bool is_empty, uint64_t theta, uint8_t lg_cur_size, uint8_t lg_nom_size, vector_u64<A>&& keys, uint32_t num_keys, resize_factor rf, float p, uint64_t seed);
+  update_theta_sketch_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta,
+      uint64_t seed, const Allocator& allocator);
 
-  void resize();
-  void rebuild();
-
-  friend theta_union_alloc<A>;
-  void internal_update(uint64_t hash);
-
-  friend theta_intersection_alloc<A>;
-  friend theta_a_not_b_alloc<A>;
-  static inline uint32_t get_capacity(uint8_t lg_cur_size, uint8_t lg_nom_size);
-  static inline uint32_t get_stride(uint64_t hash, uint8_t lg_size);
-  static bool hash_search_or_insert(uint64_t hash, uint64_t* table, uint8_t lg_size);
-  static bool hash_search(uint64_t hash, const uint64_t* table, uint8_t lg_size);
-
-  friend theta_sketch_alloc<A>;
-  static update_theta_sketch_alloc<A> internal_deserialize(std::istream& is, resize_factor rf, uint8_t lg_cur_size, uint8_t lg_nom_size, uint8_t flags_byte, uint64_t seed);
-  static update_theta_sketch_alloc<A> internal_deserialize(const void* bytes, size_t size, resize_factor rf, uint8_t lg_cur_size, uint8_t lg_nom_size, uint8_t flags_byte, uint64_t seed);
+  using ostrstream = typename Base::ostrstream;
+  virtual void print_specifics(ostrstream& os) const;
 };
 
 // compact sketch
 
-template<typename A>
-class compact_theta_sketch_alloc: public theta_sketch_alloc<A> {
+template<typename Allocator = std::allocator<uint64_t>>
+class compact_theta_sketch_alloc: public theta_sketch_alloc<Allocator> {
 public:
+  using Base = theta_sketch_alloc<Allocator>;
+  using iterator = typename Base::iterator;
+  using const_iterator = typename Base::const_iterator;
+  using AllocBytes = typename std::allocator_traits<Allocator>::template rebind_alloc<uint8_t>;
+  using vector_bytes = std::vector<uint8_t, AllocBytes>;
+
+  static const uint8_t SERIAL_VERSION = 3;
   static const uint8_t SKETCH_TYPE = 3;
 
-  // No constructor here.
   // Instances of this type can be obtained:
-  // - by compacting an update_theta_sketch
+  // - by compacting an update_theta_sketch_alloc
   // - as a result of a set operation
   // - by deserializing a previously serialized compact sketch
 
-  compact_theta_sketch_alloc(const theta_sketch_alloc<A>& other, bool ordered);
+  compact_theta_sketch_alloc(const Base& other, bool ordered);
+  compact_theta_sketch_alloc(const compact_theta_sketch_alloc&) = default;
+  compact_theta_sketch_alloc(compact_theta_sketch_alloc&&) noexcept = default;
   virtual ~compact_theta_sketch_alloc() = default;
+  compact_theta_sketch_alloc& operator=(const compact_theta_sketch_alloc&) = default;
+  compact_theta_sketch_alloc& operator=(compact_theta_sketch_alloc&&) = default;
 
+  virtual Allocator get_allocator() const;
+  virtual bool is_empty() const;
+  virtual bool is_ordered() const;
+  virtual uint64_t get_theta64() const;
   virtual uint32_t get_num_retained() const;
   virtual uint16_t get_seed_hash() const;
-  virtual bool is_ordered() const;
-  virtual string<A> to_string(bool print_items = false) const;
-  virtual void serialize(std::ostream& os) const;
-  typedef vector_u8<A> vector_bytes; // alias for users
-  // header space is reserved, but not initialized
-  virtual vector_bytes serialize(unsigned header_size_bytes = 0) const;
 
-  virtual typename theta_sketch_alloc<A>::const_iterator begin() const;
-  virtual typename theta_sketch_alloc<A>::const_iterator end() const;
+  /**
+   * This method serializes the sketch into a given stream in a binary form
+   * @param os output stream
+   */
+  void serialize(std::ostream& os) const;
+
+  /**
+   * This method serializes the sketch as a vector of bytes.
+   * An optional header can be reserved in front of the sketch.
+   * It is an uninitialized space of a given size.
+   * This header is used in the Datasketches PostgreSQL extension.
+   * @param header_size_bytes space to reserve in front of the sketch
+   */
+  vector_bytes serialize(unsigned header_size_bytes = 0) const;
+
+  virtual iterator begin();
+  virtual iterator end();
+  virtual const_iterator begin() const;
+  virtual const_iterator end() const;
 
   /**
    * This method deserializes a sketch from a given stream.
    * @param is input stream
    * @param seed the seed for the hash function that was used to create the sketch
-   * @return an instance of a sketch
+   * @return an instance of the sketch
    */
-  static compact_theta_sketch_alloc<A> deserialize(std::istream& is, uint64_t seed = DEFAULT_SEED);
+  static compact_theta_sketch_alloc deserialize(std::istream& is,
+      uint64_t seed = DEFAULT_SEED, const Allocator& allocator = Allocator());
 
   /**
    * This method deserializes a sketch from a given array of bytes.
@@ -421,110 +361,36 @@ public:
    * @param seed the seed for the hash function that was used to create the sketch
    * @return an instance of the sketch
    */
-  static compact_theta_sketch_alloc<A> deserialize(const void* bytes, size_t size, uint64_t seed = DEFAULT_SEED);
+  static compact_theta_sketch_alloc deserialize(const void* bytes, size_t size,
+      uint64_t seed = DEFAULT_SEED, const Allocator& allocator = Allocator());
+
+  // for internal use
+  compact_theta_sketch_alloc(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta, std::vector<uint64_t, Allocator>&& entries);
 
 private:
-  typedef typename std::allocator_traits<A>::template rebind_alloc<uint64_t> AllocU64;
+  enum flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDERED };
 
-  vector_u64<A> keys_;
-  uint16_t seed_hash_;
+  bool is_empty_;
   bool is_ordered_;
+  uint16_t seed_hash_;
+  uint64_t theta_;
+  std::vector<uint64_t, Allocator> entries_;
 
-  friend theta_sketch_alloc<A>;
-  friend update_theta_sketch_alloc<A>;
-  friend theta_union_alloc<A>;
-  friend theta_intersection_alloc<A>;
-  friend theta_a_not_b_alloc<A>;
-  compact_theta_sketch_alloc(bool is_empty, uint64_t theta, vector_u64<A>&& keys, uint16_t seed_hash, bool is_ordered);
-  static compact_theta_sketch_alloc<A> internal_deserialize(std::istream& is, uint8_t preamble_longs, uint8_t flags_byte, uint16_t seed_hash);
-  static compact_theta_sketch_alloc<A> internal_deserialize(const void* bytes, size_t size, uint8_t preamble_longs, uint8_t flags_byte, uint16_t seed_hash);
-};
-
-// builder
-
-template<typename A>
-class update_theta_sketch_alloc<A>::builder {
-public:
-  static const uint8_t MIN_LG_K = 5;
-  static const uint8_t DEFAULT_LG_K = 12;
-  static const resize_factor DEFAULT_RESIZE_FACTOR = X8;
-
-  /**
-   * Creates and instance of the builder with default parameters.
-   */
-  builder();
-
-  /**
-   * Set log2(k), where k is a nominal number of entries in the sketch
-   * @param lg_k base 2 logarithm of nominal number of entries
-   * @return this builder
-   */
-  builder& set_lg_k(uint8_t lg_k);
-
-  /**
-   * Set resize factor for the internal hash table (defaults to 8)
-   * @param rf resize factor
-   * @return this builder
-   */
-  builder& set_resize_factor(resize_factor rf);
-
-  /**
-   * Set sampling probability (initial theta). The default is 1, so the sketch retains
-   * all entries until it reaches the limit, at which point it goes into the estimation mode
-   * and reduces the effective sampling probability (theta) as necessary.
-   * @param p sampling probability
-   * @return this builder
-   */
-  builder& set_p(float p);
-
-  /**
-   * Set the seed for the hash function. Should be used carefully if needed.
-   * Sketches produced with different seed are not compatible
-   * and cannot be mixed in set operations.
-   * @param seed hash seed
-   * @return this builder
-   */
-  builder& set_seed(uint64_t seed);
-
-  /**
-   * This is to create an instance of the sketch with predefined parameters.
-   * @return and instance of the sketch
-   */
-  update_theta_sketch_alloc<A> build() const;
-
-private:
-  uint8_t lg_k_;
-  resize_factor rf_;
-  float p_;
-  uint64_t seed_;
-
-  static uint8_t starting_sub_multiple(uint8_t lg_tgt, uint8_t lg_min, uint8_t lg_rf);
+  using ostrstream = typename Base::ostrstream;
+  virtual void print_specifics(ostrstream& os) const;
 };
 
-// iterator
-template<typename A>
-class theta_sketch_alloc<A>::const_iterator: public std::iterator<std::input_iterator_tag, uint64_t> {
+template<typename Allocator>
+class update_theta_sketch_alloc<Allocator>::builder: public theta_base_builder<builder, Allocator> {
 public:
-  const_iterator& operator++();
-  const_iterator operator++(int);
-  bool operator==(const const_iterator& other) const;
-  bool operator!=(const const_iterator& other) const;
-  uint64_t operator*() const;
-
-private:
-  const uint64_t* keys_;
-  uint32_t size_;
-  uint32_t index_;
-  const_iterator(const uint64_t* keys, uint32_t size, uint32_t index);
-  friend class update_theta_sketch_alloc<A>;
-  friend class compact_theta_sketch_alloc<A>;
+    builder(const Allocator& allocator = Allocator());
+    update_theta_sketch_alloc build() const;
 };
 
-
 // aliases with default allocator for convenience
-typedef theta_sketch_alloc<std::allocator<void>> theta_sketch;
-typedef update_theta_sketch_alloc<std::allocator<void>> update_theta_sketch;
-typedef compact_theta_sketch_alloc<std::allocator<void>> compact_theta_sketch;
+using theta_sketch = theta_sketch_alloc<std::allocator<uint64_t>>;
+using update_theta_sketch = update_theta_sketch_alloc<std::allocator<uint64_t>>;
+using compact_theta_sketch = compact_theta_sketch_alloc<std::allocator<uint64_t>>;
 
 } /* namespace datasketches */
 
diff --git a/be/src/thirdparty/datasketches/theta_sketch_impl.hpp b/be/src/thirdparty/datasketches/theta_sketch_impl.hpp
index 579a675..1335e59 100644
--- a/be/src/thirdparty/datasketches/theta_sketch_impl.hpp
+++ b/be/src/thirdparty/datasketches/theta_sketch_impl.hpp
@@ -20,35 +20,23 @@
 #ifndef THETA_SKETCH_IMPL_HPP_
 #define THETA_SKETCH_IMPL_HPP_
 
-#include <algorithm>
-#include <cmath>
-#include <memory>
-#include <functional>
-#include <istream>
-#include <ostream>
 #include <sstream>
+#include <vector>
 
-#include "MurmurHash3.h"
 #include "serde.hpp"
 #include "binomial_bounds.hpp"
-#include "memory_operations.hpp"
+#include "theta_helpers.hpp"
 
 namespace datasketches {
 
-/*
- * author Alexander Saydakov
- * author Lee Rhodes
- * author Kevin Lang
- */
-
 template<typename A>
-theta_sketch_alloc<A>::theta_sketch_alloc(bool is_empty, uint64_t theta):
-is_empty_(is_empty), theta_(theta)
-{}
+bool theta_sketch_alloc<A>::is_estimation_mode() const {
+  return get_theta64() < theta_constants::MAX_THETA && !is_empty();
+}
 
 template<typename A>
-bool theta_sketch_alloc<A>::is_empty() const {
-  return is_empty_;
+double theta_sketch_alloc<A>::get_theta() const {
+  return static_cast<double>(get_theta64()) / theta_constants::MAX_THETA;
 }
 
 template<typename A>
@@ -69,182 +57,47 @@ double theta_sketch_alloc<A>::get_upper_bound(uint8_t num_std_devs) const {
 }
 
 template<typename A>
-bool theta_sketch_alloc<A>::is_estimation_mode() const {
-  return theta_ < MAX_THETA && !is_empty_;
-}
-
-template<typename A>
-double theta_sketch_alloc<A>::get_theta() const {
-  return (double) theta_ / MAX_THETA;
-}
-
-template<typename A>
-uint64_t theta_sketch_alloc<A>::get_theta64() const {
-  return theta_;
-}
-
-template<typename A>
-typename theta_sketch_alloc<A>::unique_ptr theta_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed) {
-  uint8_t preamble_longs;
-  is.read((char*)&preamble_longs, sizeof(preamble_longs));
-  uint8_t serial_version;
-  is.read((char*)&serial_version, sizeof(serial_version));
-  uint8_t type;
-  is.read((char*)&type, sizeof(type));
-  uint8_t lg_nom_size;
-  is.read((char*)&lg_nom_size, sizeof(lg_nom_size));
-  uint8_t lg_cur_size;
-  is.read((char*)&lg_cur_size, sizeof(lg_cur_size));
-  uint8_t flags_byte;
-  is.read((char*)&flags_byte, sizeof(flags_byte));
-  uint16_t seed_hash;
-  is.read((char*)&seed_hash, sizeof(seed_hash));
-
-  check_serial_version(serial_version, SERIAL_VERSION);
-
-  if (type == update_theta_sketch_alloc<A>::SKETCH_TYPE) {
-    check_seed_hash(seed_hash, get_seed_hash(seed));
-    typename update_theta_sketch_alloc<A>::resize_factor rf = static_cast<typename update_theta_sketch_alloc<A>::resize_factor>(preamble_longs >> 6);
-    typedef typename std::allocator_traits<A>::template rebind_alloc<update_theta_sketch_alloc<A>> AU;
-    return unique_ptr(
-      static_cast<theta_sketch_alloc<A>*>(new (AU().allocate(1)) update_theta_sketch_alloc<A>(update_theta_sketch_alloc<A>::internal_deserialize(is, rf, lg_cur_size, lg_nom_size, flags_byte, seed))),
-      [](theta_sketch_alloc<A>* ptr) {
-        ptr->~theta_sketch_alloc();
-        AU().deallocate(static_cast<update_theta_sketch_alloc<A>*>(ptr), 1);
-      }
-    );
-  } else if (type == compact_theta_sketch_alloc<A>::SKETCH_TYPE) {
-    const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
-    if (!is_empty) check_seed_hash(seed_hash, get_seed_hash(seed));
-    typedef typename std::allocator_traits<A>::template rebind_alloc<compact_theta_sketch_alloc<A>> AC;
-    return unique_ptr(
-      static_cast<theta_sketch_alloc<A>*>(new (AC().allocate(1)) compact_theta_sketch_alloc<A>(compact_theta_sketch_alloc<A>::internal_deserialize(is, preamble_longs, flags_byte, seed_hash))),
-      [](theta_sketch_alloc<A>* ptr) {
-        ptr->~theta_sketch_alloc();
-        AC().deallocate(static_cast<compact_theta_sketch_alloc<A>*>(ptr), 1);
-      }
-    );
-  }
-  throw std::invalid_argument("unsupported sketch type " + std::to_string((int) type));
-}
-
-template<typename A>
-typename theta_sketch_alloc<A>::unique_ptr theta_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed) {
-  ensure_minimum_memory(size, static_cast<size_t>(8));
-  const char* ptr = static_cast<const char*>(bytes);
-  uint8_t preamble_longs;
-  ptr += copy_from_mem(ptr, &preamble_longs, sizeof(preamble_longs));
-  uint8_t serial_version;
-  ptr += copy_from_mem(ptr, &serial_version, sizeof(serial_version));
-  uint8_t type;
-  ptr += copy_from_mem(ptr, &type, sizeof(type));
-  uint8_t lg_nom_size;
-  ptr += copy_from_mem(ptr, &lg_nom_size, sizeof(lg_nom_size));
-  uint8_t lg_cur_size;
-  ptr += copy_from_mem(ptr, &lg_cur_size, sizeof(lg_cur_size));
-  uint8_t flags_byte;
-  ptr += copy_from_mem(ptr, &flags_byte, sizeof(flags_byte));
-  uint16_t seed_hash;
-  ptr += copy_from_mem(ptr, &seed_hash, sizeof(seed_hash));
-
-  check_serial_version(serial_version, SERIAL_VERSION);
-
-  if (type == update_theta_sketch_alloc<A>::SKETCH_TYPE) {
-    check_seed_hash(seed_hash, get_seed_hash(seed));
-    typename update_theta_sketch_alloc<A>::resize_factor rf = static_cast<typename update_theta_sketch_alloc<A>::resize_factor>(preamble_longs >> 6);
-    typedef typename std::allocator_traits<A>::template rebind_alloc<update_theta_sketch_alloc<A>> AU;
-    return unique_ptr(
-      static_cast<theta_sketch_alloc<A>*>(new (AU().allocate(1)) update_theta_sketch_alloc<A>(
-        update_theta_sketch_alloc<A>::internal_deserialize(ptr, size - (ptr - static_cast<const char*>(bytes)), rf, lg_cur_size, lg_nom_size, flags_byte, seed))
-      ),
-      [](theta_sketch_alloc<A>* ptr) {
-        ptr->~theta_sketch_alloc();
-        AU().deallocate(static_cast<update_theta_sketch_alloc<A>*>(ptr), 1);
-      }
-    );
-  } else if (type == compact_theta_sketch_alloc<A>::SKETCH_TYPE) {
-    const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
-    if (!is_empty) check_seed_hash(seed_hash, get_seed_hash(seed));
-    typedef typename std::allocator_traits<A>::template rebind_alloc<compact_theta_sketch_alloc<A>> AC;
-    return unique_ptr(
-      static_cast<theta_sketch_alloc<A>*>(new (AC().allocate(1)) compact_theta_sketch_alloc<A>(
-        compact_theta_sketch_alloc<A>::internal_deserialize(ptr, size - (ptr - static_cast<const char*>(bytes)), preamble_longs, flags_byte, seed_hash))
-      ),
-      [](theta_sketch_alloc<A>* ptr) {
-        ptr->~theta_sketch_alloc();
-        AC().deallocate(static_cast<compact_theta_sketch_alloc<A>*>(ptr), 1);
-      }
-    );
-  }
-  throw std::invalid_argument("unsupported sketch type " + std::to_string((int) type));
-}
-
-template<typename A>
-uint16_t theta_sketch_alloc<A>::get_seed_hash(uint64_t seed) {
-  HashState hashes;
-  MurmurHash3_x64_128(&seed, sizeof(seed), 0, hashes);
-  return hashes.h1;
-}
-
-template<typename A>
-void theta_sketch_alloc<A>::check_sketch_type(uint8_t actual, uint8_t expected) {
-  if (actual != expected) {
-    throw std::invalid_argument("Sketch type mismatch: expected " + std::to_string((int)expected) + ", actual " + std::to_string((int)actual));
-  }
-}
-
-template<typename A>
-void theta_sketch_alloc<A>::check_serial_version(uint8_t actual, uint8_t expected) {
-  if (actual != expected) {
-    throw std::invalid_argument("Sketch serial version mismatch: expected " + std::to_string((int)expected) + ", actual " + std::to_string((int)actual));
-  }
-}
-
-template<typename A>
-void theta_sketch_alloc<A>::check_seed_hash(uint16_t actual, uint16_t expected) {
-  if (actual != expected) {
-    throw std::invalid_argument("Sketch seed hash mismatch: expected " + std::to_string(expected) + ", actual " + std::to_string(actual));
+string<A> theta_sketch_alloc<A>::to_string(bool detail) const {
+  ostrstream os;
+  os << "### Theta sketch summary:" << std::endl;
+  os << "   num retained entries : " << get_num_retained() << std::endl;
+  os << "   seed hash            : " << get_seed_hash() << std::endl;
+  os << "   empty?               : " << (is_empty() ? "true" : "false") << std::endl;
+  os << "   ordered?             : " << (is_ordered() ? "true" : "false") << std::endl;
+  os << "   estimation mode?     : " << (is_estimation_mode() ? "true" : "false") << std::endl;
+  os << "   theta (fraction)     : " << get_theta() << std::endl;
+  os << "   theta (raw 64-bit)   : " << get_theta64() << std::endl;
+  os << "   estimate             : " << this->get_estimate() << std::endl;
+  os << "   lower bound 95% conf : " << this->get_lower_bound(2) << std::endl;
+  os << "   upper bound 95% conf : " << this->get_upper_bound(2) << std::endl;
+  print_specifics(os);
+  os << "### End sketch summary" << std::endl;
+  if (detail) {
+    os << "### Retained entries" << std::endl;
+    for (const auto& hash: *this) {
+      os << hash << std::endl;
+    }
+    os << "### End retained entries" << std::endl;
   }
+  return os.str();
 }
 
 // update sketch
 
 template<typename A>
-update_theta_sketch_alloc<A>::update_theta_sketch_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed):
-theta_sketch_alloc<A>(true, theta_sketch_alloc<A>::MAX_THETA),
-lg_cur_size_(lg_cur_size),
-lg_nom_size_(lg_nom_size),
-keys_(1 << lg_cur_size_, 0),
-num_keys_(0),
-rf_(rf),
-p_(p),
-seed_(seed),
-capacity_(get_capacity(lg_cur_size, lg_nom_size))
-{
-  if (p < 1) this->theta_ *= p;
-}
-
-template<typename A>
-update_theta_sketch_alloc<A>::update_theta_sketch_alloc(bool is_empty, uint64_t theta, uint8_t lg_cur_size, uint8_t lg_nom_size, vector_u64<A>&& keys, uint32_t num_keys, resize_factor rf, float p, uint64_t seed):
-theta_sketch_alloc<A>(is_empty, theta),
-lg_cur_size_(lg_cur_size),
-lg_nom_size_(lg_nom_size),
-keys_(std::move(keys)),
-num_keys_(num_keys),
-rf_(rf),
-p_(p),
-seed_(seed),
-capacity_(get_capacity(lg_cur_size, lg_nom_size))
+update_theta_sketch_alloc<A>::update_theta_sketch_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf,
+    uint64_t theta, uint64_t seed, const A& allocator):
+table_(lg_cur_size, lg_nom_size, rf, theta, seed, allocator)
 {}
 
 template<typename A>
-uint32_t update_theta_sketch_alloc<A>::get_num_retained() const {
-  return num_keys_;
+A update_theta_sketch_alloc<A>::get_allocator() const {
+  return table_.allocator_;
 }
 
 template<typename A>
-uint16_t update_theta_sketch_alloc<A>::get_seed_hash() const {
-  return theta_sketch_alloc<A>::get_seed_hash(seed_);
+bool update_theta_sketch_alloc<A>::is_empty() const {
+  return table_.is_empty_;
 }
 
 template<typename A>
@@ -253,169 +106,28 @@ bool update_theta_sketch_alloc<A>::is_ordered() const {
 }
 
 template<typename A>
-string<A> update_theta_sketch_alloc<A>::to_string(bool print_items) const {
-  std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
-  os << "### Update Theta sketch summary:" << std::endl;
-  os << "   lg nominal size      : " << (int) lg_nom_size_ << std::endl;
-  os << "   lg current size      : " << (int) lg_cur_size_ << std::endl;
-  os << "   num retained keys    : " << num_keys_ << std::endl;
-  os << "   resize factor        : " << (1 << rf_) << std::endl;
-  os << "   sampling probability : " << p_ << std::endl;
-  os << "   seed hash            : " << this->get_seed_hash() << std::endl;
-  os << "   empty?               : " << (this->is_empty() ? "true" : "false") << std::endl;
-  os << "   ordered?             : " << (this->is_ordered() ? "true" : "false") << std::endl;
-  os << "   estimation mode?     : " << (this->is_estimation_mode() ? "true" : "false") << std::endl;
-  os << "   theta (fraction)     : " << this->get_theta() << std::endl;
-  os << "   theta (raw 64-bit)   : " << this->theta_ << std::endl;
-  os << "   estimate             : " << this->get_estimate() << std::endl;
-  os << "   lower bound 95% conf : " << this->get_lower_bound(2) << std::endl;
-  os << "   upper bound 95% conf : " << this->get_upper_bound(2) << std::endl;
-  os << "### End sketch summary" << std::endl;
-  if (print_items) {
-    os << "### Retained keys" << std::endl;
-    for (auto key: *this) os << "   " << key << std::endl;
-    os << "### End retained keys" << std::endl;
-  }
-  return os.str();
-}
-
-template<typename A>
-void update_theta_sketch_alloc<A>::serialize(std::ostream& os) const {
-  const uint8_t preamble_longs_and_rf = 3 | (rf_ << 6);
-  os.write((char*)&preamble_longs_and_rf, sizeof(preamble_longs_and_rf));
-  const uint8_t serial_version = theta_sketch_alloc<A>::SERIAL_VERSION;
-  os.write((char*)&serial_version, sizeof(serial_version));
-  const uint8_t type = SKETCH_TYPE;
-  os.write((char*)&type, sizeof(type));
-  os.write((char*)&lg_nom_size_, sizeof(lg_nom_size_));
-  os.write((char*)&lg_cur_size_, sizeof(lg_cur_size_));
-  const uint8_t flags_byte(
-    (this->is_empty() ? 1 << theta_sketch_alloc<A>::flags::IS_EMPTY : 0)
-  );
-  os.write((char*)&flags_byte, sizeof(flags_byte));
-  const uint16_t seed_hash = get_seed_hash();
-  os.write((char*)&seed_hash, sizeof(seed_hash));
-  os.write((char*)&num_keys_, sizeof(num_keys_));
-  os.write((char*)&p_, sizeof(p_));
-  os.write((char*)&(this->theta_), sizeof(uint64_t));
-  os.write((char*)keys_.data(), sizeof(uint64_t) * keys_.size());
-}
-
-template<typename A>
-vector_u8<A> update_theta_sketch_alloc<A>::serialize(unsigned header_size_bytes) const {
-  const uint8_t preamble_longs = 3;
-  const size_t size = header_size_bytes + sizeof(uint64_t) * preamble_longs + sizeof(uint64_t) * keys_.size();
-  vector_u8<A> bytes(size);
-  uint8_t* ptr = bytes.data() + header_size_bytes;
-
-  const uint8_t preamble_longs_and_rf = preamble_longs | (rf_ << 6);
-  ptr += copy_to_mem(&preamble_longs_and_rf, ptr, sizeof(preamble_longs_and_rf));
-  const uint8_t serial_version = theta_sketch_alloc<A>::SERIAL_VERSION;
-  ptr += copy_to_mem(&serial_version, ptr, sizeof(serial_version));
-  const uint8_t type = SKETCH_TYPE;
-  ptr += copy_to_mem(&type, ptr, sizeof(type));
-  ptr += copy_to_mem(&lg_nom_size_, ptr, sizeof(lg_nom_size_));
-  ptr += copy_to_mem(&lg_cur_size_, ptr, sizeof(lg_cur_size_));
-  const uint8_t flags_byte(
-    (this->is_empty() ? 1 << theta_sketch_alloc<A>::flags::IS_EMPTY : 0)
-  );
-  ptr += copy_to_mem(&flags_byte, ptr, sizeof(flags_byte));
-  const uint16_t seed_hash = get_seed_hash();
-  ptr += copy_to_mem(&seed_hash, ptr, sizeof(seed_hash));
-  ptr += copy_to_mem(&num_keys_, ptr, sizeof(num_keys_));
-  ptr += copy_to_mem(&p_, ptr, sizeof(p_));
-  ptr += copy_to_mem(&(this->theta_), ptr, sizeof(uint64_t));
-  ptr += copy_to_mem(keys_.data(), ptr, sizeof(uint64_t) * keys_.size());
-
-  return bytes;
-}
-
-template<typename A>
-update_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed) {
-  uint8_t preamble_longs;
-  is.read((char*)&preamble_longs, sizeof(preamble_longs));
-  resize_factor rf = static_cast<resize_factor>(preamble_longs >> 6);
-  preamble_longs &= 0x3f; // remove resize factor
-  uint8_t serial_version;
-  is.read((char*)&serial_version, sizeof(serial_version));
-  uint8_t type;
-  is.read((char*)&type, sizeof(type));
-  uint8_t lg_nom_size;
-  is.read((char*)&lg_nom_size, sizeof(lg_nom_size));
-  uint8_t lg_cur_size;
-  is.read((char*)&lg_cur_size, sizeof(lg_cur_size));
-  uint8_t flags_byte;
-  is.read((char*)&flags_byte, sizeof(flags_byte));
-  uint16_t seed_hash;
-  is.read((char*)&seed_hash, sizeof(seed_hash));
-  theta_sketch_alloc<A>::check_sketch_type(type, SKETCH_TYPE);
-  theta_sketch_alloc<A>::check_serial_version(serial_version, theta_sketch_alloc<A>::SERIAL_VERSION);
-  theta_sketch_alloc<A>::check_seed_hash(seed_hash, theta_sketch_alloc<A>::get_seed_hash(seed));
-  return internal_deserialize(is, rf, lg_cur_size, lg_nom_size, flags_byte, seed);
+uint64_t update_theta_sketch_alloc<A>::get_theta64() const {
+  return table_.theta_;
 }
 
 template<typename A>
-update_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::internal_deserialize(std::istream& is, resize_factor rf, uint8_t lg_cur_size, uint8_t lg_nom_size, uint8_t flags_byte, uint64_t seed) {
-  uint32_t num_keys;
-  is.read((char*)&num_keys, sizeof(num_keys));
-  float p;
-  is.read((char*)&p, sizeof(p));
-  uint64_t theta;
-  is.read((char*)&theta, sizeof(theta));
-  vector_u64<A> keys(1 << lg_cur_size);
-  is.read((char*)keys.data(), sizeof(uint64_t) * keys.size());
-  const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
-  if (!is.good()) throw std::runtime_error("error reading from std::istream"); 
-  return update_theta_sketch_alloc<A>(is_empty, theta, lg_cur_size, lg_nom_size, std::move(keys), num_keys, rf, p, seed);
+uint32_t update_theta_sketch_alloc<A>::get_num_retained() const {
+  return table_.num_entries_;
 }
 
 template<typename A>
-update_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed) {
-  ensure_minimum_memory(size, 8);
-  const char* ptr = static_cast<const char*>(bytes);
-  uint8_t preamble_longs;
-  ptr += copy_from_mem(ptr, &preamble_longs, sizeof(preamble_longs));
-  resize_factor rf = static_cast<resize_factor>(preamble_longs >> 6);
-  preamble_longs &= 0x3f; // remove resize factor
-  uint8_t serial_version;
-  ptr += copy_from_mem(ptr, &serial_version, sizeof(serial_version));
-  uint8_t type;
-  ptr += copy_from_mem(ptr, &type, sizeof(type));
-  uint8_t lg_nom_size;
-  ptr += copy_from_mem(ptr, &lg_nom_size, sizeof(lg_nom_size));
-  uint8_t lg_cur_size;
-  ptr += copy_from_mem(ptr, &lg_cur_size, sizeof(lg_cur_size));
-  uint8_t flags_byte;
-  ptr += copy_from_mem(ptr, &flags_byte, sizeof(flags_byte));
-  uint16_t seed_hash;
-  ptr += copy_from_mem(ptr, &seed_hash, sizeof(seed_hash));
-  theta_sketch_alloc<A>::check_sketch_type(type, SKETCH_TYPE);
-  theta_sketch_alloc<A>::check_serial_version(serial_version, theta_sketch_alloc<A>::SERIAL_VERSION);
-  theta_sketch_alloc<A>::check_seed_hash(seed_hash, theta_sketch_alloc<A>::get_seed_hash(seed));
-  return internal_deserialize(ptr, size - (ptr - static_cast<const char*>(bytes)), rf, lg_cur_size, lg_nom_size, flags_byte, seed);
+uint16_t update_theta_sketch_alloc<A>::get_seed_hash() const {
+  return compute_seed_hash(table_.seed_);
 }
 
 template<typename A>
-update_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::internal_deserialize(const void* bytes, size_t size, resize_factor rf, uint8_t lg_cur_size, uint8_t lg_nom_size, uint8_t flags_byte, uint64_t seed) {
-  const uint32_t table_size = 1 << lg_cur_size;
-  ensure_minimum_memory(size, 16 + sizeof(uint64_t) * table_size);
-  const char* ptr = static_cast<const char*>(bytes);
-  uint32_t num_keys;
-  ptr += copy_from_mem(ptr, &num_keys, sizeof(num_keys));
-  float p;
-  ptr += copy_from_mem(ptr, &p, sizeof(p));
-  uint64_t theta;
-  ptr += copy_from_mem(ptr, &theta, sizeof(theta));
-  vector_u64<A> keys(table_size);
-  ptr += copy_from_mem(ptr, keys.data(), sizeof(uint64_t) * table_size);
-  const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
-  return update_theta_sketch_alloc<A>(is_empty, theta, lg_cur_size, lg_nom_size, std::move(keys), num_keys, rf, p, seed);
+uint8_t update_theta_sketch_alloc<A>::get_lg_k() const {
+  return table_.lg_nom_size_;
 }
 
 template<typename A>
-void update_theta_sketch_alloc<A>::update(const std::string& value) {
-  if (value.empty()) return;
-  update(value.c_str(), value.length());
+auto update_theta_sketch_alloc<A>::get_rf() const -> resize_factor {
+  return table_.rf_;
 }
 
 template<typename A>
@@ -460,19 +172,7 @@ void update_theta_sketch_alloc<A>::update(int8_t value) {
 
 template<typename A>
 void update_theta_sketch_alloc<A>::update(double value) {
-  union {
-    int64_t long_value;
-    double double_value;
-  } long_double_union;
-
-  if (value == 0.0) {
-    long_double_union.double_value = 0.0; // canonicalize -0.0 to 0.0
-  } else if (std::isnan(value)) {
-    long_double_union.long_value = 0x7ff8000000000000L; // canonicalize NaN using value from Java's Double.doubleToLongBits()
-  } else {
-    long_double_union.double_value = value;
-  }
-  update(&long_double_union, sizeof(long_double_union));
+  update(canonical_double(value));
 }
 
 template<typename A>
@@ -481,157 +181,116 @@ void update_theta_sketch_alloc<A>::update(float value) {
 }
 
 template<typename A>
-void update_theta_sketch_alloc<A>::update(const void* data, unsigned length) {
-  HashState hashes;
-  MurmurHash3_x64_128(data, length, seed_, hashes);
-  const uint64_t hash = hashes.h1 >> 1; // Java implementation does logical shift >>> to make values positive
-  internal_update(hash);
-}
-
-template<typename A>
-compact_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::compact(bool ordered) const {
-  return compact_theta_sketch_alloc<A>(*this, ordered);
+void update_theta_sketch_alloc<A>::update(const std::string& value) {
+  if (value.empty()) return;
+  update(value.c_str(), value.length());
 }
 
 template<typename A>
-void update_theta_sketch_alloc<A>::internal_update(uint64_t hash) {
-  this->is_empty_ = false;
-  if (hash >= this->theta_ || hash == 0) return; // hash == 0 is reserved to mark empty slots in the table
-  if (hash_search_or_insert(hash, keys_.data(), lg_cur_size_)) {
-    num_keys_++;
-    if (num_keys_ > capacity_) {
-      if (lg_cur_size_ <= lg_nom_size_) {
-        resize();
-      } else {
-        rebuild();
-      }
-    }
+void update_theta_sketch_alloc<A>::update(const void* data, size_t length) {
+  const uint64_t hash = table_.hash_and_screen(data, length);
+  if (hash == 0) return;
+  auto result = table_.find(hash);
+  if (!result.second) {
+    table_.insert(result.first, hash);
   }
 }
 
 template<typename A>
 void update_theta_sketch_alloc<A>::trim() {
-  if (num_keys_ > static_cast<uint32_t>(1 << lg_nom_size_)) rebuild();
+  table_.trim();
 }
 
 template<typename A>
-void update_theta_sketch_alloc<A>::resize() {
-  const uint8_t lg_tgt_size = lg_nom_size_ + 1;
-  const uint8_t factor = std::max(1, std::min(static_cast<int>(rf_), lg_tgt_size - lg_cur_size_));
-  const uint8_t lg_new_size = lg_cur_size_ + factor;
-  const uint32_t new_size = 1 << lg_new_size;
-  vector_u64<A> new_keys(new_size, 0);
-  for (uint32_t i = 0; i < keys_.size(); i++) {
-    if (keys_[i] != 0) {
-      hash_search_or_insert(keys_[i], new_keys.data(), lg_new_size); // TODO hash_insert
-    }
-  }
-  keys_ = std::move(new_keys);
-  lg_cur_size_ += factor;
-  capacity_ = get_capacity(lg_cur_size_, lg_nom_size_);
-}
-
-template<typename A>
-void update_theta_sketch_alloc<A>::rebuild() {
-  const uint32_t pivot = (1 << lg_nom_size_) + keys_.size() - num_keys_;
-  std::nth_element(keys_.begin(), keys_.begin() + pivot, keys_.end());
-  this->theta_ = keys_[pivot];
-  vector_u64<A> new_keys(keys_.size(), 0);
-  num_keys_ = 0;
-  for (uint32_t i = 0; i < keys_.size(); i++) {
-    if (keys_[i] != 0 && keys_[i] < this->theta_) {
-      hash_search_or_insert(keys_[i], new_keys.data(), lg_cur_size_); // TODO hash_insert
-      num_keys_++;
-    }
-  }
-  keys_ = std::move(new_keys);
+auto update_theta_sketch_alloc<A>::begin() -> iterator {
+  return iterator(table_.entries_, 1 << table_.lg_cur_size_, 0);
 }
 
 template<typename A>
-uint32_t update_theta_sketch_alloc<A>::get_capacity(uint8_t lg_cur_size, uint8_t lg_nom_size) {
-  const double fraction = (lg_cur_size <= lg_nom_size) ? RESIZE_THRESHOLD : REBUILD_THRESHOLD;
-  return std::floor(fraction * (1 << lg_cur_size));
+auto update_theta_sketch_alloc<A>::end() -> iterator {
+  return iterator(nullptr, 0, 1 << table_.lg_cur_size_);
 }
 
 template<typename A>
-uint32_t update_theta_sketch_alloc<A>::get_stride(uint64_t hash, uint8_t lg_size) {
-  // odd and independent of index assuming lg_size lowest bits of the hash were used for the index
-  return (2 * static_cast<uint32_t>((hash >> lg_size) & STRIDE_MASK)) + 1;
+auto update_theta_sketch_alloc<A>::begin() const -> const_iterator {
+  return const_iterator(table_.entries_, 1 << table_.lg_cur_size_, 0);
 }
 
 template<typename A>
-bool update_theta_sketch_alloc<A>::hash_search_or_insert(uint64_t hash, uint64_t* table, uint8_t lg_size) {
-  const uint32_t mask = (1 << lg_size) - 1;
-  const uint32_t stride = get_stride(hash, lg_size);
-  uint32_t cur_probe = static_cast<uint32_t>(hash) & mask;
+auto update_theta_sketch_alloc<A>::end() const -> const_iterator {
+  return const_iterator(nullptr, 0, 1 << table_.lg_cur_size_);
+}
 
-  // search for duplicate or zero
-  const uint32_t loop_index = cur_probe;
-  do {
-    const uint64_t value = table[cur_probe];
-    if (value == 0) {
-      table[cur_probe] = hash; // insert value
-      return true;
-    } else if (value == hash) {
-      return false; // found a duplicate
-    }
-    cur_probe = (cur_probe + stride) & mask;
-  } while (cur_probe != loop_index);
-  throw std::logic_error("key not found and no empty slots!");
-}
-
-template<typename A>
-bool update_theta_sketch_alloc<A>::hash_search(uint64_t hash, const uint64_t* table, uint8_t lg_size) {
-  const uint32_t mask = (1 << lg_size) - 1;
-  const uint32_t stride = update_theta_sketch_alloc<A>::get_stride(hash, lg_size);
-  uint32_t cur_probe = static_cast<uint32_t>(hash) & mask;
-  const uint32_t loop_index = cur_probe;
-  do {
-    const uint64_t value = table[cur_probe];
-    if (value == 0) {
-      return false;
-    } else if (value == hash) {
-      return true;
-    }
-    cur_probe = (cur_probe + stride) & mask;
-  } while (cur_probe != loop_index);
-  throw std::logic_error("key not found and search wrapped");
+template<typename A>
+compact_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::compact(bool ordered) const {
+  return compact_theta_sketch_alloc<A>(*this, ordered);
 }
 
 template<typename A>
-typename theta_sketch_alloc<A>::const_iterator update_theta_sketch_alloc<A>::begin() const {
-  return typename theta_sketch_alloc<A>::const_iterator(keys_.data(), keys_.size(), 0);
+void update_theta_sketch_alloc<A>::print_specifics(ostrstream& os) const {
+  os << "   lg nominal size      : " << static_cast<int>(table_.lg_nom_size_) << std::endl;
+  os << "   lg current size      : " << static_cast<int>(table_.lg_cur_size_) << std::endl;
+  os << "   resize factor        : " << (1 << table_.rf_) << std::endl;
 }
 
+// builder
+
 template<typename A>
-typename theta_sketch_alloc<A>::const_iterator update_theta_sketch_alloc<A>::end() const {
-  return typename theta_sketch_alloc<A>::const_iterator(keys_.data(), keys_.size(), keys_.size());
+update_theta_sketch_alloc<A>::builder::builder(const A& allocator): theta_base_builder<builder, A>(allocator) {}
+
+template<typename A>
+update_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::builder::build() const {
+  return update_theta_sketch_alloc(this->starting_lg_size(), this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->allocator_);
 }
 
 // compact sketch
 
 template<typename A>
-compact_theta_sketch_alloc<A>::compact_theta_sketch_alloc(bool is_empty, uint64_t theta, vector_u64<A>&& keys, uint16_t seed_hash, bool is_ordered):
-theta_sketch_alloc<A>(is_empty, theta),
-keys_(std::move(keys)),
+compact_theta_sketch_alloc<A>::compact_theta_sketch_alloc(const Base& other, bool ordered):
+is_empty_(other.is_empty()),
+is_ordered_(other.is_ordered() || ordered),
+seed_hash_(other.get_seed_hash()),
+theta_(other.get_theta64()),
+entries_(other.get_allocator())
+{
+  entries_.reserve(other.get_num_retained());
+  std::copy(other.begin(), other.end(), std::back_inserter(entries_));
+  if (ordered && !other.is_ordered()) std::sort(entries_.begin(), entries_.end());
+}
+
+template<typename A>
+compact_theta_sketch_alloc<A>::compact_theta_sketch_alloc(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta,
+    std::vector<uint64_t, A>&& entries):
+is_empty_(is_empty),
+is_ordered_(is_ordered),
 seed_hash_(seed_hash),
-is_ordered_(is_ordered)
+theta_(theta),
+entries_(std::move(entries))
 {}
 
 template<typename A>
-compact_theta_sketch_alloc<A>::compact_theta_sketch_alloc(const theta_sketch_alloc<A>& other, bool ordered):
-theta_sketch_alloc<A>(other),
-keys_(other.get_num_retained()),
-seed_hash_(other.get_seed_hash()),
-is_ordered_(other.is_ordered() || ordered)
-{
-  std::copy(other.begin(), other.end(), keys_.begin());
-  if (ordered && !other.is_ordered()) std::sort(keys_.begin(), keys_.end());
+A compact_theta_sketch_alloc<A>::get_allocator() const {
+  return entries_.get_allocator();
+}
+
+template<typename A>
+bool compact_theta_sketch_alloc<A>::is_empty() const {
+  return is_empty_;
+}
+
+template<typename A>
+bool compact_theta_sketch_alloc<A>::is_ordered() const {
+  return is_ordered_;
+}
+
+template<typename A>
+uint64_t compact_theta_sketch_alloc<A>::get_theta64() const {
+  return theta_;
 }
 
 template<typename A>
 uint32_t compact_theta_sketch_alloc<A>::get_num_retained() const {
-  return keys_.size();
+  return entries_.size();
 }
 
 template<typename A>
@@ -640,158 +299,148 @@ uint16_t compact_theta_sketch_alloc<A>::get_seed_hash() const {
 }
 
 template<typename A>
-bool compact_theta_sketch_alloc<A>::is_ordered() const {
-  return is_ordered_;
+auto compact_theta_sketch_alloc<A>::begin() -> iterator {
+  return iterator(entries_.data(), entries_.size(), 0);
 }
 
 template<typename A>
-string<A> compact_theta_sketch_alloc<A>::to_string(bool print_items) const {
-  std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
-  os << "### Compact Theta sketch summary:" << std::endl;
-  os << "   num retained keys    : " << keys_.size() << std::endl;
-  os << "   seed hash            : " << this->get_seed_hash() << std::endl;
-  os << "   empty?               : " << (this->is_empty() ? "true" : "false") << std::endl;
-  os << "   ordered?             : " << (this->is_ordered() ? "true" : "false") << std::endl;
-  os << "   estimation mode?     : " << (this->is_estimation_mode() ? "true" : "false") << std::endl;
-  os << "   theta (fraction)     : " << this->get_theta() << std::endl;
-  os << "   theta (raw 64-bit)   : " << this->theta_ << std::endl;
-  os << "   estimate             : " << this->get_estimate() << std::endl;
-  os << "   lower bound 95% conf : " << this->get_lower_bound(2) << std::endl;
-  os << "   upper bound 95% conf : " << this->get_upper_bound(2) << std::endl;
-  os << "### End sketch summary" << std::endl;
-  if (print_items) {
-    os << "### Retained keys" << std::endl;
-    for (auto key: *this) os << "   " << key << std::endl;
-    os << "### End retained keys" << std::endl;
-  }
-  return os.str();
+auto compact_theta_sketch_alloc<A>::end() -> iterator {
+  return iterator(nullptr, 0, entries_.size());
 }
 
 template<typename A>
+auto compact_theta_sketch_alloc<A>::begin() const -> const_iterator {
+  return const_iterator(entries_.data(), entries_.size(), 0);
+}
+
+template<typename A>
+auto compact_theta_sketch_alloc<A>::end() const -> const_iterator {
+  return const_iterator(nullptr, 0, entries_.size());
+}
+
+template<typename A>
+void compact_theta_sketch_alloc<A>::print_specifics(ostrstream&) const {}
+
+template<typename A>
 void compact_theta_sketch_alloc<A>::serialize(std::ostream& os) const {
-  const bool is_single_item = keys_.size() == 1 && !this->is_estimation_mode();
+  const bool is_single_item = entries_.size() == 1 && !this->is_estimation_mode();
   const uint8_t preamble_longs = this->is_empty() || is_single_item ? 1 : this->is_estimation_mode() ? 3 : 2;
   os.write(reinterpret_cast<const char*>(&preamble_longs), sizeof(preamble_longs));
-  const uint8_t serial_version = theta_sketch_alloc<A>::SERIAL_VERSION;
+  const uint8_t serial_version = SERIAL_VERSION;
   os.write(reinterpret_cast<const char*>(&serial_version), sizeof(serial_version));
   const uint8_t type = SKETCH_TYPE;
   os.write(reinterpret_cast<const char*>(&type), sizeof(type));
   const uint16_t unused16 = 0;
   os.write(reinterpret_cast<const char*>(&unused16), sizeof(unused16));
   const uint8_t flags_byte(
-    (1 << theta_sketch_alloc<A>::flags::IS_COMPACT) |
-    (1 << theta_sketch_alloc<A>::flags::IS_READ_ONLY) |
-    (this->is_empty() ? 1 << theta_sketch_alloc<A>::flags::IS_EMPTY : 0) |
-    (this->is_ordered() ? 1 << theta_sketch_alloc<A>::flags::IS_ORDERED : 0)
+    (1 << flags::IS_COMPACT) |
+    (1 << flags::IS_READ_ONLY) |
+    (this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
+    (this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
   );
   os.write(reinterpret_cast<const char*>(&flags_byte), sizeof(flags_byte));
   const uint16_t seed_hash = get_seed_hash();
-  os.write((char*)&seed_hash, sizeof(seed_hash));
+  os.write(reinterpret_cast<const char*>(&seed_hash), sizeof(seed_hash));
   if (!this->is_empty()) {
     if (!is_single_item) {
-      const uint32_t num_keys = keys_.size();
-      os.write((char*)&num_keys, sizeof(num_keys));
+      const uint32_t num_entries = entries_.size();
+      os.write(reinterpret_cast<const char*>(&num_entries), sizeof(num_entries));
       const uint32_t unused32 = 0;
-      os.write((char*)&unused32, sizeof(unused32));
+      os.write(reinterpret_cast<const char*>(&unused32), sizeof(unused32));
       if (this->is_estimation_mode()) {
-        os.write((char*)&(this->theta_), sizeof(uint64_t));
+        os.write(reinterpret_cast<const char*>(&(this->theta_)), sizeof(uint64_t));
       }
     }
-    os.write((char*)keys_.data(), sizeof(uint64_t) * keys_.size());
+    os.write(reinterpret_cast<const char*>(entries_.data()), entries_.size() * sizeof(uint64_t));
   }
 }
 
 template<typename A>
-vector_u8<A> compact_theta_sketch_alloc<A>::serialize(unsigned header_size_bytes) const {
-  const bool is_single_item = keys_.size() == 1 && !this->is_estimation_mode();
+auto compact_theta_sketch_alloc<A>::serialize(unsigned header_size_bytes) const -> vector_bytes {
+  const bool is_single_item = entries_.size() == 1 && !this->is_estimation_mode();
   const uint8_t preamble_longs = this->is_empty() || is_single_item ? 1 : this->is_estimation_mode() ? 3 : 2;
-  const size_t size = header_size_bytes + sizeof(uint64_t) * preamble_longs + sizeof(uint64_t) * keys_.size();
-  vector_u8<A> bytes(size);
+  const size_t size = header_size_bytes + sizeof(uint64_t) * preamble_longs
+      + sizeof(uint64_t) * entries_.size();
+  vector_bytes bytes(size, 0, entries_.get_allocator());
   uint8_t* ptr = bytes.data() + header_size_bytes;
 
   ptr += copy_to_mem(&preamble_longs, ptr, sizeof(preamble_longs));
-  const uint8_t serial_version = theta_sketch_alloc<A>::SERIAL_VERSION;
+  const uint8_t serial_version = SERIAL_VERSION;
   ptr += copy_to_mem(&serial_version, ptr, sizeof(serial_version));
   const uint8_t type = SKETCH_TYPE;
   ptr += copy_to_mem(&type, ptr, sizeof(type));
   const uint16_t unused16 = 0;
   ptr += copy_to_mem(&unused16, ptr, sizeof(unused16));
   const uint8_t flags_byte(
-    (1 << theta_sketch_alloc<A>::flags::IS_COMPACT) |
-    (1 << theta_sketch_alloc<A>::flags::IS_READ_ONLY) |
-    (this->is_empty() ? 1 << theta_sketch_alloc<A>::flags::IS_EMPTY : 0) |
-    (this->is_ordered() ? 1 << theta_sketch_alloc<A>::flags::IS_ORDERED : 0)
+    (1 << flags::IS_COMPACT) |
+    (1 << flags::IS_READ_ONLY) |
+    (this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
+    (this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
   );
   ptr += copy_to_mem(&flags_byte, ptr, sizeof(flags_byte));
   const uint16_t seed_hash = get_seed_hash();
   ptr += copy_to_mem(&seed_hash, ptr, sizeof(seed_hash));
   if (!this->is_empty()) {
     if (!is_single_item) {
-      const uint32_t num_keys = keys_.size();
-      ptr += copy_to_mem(&num_keys, ptr, sizeof(num_keys));
+      const uint32_t num_entries = entries_.size();
+      ptr += copy_to_mem(&num_entries, ptr, sizeof(num_entries));
       const uint32_t unused32 = 0;
       ptr += copy_to_mem(&unused32, ptr, sizeof(unused32));
       if (this->is_estimation_mode()) {
-        ptr += copy_to_mem(&(this->theta_), ptr, sizeof(uint64_t));
+        ptr += copy_to_mem(&theta_, ptr, sizeof(uint64_t));
       }
     }
-    ptr += copy_to_mem(keys_.data(), ptr, sizeof(uint64_t) * keys_.size());
+    ptr += copy_to_mem(entries_.data(), ptr, entries_.size() * sizeof(uint64_t));
   }
-
   return bytes;
 }
 
 template<typename A>
-compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed) {
+compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed, const A& allocator) {
   uint8_t preamble_longs;
-  is.read((char*)&preamble_longs, sizeof(preamble_longs));
+  is.read(reinterpret_cast<char*>(&preamble_longs), sizeof(preamble_longs));
   uint8_t serial_version;
-  is.read((char*)&serial_version, sizeof(serial_version));
+  is.read(reinterpret_cast<char*>(&serial_version), sizeof(serial_version));
   uint8_t type;
-  is.read((char*)&type, sizeof(type));
+  is.read(reinterpret_cast<char*>(&type), sizeof(type));
   uint16_t unused16;
-  is.read((char*)&unused16, sizeof(unused16));
+  is.read(reinterpret_cast<char*>(&unused16), sizeof(unused16));
   uint8_t flags_byte;
-  is.read((char*)&flags_byte, sizeof(flags_byte));
+  is.read(reinterpret_cast<char*>(&flags_byte), sizeof(flags_byte));
   uint16_t seed_hash;
-  is.read((char*)&seed_hash, sizeof(seed_hash));
-  theta_sketch_alloc<A>::check_sketch_type(type, SKETCH_TYPE);
-  theta_sketch_alloc<A>::check_serial_version(serial_version, theta_sketch_alloc<A>::SERIAL_VERSION);
-  const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
-  if (!is_empty) theta_sketch_alloc<A>::check_seed_hash(seed_hash, theta_sketch_alloc<A>::get_seed_hash(seed));
-  return internal_deserialize(is, preamble_longs, flags_byte, seed_hash);
-}
-
-template<typename A>
-compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::internal_deserialize(std::istream& is, uint8_t preamble_longs, uint8_t flags_byte, uint16_t seed_hash) {
-  uint64_t theta = theta_sketch_alloc<A>::MAX_THETA;
-  uint32_t num_keys = 0;
-
-  const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
+  is.read(reinterpret_cast<char*>(&seed_hash), sizeof(seed_hash));
+  checker<true>::check_sketch_type(type, SKETCH_TYPE);
+  checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
+  const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
+  if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
+
+  uint64_t theta = theta_constants::MAX_THETA;
+  uint32_t num_entries = 0;
   if (!is_empty) {
     if (preamble_longs == 1) {
-      num_keys = 1;
+      num_entries = 1;
     } else {
-      is.read((char*)&num_keys, sizeof(num_keys));
+      is.read(reinterpret_cast<char*>(&num_entries), sizeof(num_entries));
       uint32_t unused32;
-      is.read((char*)&unused32, sizeof(unused32));
+      is.read(reinterpret_cast<char*>(&unused32), sizeof(unused32));
       if (preamble_longs > 2) {
-        is.read((char*)&theta, sizeof(theta));
+        is.read(reinterpret_cast<char*>(&theta), sizeof(theta));
       }
     }
   }
-  vector_u64<A> keys(num_keys);
-  if (!is_empty) is.read((char*)keys.data(), sizeof(uint64_t) * keys.size());
+  std::vector<uint64_t, A> entries(num_entries, 0, allocator);
+  if (!is_empty) is.read(reinterpret_cast<char*>(entries.data()), sizeof(uint64_t) * entries.size());
 
-  const bool is_ordered = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_ORDERED);
-  if (!is.good()) throw std::runtime_error("error reading from std::istream"); 
-  return compact_theta_sketch_alloc<A>(is_empty, theta, std::move(keys), seed_hash, is_ordered);
+  const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
+  if (!is.good()) throw std::runtime_error("error reading from std::istream");
+  return compact_theta_sketch_alloc(is_empty, is_ordered, seed_hash, theta, std::move(entries));
 }
 
 template<typename A>
-compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed) {
+compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed, const A& allocator) {
   ensure_minimum_memory(size, 8);
   const char* ptr = static_cast<const char*>(bytes);
+  const char* base = ptr;
   uint8_t preamble_longs;
   ptr += copy_from_mem(ptr, &preamble_longs, sizeof(preamble_longs));
   uint8_t serial_version;
@@ -804,28 +453,19 @@ compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(const v
   ptr += copy_from_mem(ptr, &flags_byte, sizeof(flags_byte));
   uint16_t seed_hash;
   ptr += copy_from_mem(ptr, &seed_hash, sizeof(seed_hash));
-  theta_sketch_alloc<A>::check_sketch_type(type, SKETCH_TYPE);
-  theta_sketch_alloc<A>::check_serial_version(serial_version, theta_sketch_alloc<A>::SERIAL_VERSION);
-  const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
-  if (!is_empty) theta_sketch_alloc<A>::check_seed_hash(seed_hash, theta_sketch_alloc<A>::get_seed_hash(seed));
-  return internal_deserialize(ptr, size - (ptr - static_cast<const char*>(bytes)), preamble_longs, flags_byte, seed_hash);
-}
-
-template<typename A>
-compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::internal_deserialize(const void* bytes, size_t size, uint8_t preamble_longs, uint8_t flags_byte, uint16_t seed_hash) {
-  const char* ptr = static_cast<const char*>(bytes);
-  const char* base = ptr;
-
-  uint64_t theta = theta_sketch_alloc<A>::MAX_THETA;
-  uint32_t num_keys = 0;
+  checker<true>::check_sketch_type(type, SKETCH_TYPE);
+  checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
+  const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
+  if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
 
-  const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
+  uint64_t theta = theta_constants::MAX_THETA;
+  uint32_t num_entries = 0;
   if (!is_empty) {
     if (preamble_longs == 1) {
-      num_keys = 1;
+      num_entries = 1;
     } else {
       ensure_minimum_memory(size, 8); // read the first prelong before this method
-      ptr += copy_from_mem(ptr, &num_keys, sizeof(num_keys));
+      ptr += copy_from_mem(ptr, &num_entries, sizeof(num_entries));
       uint32_t unused32;
       ptr += copy_from_mem(ptr, &unused32, sizeof(unused32));
       if (preamble_longs > 2) {
@@ -834,106 +474,16 @@ compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::internal_deserializ
       }
     }
   }
-  const size_t keys_size_bytes = sizeof(uint64_t) * num_keys;
-  check_memory_size(ptr - base + keys_size_bytes, size);
-  vector_u64<A> keys(num_keys);
-  if (!is_empty) ptr += copy_from_mem(ptr, keys.data(), keys_size_bytes);
-
-  const bool is_ordered = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_ORDERED);
-  return compact_theta_sketch_alloc<A>(is_empty, theta, std::move(keys), seed_hash, is_ordered);
-}
-
-template<typename A>
-typename theta_sketch_alloc<A>::const_iterator compact_theta_sketch_alloc<A>::begin() const {
-  return typename theta_sketch_alloc<A>::const_iterator(keys_.data(), keys_.size(), 0);
-}
-
-template<typename A>
-typename theta_sketch_alloc<A>::const_iterator compact_theta_sketch_alloc<A>::end() const {
-  return typename theta_sketch_alloc<A>::const_iterator(keys_.data(), keys_.size(), keys_.size());
-}
-
-// builder
-
-template<typename A>
-update_theta_sketch_alloc<A>::builder::builder():
-lg_k_(DEFAULT_LG_K), rf_(DEFAULT_RESIZE_FACTOR), p_(1), seed_(DEFAULT_SEED) {}
-
-template<typename A>
-typename update_theta_sketch_alloc<A>::builder& update_theta_sketch_alloc<A>::builder::set_lg_k(uint8_t lg_k) {
-  if (lg_k < MIN_LG_K) {
-    throw std::invalid_argument("lg_k must not be less than " + std::to_string(MIN_LG_K) + ": " + std::to_string(lg_k));
-  }
-  lg_k_ = lg_k;
-  return *this;
-}
-
-template<typename A>
-typename update_theta_sketch_alloc<A>::builder& update_theta_sketch_alloc<A>::builder::set_resize_factor(resize_factor rf) {
-  rf_ = rf;
-  return *this;
-}
+  const size_t entries_size_bytes = sizeof(uint64_t) * num_entries;
+  check_memory_size(ptr - base + entries_size_bytes, size);
+  std::vector<uint64_t, A> entries(num_entries, 0, allocator);
+  if (!is_empty) ptr += copy_from_mem(ptr, entries.data(), entries_size_bytes);
 
-template<typename A>
-typename update_theta_sketch_alloc<A>::builder& update_theta_sketch_alloc<A>::builder::set_p(float p) {
-  p_ = p;
-  return *this;
-}
-
-template<typename A>
-typename update_theta_sketch_alloc<A>::builder& update_theta_sketch_alloc<A>::builder::set_seed(uint64_t seed) {
-  seed_ = seed;
-  return *this;
-}
-
-template<typename A>
-uint8_t update_theta_sketch_alloc<A>::builder::starting_sub_multiple(uint8_t lg_tgt, uint8_t lg_min, uint8_t lg_rf) {
-  return (lg_tgt <= lg_min) ? lg_min : (lg_rf == 0) ? lg_tgt : ((lg_tgt - lg_min) % lg_rf) + lg_min;
-}
-
-template<typename A>
-update_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::builder::build() const {
-  return update_theta_sketch_alloc<A>(starting_sub_multiple(lg_k_ + 1, MIN_LG_K, static_cast<uint8_t>(rf_)), lg_k_, rf_, p_, seed_);
-}
-
-// iterator
-
-template<typename A>
-theta_sketch_alloc<A>::const_iterator::const_iterator(const uint64_t* keys, uint32_t size, uint32_t index):
-keys_(keys), size_(size), index_(index) {
-  while (index_ < size_ && keys_[index_] == 0) ++index_;
-}
-
-template<typename A>
-typename theta_sketch_alloc<A>::const_iterator& theta_sketch_alloc<A>::const_iterator::operator++() {
-  do {
-    ++index_;
-  } while (index_ < size_ && keys_[index_] == 0);
-  return *this;
-}
-
-template<typename A>
-typename theta_sketch_alloc<A>::const_iterator theta_sketch_alloc<A>::const_iterator::operator++(int) {
-  const_iterator tmp(*this);
-  operator++();
-  return tmp;
-}
-
-template<typename A>
-bool theta_sketch_alloc<A>::const_iterator::operator==(const const_iterator& other) const {
-  return index_ == other.index_;
-}
-
-template<typename A>
-bool theta_sketch_alloc<A>::const_iterator::operator!=(const const_iterator& other) const {
-  return index_ != other.index_;
-}
-
-template<typename A>
-uint64_t theta_sketch_alloc<A>::const_iterator::operator*() const {
-  return keys_[index_];
+  const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
+  return compact_theta_sketch_alloc(is_empty, is_ordered, seed_hash, theta, std::move(entries));
 }
 
 } /* namespace datasketches */
 
 #endif
+
diff --git a/be/src/thirdparty/datasketches/theta_union.hpp b/be/src/thirdparty/datasketches/theta_union.hpp
index 6cf8ccc..44f9b52 100644
--- a/be/src/thirdparty/datasketches/theta_union.hpp
+++ b/be/src/thirdparty/datasketches/theta_union.hpp
@@ -20,103 +20,70 @@
 #ifndef THETA_UNION_HPP_
 #define THETA_UNION_HPP_
 
-#include <memory>
-#include <functional>
-#include <climits>
-
+#include "serde.hpp"
 #include "theta_sketch.hpp"
+#include "theta_union_base.hpp"
 
 namespace datasketches {
 
-/*
- * author Alexander Saydakov
- * author Lee Rhodes
- * author Kevin Lang
- */
-
-template<typename A>
+template<typename Allocator = std::allocator<uint64_t>>
 class theta_union_alloc {
 public:
-  class builder;
+  using Entry = uint64_t;
+  using ExtractKey = trivial_extract_key;
+  using Sketch = theta_sketch_alloc<Allocator>;
+  using CompactSketch = compact_theta_sketch_alloc<Allocator>;
+  using resize_factor = theta_constants::resize_factor;
+
+  struct pass_through_policy {
+    uint64_t operator()(uint64_t internal_entry, uint64_t incoming_entry) const {
+      unused(incoming_entry);
+      return internal_entry;
+    }
+  };
+  using State = theta_union_base<Entry, ExtractKey, pass_through_policy, Sketch, CompactSketch, Allocator>;
 
   // No constructor here. Use builder instead.
+  class builder;
 
   /**
    * This method is to update the union with a given sketch
    * @param sketch to update the union with
    */
-  void update(const theta_sketch_alloc<A>& sketch);
+  template<typename FwdSketch>
+  void update(FwdSketch&& sketch);
 
   /**
    * This method produces a copy of the current state of the union as a compact sketch.
    * @param ordered optional flag to specify if ordered sketch should be produced
    * @return the result of the union
    */
-  compact_theta_sketch_alloc<A> get_result(bool ordered = true) const;
+  CompactSketch get_result(bool ordered = true) const;
 
 private:
-  bool is_empty_;
-  uint64_t theta_;
-  update_theta_sketch_alloc<A> state_;
+  State state_;
 
   // for builder
-  theta_union_alloc(uint64_t theta, update_theta_sketch_alloc<A>&& state);
+  theta_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const Allocator& allocator);
 };
 
-// builder
-
 template<typename A>
-class theta_union_alloc<A>::builder {
+class theta_union_alloc<A>::builder: public theta_base_builder<builder, A> {
 public:
-  typedef typename update_theta_sketch_alloc<A>::resize_factor resize_factor;
-
-  /**
-   * Set log2(k), where k is a nominal number of entries in the sketch
-   * @param lg_k base 2 logarithm of nominal number of entries
-   * @return this builder
-   */
-  builder& set_lg_k(uint8_t lg_k);
-
-  /**
-   * Set resize factor for the internal hash table (defaults to 8)
-   * @param rf resize factor
-   * @return this builder
-   */
-  builder& set_resize_factor(resize_factor rf);
-
-  /**
-   * Set sampling probability (initial theta). The default is 1, so the sketch retains
-   * all entries until it reaches the limit, at which point it goes into the estimation mode
-   * and reduces the effective sampling probability (theta) as necessary.
-   * @param p sampling probability
-   * @return this builder
-   */
-  builder& set_p(float p);
-
-  /**
-   * Set the seed for the hash function. Should be used carefully if needed.
-   * Sketches produced with different seed are not compatible
-   * and cannot be mixed in set operations.
-   * @param seed hash seed
-   * @return this builder
-   */
-  builder& set_seed(uint64_t seed);
+  builder(const A& allocator = A());
 
   /**
    * This is to create an instance of the union with predefined parameters.
-   * @return and instance of the union
+   * @return an instance of the union
    */
   theta_union_alloc<A> build() const;
-
-private:
-  typename update_theta_sketch_alloc<A>::builder sketch_builder;
 };
 
 // alias with default allocator for convenience
-typedef theta_union_alloc<std::allocator<void>> theta_union;
+using theta_union = theta_union_alloc<std::allocator<uint64_t>>;
 
 } /* namespace datasketches */
 
 #include "theta_union_impl.hpp"
 
-# endif
+#endif
diff --git a/be/src/thirdparty/datasketches/theta_union_base.hpp b/be/src/thirdparty/datasketches/theta_union_base.hpp
new file mode 100644
index 0000000..d41f5bd
--- /dev/null
+++ b/be/src/thirdparty/datasketches/theta_union_base.hpp
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THETA_UNION_BASE_HPP_
+#define THETA_UNION_BASE_HPP_
+
+#include "theta_update_sketch_base.hpp"
+
+namespace datasketches {
+
+template<
+  typename Entry,
+  typename ExtractKey,
+  typename Policy,
+  typename Sketch,
+  typename CompactSketch,
+  typename Allocator
+>
+class theta_union_base {
+public:
+  using hash_table = theta_update_sketch_base<Entry, ExtractKey, Allocator>;
+  using resize_factor = typename hash_table::resize_factor;
+  using comparator = compare_by_key<ExtractKey>;
+
+  theta_union_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
+
+  template<typename FwdSketch>
+  void update(FwdSketch&& sketch);
+
+  CompactSketch get_result(bool ordered = true) const;
+
+  const Policy& get_policy() const;
+
+private:
+  Policy policy_;
+  hash_table table_;
+  uint64_t union_theta_;
+};
+
+} /* namespace datasketches */
+
+#include "theta_union_base_impl.hpp"
+
+#endif
diff --git a/be/src/thirdparty/datasketches/theta_union_base_impl.hpp b/be/src/thirdparty/datasketches/theta_union_base_impl.hpp
new file mode 100644
index 0000000..ec8ce56
--- /dev/null
+++ b/be/src/thirdparty/datasketches/theta_union_base_impl.hpp
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THETA_UNION_BASE_IMPL_HPP_
+#define THETA_UNION_BASE_IMPL_HPP_
+
+#include <algorithm>
+
+#include "conditional_forward.hpp"
+
+namespace datasketches {
+
+template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
+theta_union_base<EN, EK, P, S, CS, A>::theta_union_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf,
+    uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
+policy_(policy),
+table_(lg_cur_size, lg_nom_size, rf, theta, seed, allocator),
+union_theta_(table_.theta_)
+{}
+
+template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
+template<typename SS>
+void theta_union_base<EN, EK, P, S, CS, A>::update(SS&& sketch) { // merges the entries of the given sketch into the internal hash table
+  if (sketch.is_empty()) return; // an empty input leaves the union state untouched
+  if (sketch.get_seed_hash() != compute_seed_hash(table_.seed_)) throw std::invalid_argument("seed hash mismatch");
+  table_.is_empty_ = false;
+  if (sketch.get_theta64() < union_theta_) union_theta_ = sketch.get_theta64(); // union theta only ever decreases
+  for (auto& entry: sketch) {
+    const uint64_t hash = EK()(entry);
+    if (hash < union_theta_) {
+      auto result = table_.find(hash);
+      if (!result.second) { // key not in the table yet: store the entry in the slot returned by find()
+        table_.insert(result.first, conditional_forward<SS>(entry));
+      } else { // key already present: the policy decides how the existing and incoming entries combine
+        policy_(*result.first, conditional_forward<SS>(entry));
+      }
+    } else {
+      if (sketch.is_ordered()) break; // early stop; presumably an ordered sketch yields keys in ascending order, so no later key can pass (TODO confirm)
+    }
+  }
+  if (table_.theta_ < union_theta_) union_theta_ = table_.theta_; // insert() can trigger rebuild(), which lowers the table's theta
+}
+
+template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
+CS theta_union_base<EN, EK, P, S, CS, A>::get_result(bool ordered) const { // builds a compact sketch from the union state; entries are sorted by key when ordered is true
+  std::vector<EN, A> entries(table_.allocator_);
+  if (table_.is_empty_) return CS(true, true, compute_seed_hash(table_.seed_), union_theta_, std::move(entries));
+  entries.reserve(table_.num_entries_);
+  uint64_t theta = std::min(union_theta_, table_.theta_);
+  const uint32_t nominal_num = 1 << table_.lg_nom_size_;
+  if (union_theta_ >= table_.theta_ && table_.num_entries_ <= nominal_num) { // unfiltered copy only if the union theta has not dropped below the table's theta; comparing against the local min (theta) was always true
+    std::copy_if(table_.begin(), table_.end(), std::back_inserter(entries), key_not_zero<EN, EK>());
+  } else { // otherwise drop keys at or above theta, then trim to the nominal size if still too many
+    std::copy_if(table_.begin(), table_.end(), std::back_inserter(entries), key_not_zero_less_than<uint64_t, EN, EK>(theta));
+    if (entries.size() > nominal_num) {
+      std::nth_element(entries.begin(), entries.begin() + nominal_num, entries.end(), comparator());
+      theta = EK()(entries[nominal_num]); // the first excluded key becomes the new theta
+      entries.erase(entries.begin() + nominal_num, entries.end());
+      entries.shrink_to_fit();
+    }
+  }
+  if (ordered) std::sort(entries.begin(), entries.end(), comparator());
+  return CS(table_.is_empty_, ordered, compute_seed_hash(table_.seed_), theta, std::move(entries));
+}
+
+template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
+const P& theta_union_base<EN, EK, P, S, CS, A>::get_policy() const {
+  return policy_;
+}
+
+} /* namespace datasketches */
+
+#endif
diff --git a/be/src/thirdparty/datasketches/theta_union_impl.hpp b/be/src/thirdparty/datasketches/theta_union_impl.hpp
index 4d8ebaa..4708d70 100644
--- a/be/src/thirdparty/datasketches/theta_union_impl.hpp
+++ b/be/src/thirdparty/datasketches/theta_union_impl.hpp
@@ -22,86 +22,30 @@
 
 namespace datasketches {
 
-/*
- * author Alexander Saydakov
- * author Lee Rhodes
- * author Kevin Lang
- */
-
-template<typename A>
-theta_union_alloc<A>::theta_union_alloc(uint64_t theta, update_theta_sketch_alloc<A>&& state):
-is_empty_(true), theta_(theta), state_(std::move(state)) {}
-
-template<typename A>
-void theta_union_alloc<A>::update(const theta_sketch_alloc<A>& sketch) {
-  if (sketch.is_empty()) return;
-  if (sketch.get_seed_hash() != state_.get_seed_hash()) throw std::invalid_argument("seed hash mismatch");
-  is_empty_ = false;
-  if (sketch.get_theta64() < theta_) theta_ = sketch.get_theta64();
-  if (sketch.is_ordered()) {
-    for (auto hash: sketch) {
-      if (hash >= theta_) break; // early stop
-      state_.internal_update(hash);
-    }
-  } else {
-    for (auto hash: sketch) if (hash < theta_) state_.internal_update(hash);
-  }
-  if (state_.get_theta64() < theta_) theta_ = state_.get_theta64();
-}
-
 template<typename A>
-compact_theta_sketch_alloc<A> theta_union_alloc<A>::get_result(bool ordered) const {
-  if (is_empty_) return state_.compact(ordered);
-  const uint32_t nom_num_keys = 1 << state_.lg_nom_size_;
-  if (theta_ >= state_.theta_ && state_.get_num_retained() <= nom_num_keys) return state_.compact(ordered);
-  uint64_t theta = std::min(theta_, state_.get_theta64());
-  vector_u64<A> keys(state_.get_num_retained());
-  uint32_t num_keys = 0;
-  for (auto key: state_) {
-    if (key < theta) keys[num_keys++] = key;
-  }
-  if (num_keys > nom_num_keys) {
-    std::nth_element(keys.begin(), keys.begin() + nom_num_keys, keys.begin() + num_keys);
-    theta = keys[nom_num_keys];
-    num_keys = nom_num_keys;
-  }
-  if (num_keys != state_.get_num_retained()) {
-    keys.resize(num_keys);
-  }
-  if (ordered) std::sort(keys.begin(), keys.end());
-  return compact_theta_sketch_alloc<A>(false, theta, std::move(keys), state_.get_seed_hash(), ordered);
-}
-
-// builder
+theta_union_alloc<A>::theta_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const A& allocator):
+state_(lg_cur_size, lg_nom_size, rf, theta, seed, pass_through_policy(), allocator)
+{}
 
 template<typename A>
-typename theta_union_alloc<A>::builder& theta_union_alloc<A>::builder::set_lg_k(uint8_t lg_k) {
-  sketch_builder.set_lg_k(lg_k);
-  return *this;
+template<typename SS>
+void theta_union_alloc<A>::update(SS&& sketch) {
+  state_.update(std::forward<SS>(sketch));
 }
 
 template<typename A>
-typename theta_union_alloc<A>::builder& theta_union_alloc<A>::builder::set_resize_factor(resize_factor rf) {
-  sketch_builder.set_resize_factor(rf);
-  return *this;
+auto theta_union_alloc<A>::get_result(bool ordered) const -> CompactSketch {
+  return state_.get_result(ordered);
 }
 
 template<typename A>
-typename theta_union_alloc<A>::builder& theta_union_alloc<A>::builder::set_p(float p) {
-  sketch_builder.set_p(p);
-  return *this;
-}
-
-template<typename A>
-typename theta_union_alloc<A>::builder& theta_union_alloc<A>::builder::set_seed(uint64_t seed) {
-  sketch_builder.set_seed(seed);
-  return *this;
-}
+theta_union_alloc<A>::builder::builder(const A& allocator): theta_base_builder<builder, A>(allocator) {}
 
 template<typename A>
-theta_union_alloc<A> theta_union_alloc<A>::builder::build() const {
-  update_theta_sketch_alloc<A> sketch = sketch_builder.build();
-  return theta_union_alloc(sketch.get_theta64(), std::move(sketch));
+auto theta_union_alloc<A>::builder::build() const -> theta_union_alloc {
+  return theta_union_alloc(
+      this->starting_sub_multiple(this->lg_k_ + 1, this->MIN_LG_K, static_cast<uint8_t>(this->rf_)),
+      this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->allocator_);
 }
 
 } /* namespace datasketches */
diff --git a/be/src/thirdparty/datasketches/theta_update_sketch_base.hpp b/be/src/thirdparty/datasketches/theta_update_sketch_base.hpp
new file mode 100644
index 0000000..eae7984
--- /dev/null
+++ b/be/src/thirdparty/datasketches/theta_update_sketch_base.hpp
@@ -0,0 +1,243 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THETA_UPDATE_SKETCH_BASE_HPP_
+#define THETA_UPDATE_SKETCH_BASE_HPP_
+
+#include <vector>
+#include <climits>
+#include <cmath>
+
+#include "common_defs.hpp"
+#include "MurmurHash3.h"
+#include "theta_comparators.hpp"
+#include "theta_constants.hpp"
+
+namespace datasketches {
+
+template<
+  typename Entry,
+  typename ExtractKey,
+  typename Allocator
+>
+struct theta_update_sketch_base {
+  using resize_factor = theta_constants::resize_factor;
+  using comparator = compare_by_key<ExtractKey>;
+
+  theta_update_sketch_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta,
+      uint64_t seed, const Allocator& allocator, bool is_empty = true);
+  theta_update_sketch_base(const theta_update_sketch_base& other);
+  theta_update_sketch_base(theta_update_sketch_base&& other) noexcept;
+  ~theta_update_sketch_base();
+  theta_update_sketch_base& operator=(const theta_update_sketch_base& other);
+  theta_update_sketch_base& operator=(theta_update_sketch_base&& other);
+
+  using iterator = Entry*;
+
+  inline uint64_t hash_and_screen(const void* data, size_t length);
+
+  inline std::pair<iterator, bool> find(uint64_t key) const;
+
+  template<typename FwdEntry>
+  inline void insert(iterator it, FwdEntry&& entry);
+
+  iterator begin() const;
+  iterator end() const;
+
+  // resize threshold = 0.5 tuned for speed
+  static constexpr double RESIZE_THRESHOLD = 0.5;
+  // hash table rebuild threshold = 15/16
+  static constexpr double REBUILD_THRESHOLD = 15.0 / 16.0;
+
+  static constexpr uint8_t STRIDE_HASH_BITS = 7;
+  static constexpr uint32_t STRIDE_MASK = (1 << STRIDE_HASH_BITS) - 1;
+
+  Allocator allocator_;
+  bool is_empty_;
+  uint8_t lg_cur_size_;
+  uint8_t lg_nom_size_;
+  resize_factor rf_;
+  uint32_t num_entries_;
+  uint64_t theta_;
+  uint64_t seed_;
+  Entry* entries_;
+
+  void resize();
+  void rebuild();
+  void trim();
+
+  static inline uint32_t get_capacity(uint8_t lg_cur_size, uint8_t lg_nom_size);
+  static inline uint32_t get_stride(uint64_t key, uint8_t lg_size);
+  static void consolidate_non_empty(Entry* entries, size_t size, size_t num);
+};
+
+// builder
+
+template<typename Derived, typename Allocator>
+class theta_base_builder {
+public:
+  using resize_factor = theta_constants::resize_factor;
+  static const uint8_t MIN_LG_K = theta_constants::MIN_LG_K;
+  static const uint8_t MAX_LG_K = theta_constants::MAX_LG_K;
+  static const uint8_t DEFAULT_LG_K = 12;
+  static const resize_factor DEFAULT_RESIZE_FACTOR = resize_factor::X8;
+
+  /**
+   * Creates an instance of the builder with default parameters.
+   */
+  theta_base_builder(const Allocator& allocator);
+
+  /**
+   * Set log2(k), where k is a nominal number of entries in the sketch
+   * @param lg_k base 2 logarithm of nominal number of entries
+   * @return this builder
+   */
+  Derived& set_lg_k(uint8_t lg_k);
+
+  /**
+   * Set resize factor for the internal hash table (defaults to 8)
+   * @param rf resize factor
+   * @return this builder
+   */
+  Derived& set_resize_factor(resize_factor rf);
+
+  /**
+   * Set sampling probability (initial theta). The default is 1, so the sketch retains
+   * all entries until it reaches the limit, at which point it goes into the estimation mode
+   * and reduces the effective sampling probability (theta) as necessary.
+   * @param p sampling probability
+   * @return this builder
+   */
+  Derived& set_p(float p);
+
+  /**
+   * Set the seed for the hash function. Should be used carefully if needed.
+   * Sketches produced with different seeds are not compatible
+   * and cannot be mixed in set operations.
+   * @param seed hash seed
+   * @return this builder
+   */
+  Derived& set_seed(uint64_t seed);
+
+protected:
+  Allocator allocator_;
+  uint8_t lg_k_;
+  resize_factor rf_;
+  float p_;
+  uint64_t seed_;
+
+  uint64_t starting_theta() const;
+  uint8_t starting_lg_size() const;
+  static uint8_t starting_sub_multiple(uint8_t lg_tgt, uint8_t lg_min, uint8_t lg_rf);
+};
+
+// key extractor
+
+struct trivial_extract_key {
+  template<typename T>
+  auto operator()(T&& entry) const -> decltype(std::forward<T>(entry)) {
+    return std::forward<T>(entry);
+  }
+};
+
+// key not zero
+
+template<typename Entry, typename ExtractKey>
+class key_not_zero {
+public:
+  bool operator()(const Entry& entry) const {
+    return ExtractKey()(entry) != 0;
+  }
+};
+
+template<typename Key, typename Entry, typename ExtractKey>
+class key_not_zero_less_than {
+public:
+  explicit key_not_zero_less_than(const Key& key): key(key) {}
+  bool operator()(const Entry& entry) const {
+    return ExtractKey()(entry) != 0 && ExtractKey()(entry) < this->key;
+  }
+private:
+  Key key;
+};
+
+// MurMur3 hash functions
+
+static inline uint64_t compute_hash(const void* data, size_t length, uint64_t seed) {
+  HashState hashes;
+  MurmurHash3_x64_128(data, length, seed, hashes);
+  return (hashes.h1 >> 1); // Java implementation does unsigned shift >>> to make values positive
+}
+
+// iterators
+
+template<typename Entry, typename ExtractKey>
+class theta_iterator: public std::iterator<std::input_iterator_tag, Entry> {
+public:
+  theta_iterator(Entry* entries, uint32_t size, uint32_t index);
+  theta_iterator& operator++();
+  theta_iterator operator++(int);
+  bool operator==(const theta_iterator& other) const;
+  bool operator!=(const theta_iterator& other) const;
+  Entry& operator*() const;
+
+private:
+  Entry* entries_;
+  uint32_t size_;
+  uint32_t index_;
+};
+
+template<typename Entry, typename ExtractKey>
+class theta_const_iterator: public std::iterator<std::input_iterator_tag, Entry> {
+public:
+  theta_const_iterator(const Entry* entries, uint32_t size, uint32_t index);
+  theta_const_iterator& operator++();
+  theta_const_iterator operator++(int);
+  bool operator==(const theta_const_iterator& other) const;
+  bool operator!=(const theta_const_iterator& other) const;
+  const Entry& operator*() const;
+
+private:
+  const Entry* entries_;
+  uint32_t size_;
+  uint32_t index_;
+};
+
+// double value canonicalization for compatibility with Java
+static inline int64_t canonical_double(double value) {
+  union {
+    int64_t long_value;
+    double double_value;
+  } long_double_union;
+
+  if (value == 0.0) {
+    long_double_union.double_value = 0.0; // canonicalize -0.0 to 0.0
+  } else if (std::isnan(value)) {
+    long_double_union.long_value = 0x7ff8000000000000L; // canonicalize NaN using value from Java's Double.doubleToLongBits()
+  } else {
+    long_double_union.double_value = value;
+  }
+  return long_double_union.long_value;
+}
+
+} /* namespace datasketches */
+
+#include "theta_update_sketch_base_impl.hpp"
+
+#endif
diff --git a/be/src/thirdparty/datasketches/theta_update_sketch_base_impl.hpp b/be/src/thirdparty/datasketches/theta_update_sketch_base_impl.hpp
new file mode 100644
index 0000000..a343c78
--- /dev/null
+++ b/be/src/thirdparty/datasketches/theta_update_sketch_base_impl.hpp
@@ -0,0 +1,394 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THETA_UPDATE_SKETCH_BASE_IMPL_HPP_
+#define THETA_UPDATE_SKETCH_BASE_IMPL_HPP_
+
+#include <iostream>
+#include <sstream>
+#include <algorithm>
+
+namespace datasketches {
+
+template<typename EN, typename EK, typename A>
+theta_update_sketch_base<EN, EK, A>::theta_update_sketch_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const A& allocator, bool is_empty):
+allocator_(allocator),
+is_empty_(is_empty),
+lg_cur_size_(lg_cur_size),
+lg_nom_size_(lg_nom_size),
+rf_(rf),
+num_entries_(0),
+theta_(theta),
+seed_(seed),
+entries_(nullptr)
+{
+  if (lg_cur_size > 0) {
+    const size_t size = 1 << lg_cur_size;
+    entries_ = allocator_.allocate(size);
+    for (size_t i = 0; i < size; ++i) EK()(entries_[i]) = 0;
+  }
+}
+
+template<typename EN, typename EK, typename A>
+theta_update_sketch_base<EN, EK, A>::theta_update_sketch_base(const theta_update_sketch_base& other):
+allocator_(other.allocator_),
+is_empty_(other.is_empty_),
+lg_cur_size_(other.lg_cur_size_),
+lg_nom_size_(other.lg_nom_size_),
+rf_(other.rf_),
+num_entries_(other.num_entries_),
+theta_(other.theta_),
+seed_(other.seed_),
+entries_(nullptr)
+{
+  if (other.entries_ != nullptr) {
+    const size_t size = 1 << lg_cur_size_;
+    entries_ = allocator_.allocate(size);
+    for (size_t i = 0; i < size; ++i) {
+      if (EK()(other.entries_[i]) != 0) {
+        new (&entries_[i]) EN(other.entries_[i]);
+      } else {
+        EK()(entries_[i]) = 0;
+      }
+    }
+  }
+}
+
+template<typename EN, typename EK, typename A>
+theta_update_sketch_base<EN, EK, A>::theta_update_sketch_base(theta_update_sketch_base&& other) noexcept:
+allocator_(std::move(other.allocator_)),
+is_empty_(other.is_empty_),
+lg_cur_size_(other.lg_cur_size_),
+lg_nom_size_(other.lg_nom_size_),
+rf_(other.rf_),
+num_entries_(other.num_entries_),
+theta_(other.theta_),
+seed_(other.seed_),
+entries_(other.entries_)
+{
+  other.entries_ = nullptr;
+}
+
+template<typename EN, typename EK, typename A>
+theta_update_sketch_base<EN, EK, A>::~theta_update_sketch_base()
+{
+  if (entries_ != nullptr) {
+    const size_t size = 1 << lg_cur_size_;
+    for (size_t i = 0; i < size; ++i) {
+      if (EK()(entries_[i]) != 0) entries_[i].~EN();
+    }
+    allocator_.deallocate(entries_, size);
+  }
+}
+
+template<typename EN, typename EK, typename A>
+theta_update_sketch_base<EN, EK, A>& theta_update_sketch_base<EN, EK, A>::operator=(const theta_update_sketch_base& other) {
+  theta_update_sketch_base<EN, EK, A> copy(other);
+  std::swap(allocator_, copy.allocator_);
+  std::swap(is_empty_, copy.is_empty_);
+  std::swap(lg_cur_size_, copy.lg_cur_size_);
+  std::swap(lg_nom_size_, copy.lg_nom_size_);
+  std::swap(rf_, copy.rf_);
+  std::swap(num_entries_, copy.num_entries_);
+  std::swap(theta_, copy.theta_);
+  std::swap(seed_, copy.seed_);
+  std::swap(entries_, copy.entries_);
+  return *this;
+}
+
+template<typename EN, typename EK, typename A>
+theta_update_sketch_base<EN, EK, A>& theta_update_sketch_base<EN, EK, A>::operator=(theta_update_sketch_base&& other) {
+  std::swap(allocator_, other.allocator_);
+  std::swap(is_empty_, other.is_empty_);
+  std::swap(lg_cur_size_, other.lg_cur_size_);
+  std::swap(lg_nom_size_, other.lg_nom_size_);
+  std::swap(rf_, other.rf_);
+  std::swap(num_entries_, other.num_entries_);
+  std::swap(theta_, other.theta_);
+  std::swap(seed_, other.seed_);
+  std::swap(entries_, other.entries_);
+  return *this;
+}
+
+template<typename EN, typename EK, typename A>
+uint64_t theta_update_sketch_base<EN, EK, A>::hash_and_screen(const void* data, size_t length) {
+  is_empty_ = false;
+  const uint64_t hash = compute_hash(data, length, seed_);
+  if (hash >= theta_) return 0; // hash == 0 is reserved to mark empty slots in the table
+  return hash;
+}
+
+template<typename EN, typename EK, typename A>
+auto theta_update_sketch_base<EN, EK, A>::find(uint64_t key) const -> std::pair<iterator, bool> { // returns (slot, true) if key is stored, else (first empty slot on the probe path, false)
+  const size_t size = 1 << lg_cur_size_;
+  const size_t mask = size - 1; // table size is a power of two, so masking wraps the index
+  const uint32_t stride = get_stride(key, lg_cur_size_); // get_stride() returns an odd step derived from key bits above the index bits
+  uint32_t index = static_cast<uint32_t>(key) & mask; // starting slot comes from the low bits of the key
+  // search for duplicate or zero
+  const uint32_t loop_index = index; // remembered to detect a full cycle of the probe sequence
+  do {
+    const uint64_t probe = EK()(entries_[index]);
+    if (probe == 0) { // key 0 marks an empty slot
+      return std::pair<iterator, bool>(&entries_[index], false);
+    } else if (probe == key) {
+      return std::pair<iterator, bool>(&entries_[index], true);
+    }
+    index = (index + stride) & mask;
+  } while (index != loop_index);
+  throw std::logic_error("key not found and no empty slots!"); // probe sequence returned to its start without a match or an empty slot
+}
+
+template<typename EN, typename EK, typename A>
+template<typename Fwd>
+void theta_update_sketch_base<EN, EK, A>::insert(iterator it, Fwd&& entry) {
+  new (it) EN(std::forward<Fwd>(entry));
+  ++num_entries_;
+  if (num_entries_ > get_capacity(lg_cur_size_, lg_nom_size_)) {
+    if (lg_cur_size_ <= lg_nom_size_) {
+      resize();
+    } else {
+      rebuild();
+    }
+  }
+}
+
+template<typename EN, typename EK, typename A>
+auto theta_update_sketch_base<EN, EK, A>::begin() const -> iterator {
+  return entries_;
+}
+
+template<typename EN, typename EK, typename A>
+auto theta_update_sketch_base<EN, EK, A>::end() const -> iterator {
+  return &entries_[1 << lg_cur_size_];
+}
+
+template<typename EN, typename EK, typename A>
+uint32_t theta_update_sketch_base<EN, EK, A>::get_capacity(uint8_t lg_cur_size, uint8_t lg_nom_size) {
+  const double fraction = (lg_cur_size <= lg_nom_size) ? RESIZE_THRESHOLD : REBUILD_THRESHOLD;
+  return std::floor(fraction * (1 << lg_cur_size));
+}
+
+template<typename EN, typename EK, typename A>
+uint32_t theta_update_sketch_base<EN, EK, A>::get_stride(uint64_t key, uint8_t lg_size) {
+  // odd and independent of index assuming lg_size lowest bits of the key were used for the index
+  return (2 * static_cast<uint32_t>((key >> lg_size) & STRIDE_MASK)) + 1;
+}
+
+template<typename EN, typename EK, typename A>
+void theta_update_sketch_base<EN, EK, A>::resize() {
+  const size_t old_size = 1 << lg_cur_size_;
+  const uint8_t lg_tgt_size = lg_nom_size_ + 1;
+  const uint8_t factor = std::max(1, std::min(static_cast<int>(rf_), lg_tgt_size - lg_cur_size_));
+  lg_cur_size_ += factor;
+  const size_t new_size = 1 << lg_cur_size_;
+  EN* old_entries = entries_;
+  entries_ = allocator_.allocate(new_size);
+  for (size_t i = 0; i < new_size; ++i) EK()(entries_[i]) = 0;
+  num_entries_ = 0;
+  for (size_t i = 0; i < old_size; ++i) {
+    const uint64_t key = EK()(old_entries[i]);
+    if (key != 0) {
+      insert(find(key).first, std::move(old_entries[i])); // consider a special insert with no comparison
+      old_entries[i].~EN();
+    }
+  }
+  allocator_.deallocate(old_entries, old_size);
+}
+
+// assumes number of entries > nominal size
+template<typename EN, typename EK, typename A>
+void theta_update_sketch_base<EN, EK, A>::rebuild() { // keeps nominal-size entries, lowers theta, and re-inserts the kept entries into a fresh table of the same size
+  const size_t size = 1 << lg_cur_size_;
+  const uint32_t nominal_size = 1 << lg_nom_size_;
+
+  // empty entries have uninitialized payloads
+  // TODO: avoid this for empty or trivial payloads (arithmetic types)
+  consolidate_non_empty(entries_, size, num_entries_); // packs the non-empty entries into the front of the array
+
+  std::nth_element(entries_, entries_ + nominal_size, entries_ + num_entries_, comparator()); // partitions so that the first nominal_size entries order before the rest per comparator
+  this->theta_ = EK()(entries_[nominal_size]); // key of the first excluded entry becomes the new theta
+  EN* old_entries = entries_;
+  const size_t num_old_entries = num_entries_;
+  entries_ = allocator_.allocate(size);
+  for (size_t i = 0; i < size; ++i) EK()(entries_[i]) = 0; // mark every slot of the new table as empty
+  num_entries_ = 0; // insert() below counts the entries again
+  // relies on consolidating non-empty entries to the front
+  for (size_t i = 0; i < nominal_size; ++i) {
+    insert(find(EK()(old_entries[i])).first, std::move(old_entries[i])); // consider a special insert with no comparison
+    old_entries[i].~EN(); // destroy the moved-from entry in the old storage
+  }
+  for (size_t i = nominal_size; i < num_old_entries; ++i) old_entries[i].~EN(); // entries at or beyond the cut are discarded
+  allocator_.deallocate(old_entries, size);
+}
+
+template<typename EN, typename EK, typename A>
+void theta_update_sketch_base<EN, EK, A>::trim() {
+  if (num_entries_ > static_cast<uint32_t>(1 << lg_nom_size_)) rebuild();
+}
+
+template<typename EN, typename EK, typename A>
+void theta_update_sketch_base<EN, EK, A>::consolidate_non_empty(EN* entries, size_t size, size_t num) {
+  // find the first empty slot
+  size_t i = 0;
+  while (i < size) {
+    if (EK()(entries[i]) == 0) break;
+    ++i;
+  }
+  // scan the rest and move non-empty entries to the front
+  for (size_t j = i + 1; j < size; ++j) {
+    if (EK()(entries[j]) != 0) {
+      new (&entries[i]) EN(std::move(entries[j]));
+      entries[j].~EN();
+      EK()(entries[j]) = 0;
+      ++i;
+      if (i == num) break;
+    }
+  }
+}
+
+// builder
+
+template<typename Derived, typename Allocator>
+theta_base_builder<Derived, Allocator>::theta_base_builder(const Allocator& allocator):
+allocator_(allocator), lg_k_(DEFAULT_LG_K), rf_(DEFAULT_RESIZE_FACTOR), p_(1), seed_(DEFAULT_SEED) {}
+
+template<typename Derived, typename Allocator>
+Derived& theta_base_builder<Derived, Allocator>::set_lg_k(uint8_t lg_k) {
+  if (lg_k < MIN_LG_K) {
+    throw std::invalid_argument("lg_k must not be less than " + std::to_string(MIN_LG_K) + ": " + std::to_string(lg_k));
+  }
+  if (lg_k > MAX_LG_K) {
+    throw std::invalid_argument("lg_k must not be greater than " + std::to_string(MAX_LG_K) + ": " + std::to_string(lg_k));
+  }
+  lg_k_ = lg_k;
+  return static_cast<Derived&>(*this);
+}
+
+template<typename Derived, typename Allocator>
+Derived& theta_base_builder<Derived, Allocator>::set_resize_factor(resize_factor rf) {
+  rf_ = rf;
+  return static_cast<Derived&>(*this);
+}
+
+template<typename Derived, typename Allocator>
+Derived& theta_base_builder<Derived, Allocator>::set_p(float p) {
+  if (p <= 0 || p > 1) throw std::invalid_argument("sampling probability must be between 0 and 1");
+  p_ = p;
+  return static_cast<Derived&>(*this);
+}
+
+template<typename Derived, typename Allocator>
+Derived& theta_base_builder<Derived, Allocator>::set_seed(uint64_t seed) {
+  seed_ = seed;
+  return static_cast<Derived&>(*this);
+}
+
+template<typename Derived, typename Allocator>
+uint64_t theta_base_builder<Derived, Allocator>::starting_theta() const {
+  if (p_ < 1) return theta_constants::MAX_THETA * p_;
+  return theta_constants::MAX_THETA;
+}
+
+template<typename Derived, typename Allocator>
+uint8_t theta_base_builder<Derived, Allocator>::starting_lg_size() const {
+  return starting_sub_multiple(lg_k_ + 1, MIN_LG_K, static_cast<uint8_t>(rf_));
+}
+
+template<typename Derived, typename Allocator>
+uint8_t theta_base_builder<Derived, Allocator>::starting_sub_multiple(uint8_t lg_tgt, uint8_t lg_min, uint8_t lg_rf) {
+  return (lg_tgt <= lg_min) ? lg_min : (lg_rf == 0) ? lg_tgt : ((lg_tgt - lg_min) % lg_rf) + lg_min;
+}
+
+// iterator
+
+template<typename Entry, typename ExtractKey>
+theta_iterator<Entry, ExtractKey>::theta_iterator(Entry* entries, uint32_t size, uint32_t index):
+entries_(entries), size_(size), index_(index) {
+  while (index_ < size_ && ExtractKey()(entries_[index_]) == 0) ++index_;
+}
+
+template<typename Entry, typename ExtractKey>
+auto theta_iterator<Entry, ExtractKey>::operator++() -> theta_iterator& {
+  ++index_;
+  while (index_ < size_ && ExtractKey()(entries_[index_]) == 0) ++index_;
+  return *this;
+}
+
+template<typename Entry, typename ExtractKey>
+auto theta_iterator<Entry, ExtractKey>::operator++(int) -> theta_iterator {
+  theta_iterator tmp(*this);
+  operator++();
+  return tmp;
+}
+
+template<typename Entry, typename ExtractKey>
+bool theta_iterator<Entry, ExtractKey>::operator!=(const theta_iterator& other) const {
+  return index_ != other.index_;
+}
+
+template<typename Entry, typename ExtractKey>
+bool theta_iterator<Entry, ExtractKey>::operator==(const theta_iterator& other) const {
+  return index_ == other.index_;
+}
+
+template<typename Entry, typename ExtractKey>
+auto theta_iterator<Entry, ExtractKey>::operator*() const -> Entry& {
+  return entries_[index_];
+}
+
+// const iterator
+
+template<typename Entry, typename ExtractKey>
+theta_const_iterator<Entry, ExtractKey>::theta_const_iterator(const Entry* entries, uint32_t size, uint32_t index):
+entries_(entries), size_(size), index_(index) {
+  while (index_ < size_ && ExtractKey()(entries_[index_]) == 0) ++index_;
+}
+
+template<typename Entry, typename ExtractKey>
+auto theta_const_iterator<Entry, ExtractKey>::operator++() -> theta_const_iterator& {
+  ++index_;
+  while (index_ < size_ && ExtractKey()(entries_[index_]) == 0) ++index_;
+  return *this;
+}
+
+template<typename Entry, typename ExtractKey>
+auto theta_const_iterator<Entry, ExtractKey>::operator++(int) -> theta_const_iterator {
+  theta_const_iterator tmp(*this);
+  operator++();
+  return tmp;
+}
+
+template<typename Entry, typename ExtractKey>
+bool theta_const_iterator<Entry, ExtractKey>::operator!=(const theta_const_iterator& other) const {
+  return index_ != other.index_;
+}
+
+template<typename Entry, typename ExtractKey>
+bool theta_const_iterator<Entry, ExtractKey>::operator==(const theta_const_iterator& other) const {
+  return index_ == other.index_;
+}
+
+template<typename Entry, typename ExtractKey>
+auto theta_const_iterator<Entry, ExtractKey>::operator*() const -> const Entry& {
+  return entries_[index_];
+}
+
+} /* namespace datasketches */
+
+#endif
diff --git a/be/src/thirdparty/datasketches/u32_table.hpp b/be/src/thirdparty/datasketches/u32_table.hpp
index 2316fc1..fe228a5 100644
--- a/be/src/thirdparty/datasketches/u32_table.hpp
+++ b/be/src/thirdparty/datasketches/u32_table.hpp
@@ -39,8 +39,8 @@ template<typename A>
 class u32_table {
 public:
 
-  u32_table();
-  u32_table(uint8_t lg_size, uint8_t num_valid_bits);
+  u32_table(const A& allocator);
+  u32_table(uint8_t lg_size, uint8_t num_valid_bits, const A& allocator);
 
   inline size_t get_num_items() const;
   inline const uint32_t* get_slots() const;
@@ -52,7 +52,7 @@ public:
   // returns true iff the item was present and was therefore removed from the table
   inline bool maybe_delete(uint32_t item);
 
-  static u32_table make_from_pairs(const uint32_t* pairs, size_t num_pairs, uint8_t lg_k);
+  static u32_table make_from_pairs(const uint32_t* pairs, size_t num_pairs, uint8_t lg_k, const A& allocator);
 
   vector_u32<A> unwrapping_get_items() const;
 
diff --git a/be/src/thirdparty/datasketches/u32_table_impl.hpp b/be/src/thirdparty/datasketches/u32_table_impl.hpp
index aa44ba2..bf8ece9 100644
--- a/be/src/thirdparty/datasketches/u32_table_impl.hpp
+++ b/be/src/thirdparty/datasketches/u32_table_impl.hpp
@@ -29,19 +29,19 @@
 namespace datasketches {
 
 template<typename A>
-u32_table<A>::u32_table():
+u32_table<A>::u32_table(const A& allocator):
 lg_size(0),
 num_valid_bits(0),
 num_items(0),
-slots()
+slots(allocator)
 {}
 
 template<typename A>
-u32_table<A>::u32_table(uint8_t lg_size, uint8_t num_valid_bits):
+u32_table<A>::u32_table(uint8_t lg_size, uint8_t num_valid_bits, const A& allocator):
 lg_size(lg_size),
 num_valid_bits(num_valid_bits),
 num_items(0),
-slots(1 << lg_size, UINT32_MAX)
+slots(1 << lg_size, UINT32_MAX, allocator)
 {
   if (lg_size < 2) throw std::invalid_argument("lg_size must be >= 2");
   if (num_valid_bits < 1 || num_valid_bits > 32) throw std::invalid_argument("num_valid_bits must be between 1 and 32");
@@ -110,10 +110,10 @@ bool u32_table<A>::maybe_delete(uint32_t item) {
 
 // this one is specifically tailored to be a part of fm85 decompression scheme
 template<typename A>
-u32_table<A> u32_table<A>::make_from_pairs(const uint32_t* pairs, size_t num_pairs, uint8_t lg_k) {
+u32_table<A> u32_table<A>::make_from_pairs(const uint32_t* pairs, size_t num_pairs, uint8_t lg_k, const A& allocator) {
   uint8_t lg_num_slots = 2;
   while (U32_TABLE_UPSIZE_DENOM * num_pairs > U32_TABLE_UPSIZE_NUMER * (1 << lg_num_slots)) lg_num_slots++;
-  u32_table<A> table(lg_num_slots, 6 + lg_k);
+  u32_table<A> table(lg_num_slots, 6 + lg_k, allocator);
   // Note: there is a possible "snowplow effect" here because the caller is passing in a sorted pairs array
   // However, we are starting out with the correct final table size, so the problem might not occur
   for (size_t i = 0; i < num_pairs; i++) {
@@ -152,7 +152,7 @@ void u32_table<A>::rebuild(uint8_t new_lg_size) {
   const size_t new_size = 1 << new_lg_size;
   if (new_size <= num_items) throw std::logic_error("new_size <= num_items");
   vector_u32<A> old_slots = std::move(slots);
-  slots = vector_u32<A>(new_size, UINT32_MAX);
+  slots = vector_u32<A>(new_size, UINT32_MAX, old_slots.get_allocator());
   lg_size = new_lg_size;
   for (size_t i = 0; i < old_size; i++) {
     if (old_slots[i] != UINT32_MAX) {
@@ -169,9 +169,9 @@ void u32_table<A>::rebuild(uint8_t new_lg_size) {
 // The result is nearly sorted, so make sure to use an efficient sort for that case
 template<typename A>
 vector_u32<A> u32_table<A>::unwrapping_get_items() const {
-  if (num_items == 0) return vector_u32<A>();
+  if (num_items == 0) return vector_u32<A>(slots.get_allocator());
   const size_t table_size = 1 << lg_size;
-  vector_u32<A> result(num_items);
+  vector_u32<A> result(num_items, 0, slots.get_allocator());
   size_t i = 0;
   size_t l = 0;
   size_t r = num_items - 1;

[impala] 03/04: IMPALA-10611: Fix flakiness in test_wide_row

Posted by jo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 8b46d00f641abdb89d3d22de5c9a5d7044075893
Author: Riza Suminto <ri...@cloudera.com>
AuthorDate: Sat Apr 17 07:26:15 2021 -0700

    IMPALA-10611: Fix flakiness in test_wide_row
    
    test_wide_row has been failing intermittently with a "Failed to allocate
    row batch" error message. This is due to a recent change in IMPALA-9856
    that added the query option max_row_size=10MB without raising the mem_limit.
    This patch fixes the flakiness by increasing the mem_limit from 100 MB to
    132 MB to account for the 32 MB reservation needed by BufferedPlanRootSink.
    
    Testing:
    - Looped the test on a local dev machine.
    
    Change-Id: Ie1f0b7d4d6b3a875d9b408f057d46fdbdbdf2a34
    Reviewed-on: http://gerrit.cloudera.org:8080/17324
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 tests/query_test/test_scanners.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/query_test/test_scanners.py b/tests/query_test/test_scanners.py
index 0373395..44d540c 100644
--- a/tests/query_test/test_scanners.py
+++ b/tests/query_test/test_scanners.py
@@ -267,10 +267,12 @@ class TestWideRow(ImpalaTestSuite):
     # We need > 10 MB of memory because we're creating extra buffers:
     # - 10 MB table / 5 MB scan range = 2 scan ranges, each of which may allocate ~20MB
     # - Sync reads will allocate ~5MB of space
-    # The 100MB value used here was determined empirically by raising the limit until the
+    # - Result spooling require 32 MB initial reservation (2 page of 16 MB each) to fit
+    #   10 MB row.
+    # The 132MB value used here was determined empirically by raising the limit until the
     # query succeeded for all file formats -- I don't know exactly why we need this much.
     # TODO: figure out exact breakdown of memory usage (IMPALA-681)
-    new_vector.get_value('exec_option')['mem_limit'] = 100 * 1024 * 1024
+    new_vector.get_value('exec_option')['mem_limit'] = 132 * 1024 * 1024
     # Specify that the query should able to handle 10 MB MAX_ROW_SIZE.
     new_vector.get_value('exec_option')['max_row_size'] = 10 * 1024 * 1024
     self.run_test_case('QueryTest/wide-row', new_vector)

[impala] 01/04: IMPALA-10658: LOAD DATA INPATH silently fails between HDFS and Azure ABFS

Posted by jo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 8336b7b3cd8ba90d37c3f7454a9c9c4074bca1f0
Author: Zoltan Borok-Nagy <bo...@cloudera.com>
AuthorDate: Tue Apr 13 22:04:49 2021 +0200

    IMPALA-10658: LOAD DATA INPATH silently fails between HDFS and Azure ABFS
    
    LOAD DATA INPATH silently fails when Impala tries to move files from
    HDFS to ABFS. The problem is that we use FileSystem.makeQualified(Path)
    to decide if path is on a given filesystem. We expect to get an
    IllegalArgumentException if path is on a different filesystem. However,
    the Azure FileSystem implementation doesn't throw this exception.
    
    Because of that, Impala thinks that an 'hdfs://' path and an 'abfs://'
    path are on the same filesystem, so it tries to move files with
    FileSystem.rename(). In case of errors, rename() might throw an
    exception or return false. Impala doesn't check the return value,
    therefore if rename() returns false, the error remains silent.
    
    This patch fixes Impala's isPathOnFileSystem() and also adds a check
    for the return value of rename().
    
    Testing:
     * tested manually between HDFS and Azure ABFS.
     * added JUnit test to FileSystemUtilTest
    
    Change-Id: Id807e8a200b83283a09d3a917185cabab930017d
    Reviewed-on: http://gerrit.cloudera.org:8080/17316
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../org/apache/impala/common/FileSystemUtil.java   | 16 ++++++---
 .../apache/impala/common/FileSystemUtilTest.java   | 40 +++++++++++++++++++++-
 2 files changed, 50 insertions(+), 6 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/common/FileSystemUtil.java b/fe/src/main/java/org/apache/impala/common/FileSystemUtil.java
index 50e4ead..9393009 100644
--- a/fe/src/main/java/org/apache/impala/common/FileSystemUtil.java
+++ b/fe/src/main/java/org/apache/impala/common/FileSystemUtil.java
@@ -263,7 +263,10 @@ public class FileSystemUtil {
             "Moving '%s' to '%s'", sourceFile.toString(), destFile.toString()));
       }
       // Move (rename) the file.
-      destFs.rename(sourceFile, destFile);
+      if (!destFs.rename(sourceFile, destFile)) {
+        throw new IOException(String.format(
+            "Failed to move '%s' to '%s'", sourceFile, destFile));
+      }
       return;
     }
     if (destIsDfs && sameFileSystem) {
@@ -579,13 +582,16 @@ public class FileSystemUtil {
    * Return true iff the path is on the given filesystem.
    */
   public static boolean isPathOnFileSystem(Path path, FileSystem fs) {
+    // Path 'path' must be qualified already.
+    Preconditions.checkState(
+        !path.equals(Path.getPathWithoutSchemeAndAuthority(path)),
+        String.format("Path '%s' is not qualified.", path));
     try {
-      // Call makeQualified() for the side-effect of FileSystem.checkPath() which will
-      // throw an exception if path is not on fs.
-      fs.makeQualified(path);
-      return true;
+      Path qp = fs.makeQualified(path);
+      return path.equals(qp);
     } catch (IllegalArgumentException e) {
       // Path is not on fs.
+      LOG.debug(String.format("Path '%s' is not on file system '%s'", path, fs));
       return false;
     }
   }
diff --git a/fe/src/test/java/org/apache/impala/common/FileSystemUtilTest.java b/fe/src/test/java/org/apache/impala/common/FileSystemUtilTest.java
index 2ed60f6..3105690 100644
--- a/fe/src/test/java/org/apache/impala/common/FileSystemUtilTest.java
+++ b/fe/src/test/java/org/apache/impala/common/FileSystemUtilTest.java
@@ -30,6 +30,9 @@ import org.junit.Test;
 import org.mockito.Mockito;
 
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
 
 /**
  * Tests for the various util methods in FileSystemUtil class
@@ -163,6 +166,41 @@ public class FileSystemUtilTest {
     // TODO: enable following tests if we add them into impala mini cluster.
     // testValidLoadDataInpath(mockLocation(FileSystemUtil.SCHEME_O3FS), true);
     // testValidLoadDataInpath(mockLocation(FileSystemUtil.SCHEME_ALLUXIO), false);
+    // Also extend testIsPathOnFileSystem().
+  }
+
+  @Test
+  public void testIsPathOnFileSystem() throws IOException {
+    List<String> schemes = Arrays.asList(
+        FileSystemUtil.SCHEME_ABFS,
+        FileSystemUtil.SCHEME_ABFSS,
+        FileSystemUtil.SCHEME_ADL,
+        FileSystemUtil.SCHEME_HDFS,
+        FileSystemUtil.SCHEME_S3A,
+        FileSystemUtil.SCHEME_FILE);
+    List<Path> mockFiles = new ArrayList<>();
+    for (String scheme : schemes) {
+      mockFiles.add(new Path(mockLocation(scheme)));
+    }
+    List<FileSystem> fileSystems = new ArrayList<>();
+    for (Path mockFile : mockFiles) {
+      fileSystems.add(FileSystemUtil.getFileSystemForPath(mockFile));
+    }
+    for (int i = 0; i < fileSystems.size(); ++i) {
+      FileSystem fs = fileSystems.get(i);
+      for (int j = 0; j < mockFiles.size(); ++j) {
+        Path mockFile = mockFiles.get(j);
+        if (i == j) {
+          assertTrue(String.format(
+                          "Path '%s' should be on file system '%s'", mockFile, fs),
+                     FileSystemUtil.isPathOnFileSystem(mockFile, fs));
+        } else {
+          assertFalse(String.format(
+                          "Path '%s' should not be on file system '%s'", mockFile, fs),
+                      FileSystemUtil.isPathOnFileSystem(mockFile, fs));
+        }
+      }
+    }
   }
 
   private boolean testIsInIgnoredDirectory(Path input) {
@@ -185,7 +223,7 @@ public class FileSystemUtilTest {
       case FileSystemUtil.SCHEME_ADL:
         return "adl://dummy-account.azuredatalakestore.net/dummy-part-3";
       case FileSystemUtil.SCHEME_FILE:
-        return "file://tmp/dummy-part-4";
+        return "file:///tmp/dummy-part-4";
       case FileSystemUtil.SCHEME_HDFS:
         return "hdfs://localhost:20500/dummy-part-5";
       case FileSystemUtil.SCHEME_S3A: