You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2019/11/05 23:20:39 UTC

[incubator-datasketches-cpp] branch cpc_template updated (e91b8bc -> d4e0b32)

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a change to branch cpc_template
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git.


    from e91b8bc  common type definitions
     new 7ffbe1b  moved aliases to the front
     new ed6c431  added a brief description
     new 12bb6b7  return vector from unwrapping_get_items
     new d4e0b32  use vector to manage allocation and deallocation

The 4 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 cpc/include/cpc_common.hpp          |  5 ++-
 cpc/include/cpc_compressor.hpp      | 15 +++++++--
 cpc/include/cpc_compressor_impl.hpp | 67 +++++++++++++++----------------------
 cpc/include/cpc_sketch.hpp          | 12 ++++---
 cpc/include/cpc_union.hpp           |  8 ++---
 cpc/include/u32_table.hpp           |  8 ++---
 cpc/include/u32_table_impl.hpp      | 11 +++---
 7 files changed, 64 insertions(+), 62 deletions(-)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org


[incubator-datasketches-cpp] 02/04: added a brief description

Posted by al...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch cpc_template
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git

commit ed6c43133b1259b9bfe6b6ab82565d6d3f44a35f
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Tue Nov 5 12:11:23 2019 -0800

    added a brief description
---
 cpc/include/cpc_compressor.hpp | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/cpc/include/cpc_compressor.hpp b/cpc/include/cpc_compressor.hpp
index 6c02ec6..1fee421 100644
--- a/cpc/include/cpc_compressor.hpp
+++ b/cpc/include/cpc_compressor.hpp
@@ -26,12 +26,21 @@
 
 namespace datasketches {
 
+/*
+ * This is a very efficient compressor specialized for use by the CPC Sketch.
+ * There are two very different compression schemes here: one for the sliding window
+ * and another for the table of so-called surprising values.
+ * These two compression schemes are designed for specific probability distributions of entries
+ * in these data structures and make some compromises for performance. As a result
+ * the compression is slightly less effective than theoretically achievable but is very fast.
+ */
+
 // forward declarations
 template<typename A> class cpc_sketch_alloc;
 template<typename A> class cpc_compressor;
 
-// do not instantiate compressor directly
-// use this global function to statically allocate and construct on first use
+// the compressor is not instantiated directly
+// the sketch implementation uses this global function to statically allocate and construct on the first use
 template<typename A>
 inline cpc_compressor<A>& get_compressor();
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org


[incubator-datasketches-cpp] 01/04: moved aliases to the front

Posted by al...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch cpc_template
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git

commit 7ffbe1b4b9136af26df63c52e64612126f09f46a
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Tue Nov 5 12:10:54 2019 -0800

    moved aliases to the front
---
 cpc/include/cpc_sketch.hpp | 12 ++++++++----
 cpc/include/cpc_union.hpp  |  8 ++++----
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/cpc/include/cpc_sketch.hpp b/cpc/include/cpc_sketch.hpp
index 6cc1120..c71f183 100644
--- a/cpc/include/cpc_sketch.hpp
+++ b/cpc/include/cpc_sketch.hpp
@@ -37,7 +37,11 @@
 namespace datasketches {
 
 /*
- * High performance C++ implementation of Compressed Probabilistic Counting sketch
+ * High performance C++ implementation of Compressed Probabilistic Counting (CPC) Sketch
+ *
+ * This is a very compact (in serialized form) distinct counting sketch.
+ * The theory is described in the following paper:
+ * https://arxiv.org/abs/1708.06839
  *
  * author Kevin Lang
  * author Alexander Saydakov
@@ -47,6 +51,9 @@ namespace datasketches {
 template<typename A> class cpc_sketch_alloc;
 template<typename A> class cpc_union_alloc;
 
+// alias with default allocator for convenience
+typedef cpc_sketch_alloc<std::allocator<void>> cpc_sketch;
+
 template<typename A>
 class cpc_sketch_alloc {
   public:
@@ -163,9 +170,6 @@ class cpc_sketch_alloc {
     friend cpc_union_alloc<A>;
 };
 
-// alias with default allocator for convenience
-typedef cpc_sketch_alloc<std::allocator<void>> cpc_sketch;
-
 } /* namespace datasketches */
 
 #include "cpc_sketch_impl.hpp"
diff --git a/cpc/include/cpc_union.hpp b/cpc/include/cpc_union.hpp
index 4841fdb..9601cc0 100644
--- a/cpc/include/cpc_union.hpp
+++ b/cpc/include/cpc_union.hpp
@@ -31,12 +31,15 @@
 namespace datasketches {
 
 /*
- * High performance C++ implementation of Compressed Probabilistic Counting sketch
+ * High performance C++ implementation of Compressed Probabilistic Counting (CPC) Union
  *
  * author Kevin Lang
  * author Alexander Saydakov
  */
 
+// alias with default allocator for convenience
+typedef cpc_union_alloc<std::allocator<void>> cpc_union;
+
 template<typename A>
 class cpc_union_alloc {
   public:
@@ -73,9 +76,6 @@ class cpc_union_alloc {
     void reduce_k(uint8_t new_lg_k);
 };
 
-// alias with default allocator for convenience
-typedef cpc_union_alloc<std::allocator<void>> cpc_union;
-
 } /* namespace datasketches */
 
 #include "cpc_union_impl.hpp"


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org


[incubator-datasketches-cpp] 03/04: return vector from unwrapping_get_items

Posted by al...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch cpc_template
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git

commit 12bb6b7a4e4b94f733e96596916fcf8dc095f59f
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Tue Nov 5 13:46:02 2019 -0800

    return vector from unwrapping_get_items
---
 cpc/include/cpc_common.hpp          |  5 ++++-
 cpc/include/cpc_compressor_impl.hpp | 28 ++++++++++++----------------
 cpc/include/u32_table.hpp           |  8 ++++----
 cpc/include/u32_table_impl.hpp      | 11 +++++------
 4 files changed, 25 insertions(+), 27 deletions(-)

diff --git a/cpc/include/cpc_common.hpp b/cpc/include/cpc_common.hpp
index e372136..6b81d0f 100644
--- a/cpc/include/cpc_common.hpp
+++ b/cpc/include/cpc_common.hpp
@@ -23,7 +23,6 @@
 #include <memory>
 
 #include "MurmurHash3.h"
-#include "u32_table.hpp"
 
 namespace datasketches {
 
@@ -41,8 +40,12 @@ template<typename A> using AllocU32 = typename std::allocator_traits<A>::templat
 template<typename A> using AllocU64 = typename std::allocator_traits<A>::template rebind_alloc<uint64_t>;
 
 template<typename A> using vector_u8 = std::vector<uint8_t, AllocU8<A>>;
+template<typename A> using vector_u32 = std::vector<uint32_t, AllocU32<A>>;
 template<typename A> using vector_u64 = std::vector<uint64_t, AllocU64<A>>;
 
+// forward declaration
+template<typename A> class u32_table;
+
 struct compressed_state {
   u32_ptr_with_deleter table_data_ptr;
   uint32_t table_data_words;
diff --git a/cpc/include/cpc_compressor_impl.hpp b/cpc/include/cpc_compressor_impl.hpp
index 2a8bba2..8ee5d58 100644
--- a/cpc/include/cpc_compressor_impl.hpp
+++ b/cpc/include/cpc_compressor_impl.hpp
@@ -186,11 +186,10 @@ void cpc_compressor<A>::uncompress(const compressed_state& source, uncompressed_
 template<typename A>
 void cpc_compressor<A>::compress_sparse_flavor(const cpc_sketch_alloc<A>& source, compressed_state* result) const {
   if (source.sliding_window.size() > 0) throw std::logic_error("unexpected sliding window");
-  uint32_t* pairs = source.surprising_value_table.unwrapping_get_items();
+  vector_u32<A> pairs = source.surprising_value_table.unwrapping_get_items();
   const size_t num_pairs = source.surprising_value_table.get_num_items();
-  std::sort(pairs, &pairs[num_pairs]);
-  compress_surprising_values(pairs, num_pairs, source.get_lg_k(), result);
-  AllocU32<A>().deallocate(pairs, num_pairs);
+  std::sort(pairs.begin(), pairs.end());
+  compress_surprising_values(pairs.data(), num_pairs, source.get_lg_k(), result);
 }
 
 template<typename A>
@@ -209,21 +208,20 @@ void cpc_compressor<A>::compress_hybrid_flavor(const cpc_sketch_alloc<A>& source
   if (source.sliding_window.size() == 0) throw std::logic_error("no sliding window");
   if (source.window_offset != 0) throw std::logic_error("window_offset != 0");
   const size_t k = 1 << source.get_lg_k();
-  uint32_t* pairs_from_table = source.surprising_value_table.unwrapping_get_items();
+  vector_u32<A> pairs_from_table = source.surprising_value_table.unwrapping_get_items();
   const size_t num_pairs_from_table = source.surprising_value_table.get_num_items();
-  if (num_pairs_from_table > 0) std::sort(pairs_from_table, &pairs_from_table[num_pairs_from_table]);
+  if (num_pairs_from_table > 0) std::sort(pairs_from_table.begin(), pairs_from_table.end());
   const size_t num_pairs_from_window = source.get_num_coupons() - num_pairs_from_table; // because the window offset is zero
 
   uint32_t* all_pairs = tricky_get_pairs_from_window(source.sliding_window.data(), k, num_pairs_from_window, num_pairs_from_table);
 
   u32_table<A>::merge(
-      pairs_from_table, 0, num_pairs_from_table,
+      pairs_from_table.data(), 0, num_pairs_from_table,
       all_pairs, num_pairs_from_table, num_pairs_from_window,
       all_pairs, 0
   );  // note the overlapping subarray trick
 
   compress_surprising_values(all_pairs, source.get_num_coupons(), source.get_lg_k(), result);
-  if (pairs_from_table != nullptr) AllocU32<A>().deallocate(pairs_from_table, num_pairs_from_table);
   AllocU32<A>().deallocate(all_pairs, source.get_num_coupons());
 }
 
@@ -259,7 +257,7 @@ void cpc_compressor<A>::compress_pinned_flavor(const cpc_sketch_alloc<A>& source
   compress_sliding_window(source.sliding_window.data(), source.get_lg_k(), source.get_num_coupons(), result);
   const size_t num_pairs = source.surprising_value_table.get_num_items();
   if (num_pairs > 0) {
-    uint32_t* pairs = source.surprising_value_table.unwrapping_get_items();
+    vector_u32<A> pairs = source.surprising_value_table.unwrapping_get_items();
 
     // Here we subtract 8 from the column indices. Because they are stored in the low 6 bits
     // of each row_col pair, and because no column index is less than 8 for a "Pinned" sketch,
@@ -271,9 +269,8 @@ void cpc_compressor<A>::compress_pinned_flavor(const cpc_sketch_alloc<A>& source
       pairs[i] -= 8;
     }
 
-    std::sort(pairs, &pairs[num_pairs]);
-    compress_surprising_values(pairs, num_pairs, source.get_lg_k(), result);
-    AllocU32<A>().deallocate(pairs, num_pairs);
+    std::sort(pairs.begin(), pairs.end());
+    compress_surprising_values(pairs.data(), num_pairs, source.get_lg_k(), result);
   }
 }
 
@@ -300,7 +297,7 @@ void cpc_compressor<A>::compress_sliding_flavor(const cpc_sketch_alloc<A>& sourc
   compress_sliding_window(source.sliding_window.data(), source.get_lg_k(), source.get_num_coupons(), result);
   const size_t num_pairs = source.surprising_value_table.get_num_items();
   if (num_pairs > 0) {
-    uint32_t* pairs = source.surprising_value_table.unwrapping_get_items();
+    vector_u32<A> pairs = source.surprising_value_table.unwrapping_get_items();
 
     // Here we apply a complicated transformation to the column indices, which
     // changes the implied ordering of the pairs, so we must do it before sorting.
@@ -323,9 +320,8 @@ void cpc_compressor<A>::compress_sliding_flavor(const cpc_sketch_alloc<A>& sourc
       pairs[i] = (row << 6) | col;
     }
 
-    std::sort(pairs, &pairs[num_pairs]);
-    compress_surprising_values(pairs, num_pairs, source.get_lg_k(), result);
-    AllocU32<A>().deallocate(pairs, num_pairs);
+    std::sort(pairs.begin(), pairs.end());
+    compress_surprising_values(pairs.data(), num_pairs, source.get_lg_k(), result);
   }
 }
 
diff --git a/cpc/include/u32_table.hpp b/cpc/include/u32_table.hpp
index a7f1093..beb6f82 100644
--- a/cpc/include/u32_table.hpp
+++ b/cpc/include/u32_table.hpp
@@ -25,6 +25,8 @@
 // This is a highly specialized hash table that was designed
 // to be a part of the library's CPC sketch implementation
 
+#include "cpc_common.hpp"
+
 namespace datasketches {
 
 static const uint64_t U32_TABLE_UPSIZE_NUMER = 3LL;
@@ -52,7 +54,7 @@ public:
 
   static u32_table make_from_pairs(const uint32_t* pairs, size_t num_pairs, uint8_t lg_k);
 
-  uint32_t* unwrapping_get_items() const;
+  vector_u32<A> unwrapping_get_items() const;
 
   static void merge(
     const uint32_t* arr_a, size_t start_a, size_t length_a, // input
@@ -61,13 +63,11 @@ public:
   );
 
 private:
-  typedef typename std::allocator_traits<A>::template rebind_alloc<uint32_t> AllocU32;
-  typedef std::vector<uint32_t, AllocU32> vector_u32;
 
   uint8_t lg_size; // log2 of number of slots
   uint8_t num_valid_bits;
   size_t num_items;
-  vector_u32 slots;
+  vector_u32<A> slots;
 
   inline size_t lookup(uint32_t item) const;
   inline void must_insert(uint32_t item);
diff --git a/cpc/include/u32_table_impl.hpp b/cpc/include/u32_table_impl.hpp
index 08316ed..269fa91 100644
--- a/cpc/include/u32_table_impl.hpp
+++ b/cpc/include/u32_table_impl.hpp
@@ -151,8 +151,8 @@ void u32_table<A>::rebuild(uint8_t new_lg_size) {
   const size_t old_size = 1 << lg_size;
   const size_t new_size = 1 << new_lg_size;
   if (new_size <= num_items) throw std::logic_error("new_size <= num_items");
-  vector_u32 old_slots = std::move(slots);
-  slots = vector_u32(new_size, UINT32_MAX);
+  vector_u32<A> old_slots = std::move(slots);
+  slots = vector_u32<A>(new_size, UINT32_MAX);
   lg_size = new_lg_size;
   for (size_t i = 0; i < old_size; i++) {
     if (old_slots[i] != UINT32_MAX) {
@@ -167,12 +167,11 @@ void u32_table<A>::rebuild(uint8_t new_lg_size) {
 // the load factor would have to be over 90 percent before this would fail frequently,
 // and even then the subsequent sort would fix things up.
 // The result is nearly sorted, so make sure to use an efficient sort for that case
-// This is for internal use, so deallocation is on the caller.
 template<typename A>
-uint32_t* u32_table<A>::unwrapping_get_items() const {
-  if (num_items == 0) return nullptr;
+vector_u32<A> u32_table<A>::unwrapping_get_items() const {
+  if (num_items == 0) return vector_u32<A>();
   const size_t table_size = 1 << lg_size;
-  uint32_t* result = AllocU32().allocate(num_items);
+  vector_u32<A> result(num_items);
   size_t i = 0;
   size_t l = 0;
   size_t r = num_items - 1;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org


[incubator-datasketches-cpp] 04/04: use vector to manage allocation and deallocation

Posted by al...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch cpc_template
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git

commit d4e0b32ed2c2d0aa389857b37ac9f61744825e94
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Tue Nov 5 15:20:24 2019 -0800

    use vector to manage allocation and deallocation
---
 cpc/include/cpc_compressor.hpp      |  2 +-
 cpc/include/cpc_compressor_impl.hpp | 49 +++++++++++++++----------------------
 2 files changed, 21 insertions(+), 30 deletions(-)

diff --git a/cpc/include/cpc_compressor.hpp b/cpc/include/cpc_compressor.hpp
index 1fee421..2a553ed 100644
--- a/cpc/include/cpc_compressor.hpp
+++ b/cpc/include/cpc_compressor.hpp
@@ -129,7 +129,7 @@ private:
   void compress_surprising_values(const uint32_t* pairs, size_t num_pairs, uint8_t lg_k, compressed_state* result) const;
   void compress_sliding_window(const uint8_t* window, uint8_t lg_k, uint32_t num_coupons, compressed_state* target) const;
 
-  uint32_t* uncompress_surprising_values(const uint32_t* data, size_t data_words, size_t num_pairs, uint8_t lg_k) const;
+  vector_u32<A> uncompress_surprising_values(const uint32_t* data, size_t data_words, size_t num_pairs, uint8_t lg_k) const;
   void uncompress_sliding_window(const uint32_t* data, size_t data_words, vector_u8<A>& window, uint8_t lg_k, uint32_t num_coupons) const;
 
   static size_t safe_length_for_compressed_pair_buf(uint64_t k, size_t num_pairs, size_t num_base_bits);
diff --git a/cpc/include/cpc_compressor_impl.hpp b/cpc/include/cpc_compressor_impl.hpp
index 8ee5d58..7314229 100644
--- a/cpc/include/cpc_compressor_impl.hpp
+++ b/cpc/include/cpc_compressor_impl.hpp
@@ -187,18 +187,16 @@ template<typename A>
 void cpc_compressor<A>::compress_sparse_flavor(const cpc_sketch_alloc<A>& source, compressed_state* result) const {
   if (source.sliding_window.size() > 0) throw std::logic_error("unexpected sliding window");
   vector_u32<A> pairs = source.surprising_value_table.unwrapping_get_items();
-  const size_t num_pairs = source.surprising_value_table.get_num_items();
   std::sort(pairs.begin(), pairs.end());
-  compress_surprising_values(pairs.data(), num_pairs, source.get_lg_k(), result);
+  compress_surprising_values(pairs.data(), pairs.size(), source.get_lg_k(), result);
 }
 
 template<typename A>
 void cpc_compressor<A>::uncompress_sparse_flavor(const compressed_state& source, uncompressed_state<A>& target, uint8_t lg_k) const {
   if (source.window_data_ptr != nullptr) throw std::logic_error("unexpected sliding window");
   if (source.table_data_ptr == nullptr) throw std::logic_error("table is expected");
-  uint32_t* pairs = uncompress_surprising_values(source.table_data_ptr.get(), source.table_data_words, source.table_num_entries, lg_k);
-  target.table = u32_table<A>::make_from_pairs(pairs, source.table_num_entries, lg_k);
-  AllocU32<A>().deallocate(pairs, source.table_num_entries);
+  vector_u32<A> pairs = uncompress_surprising_values(source.table_data_ptr.get(), source.table_data_words, source.table_num_entries, lg_k);
+  target.table = u32_table<A>::make_from_pairs(pairs.data(), source.table_num_entries, lg_k);
 }
 
 // This is complicated because it effectively builds a Sparse version
@@ -229,7 +227,7 @@ template<typename A>
 void cpc_compressor<A>::uncompress_hybrid_flavor(const compressed_state& source, uncompressed_state<A>& target, uint8_t lg_k) const {
   if (source.window_data_ptr != nullptr) throw std::logic_error("window is not expected");
   if (source.table_data_ptr == nullptr) throw std::logic_error("table is expected");
-  uint32_t* pairs = uncompress_surprising_values(source.table_data_ptr.get(), source.table_data_words, source.table_num_entries, lg_k);
+  vector_u32<A> pairs = uncompress_surprising_values(source.table_data_ptr.get(), source.table_data_words, source.table_num_entries, lg_k);
 
   // In the hybrid flavor, some of these pairs actually
   // belong in the window, so we will separate them out,
@@ -248,29 +246,26 @@ void cpc_compressor<A>::uncompress_hybrid_flavor(const compressed_state& source,
       pairs[next_true_pair++] = row_col; // move true pair down
     }
   }
-  target.table = u32_table<A>::make_from_pairs(pairs, next_true_pair, lg_k);
-  AllocU32<A>().deallocate(pairs, source.table_num_entries);
+  target.table = u32_table<A>::make_from_pairs(pairs.data(), next_true_pair, lg_k);
 }
 
 template<typename A>
 void cpc_compressor<A>::compress_pinned_flavor(const cpc_sketch_alloc<A>& source, compressed_state* result) const {
   compress_sliding_window(source.sliding_window.data(), source.get_lg_k(), source.get_num_coupons(), result);
-  const size_t num_pairs = source.surprising_value_table.get_num_items();
-  if (num_pairs > 0) {
-    vector_u32<A> pairs = source.surprising_value_table.unwrapping_get_items();
-
+  vector_u32<A> pairs = source.surprising_value_table.unwrapping_get_items();
+  if (pairs.size() > 0) {
     // Here we subtract 8 from the column indices. Because they are stored in the low 6 bits
     // of each row_col pair, and because no column index is less than 8 for a "Pinned" sketch,
     // we can simply subtract 8 from the pairs themselves.
 
     // shift the columns over by 8 positions before compressing (because of the window)
-    for (size_t i = 0; i < num_pairs; i++) {
+    for (size_t i = 0; i < pairs.size(); i++) {
       if ((pairs[i] & 63) < 8) throw std::logic_error("(pairs[i] & 63) < 8");
       pairs[i] -= 8;
     }
 
     std::sort(pairs.begin(), pairs.end());
-    compress_surprising_values(pairs.data(), num_pairs, source.get_lg_k(), result);
+    compress_surprising_values(pairs.data(), pairs.size(), source.get_lg_k(), result);
   }
 }
 
@@ -281,24 +276,21 @@ void cpc_compressor<A>::uncompress_pinned_flavor(const compressed_state& source,
   const size_t num_pairs = source.table_num_entries;
   if (num_pairs > 0) {
     if (source.table_data_ptr == nullptr) throw std::logic_error("table is expected");
-    uint32_t* pairs = uncompress_surprising_values(source.table_data_ptr.get(), source.table_data_words, num_pairs, lg_k);
+    vector_u32<A> pairs = uncompress_surprising_values(source.table_data_ptr.get(), source.table_data_words, num_pairs, lg_k);
     // undo the compressor's 8-column shift
     for (size_t i = 0; i < num_pairs; i++) {
       if ((pairs[i] & 63) >= 56) throw std::logic_error("(pairs[i] & 63) >= 56");
       pairs[i] += 8;
     }
-    target.table = u32_table<A>::make_from_pairs(pairs, num_pairs, lg_k);
-    AllocU32<A>().deallocate(pairs, num_pairs);
+    target.table = u32_table<A>::make_from_pairs(pairs.data(), num_pairs, lg_k);
   }
 }
 
 template<typename A>
 void cpc_compressor<A>::compress_sliding_flavor(const cpc_sketch_alloc<A>& source, compressed_state* result) const {
   compress_sliding_window(source.sliding_window.data(), source.get_lg_k(), source.get_num_coupons(), result);
-  const size_t num_pairs = source.surprising_value_table.get_num_items();
-  if (num_pairs > 0) {
-    vector_u32<A> pairs = source.surprising_value_table.unwrapping_get_items();
-
+  vector_u32<A> pairs = source.surprising_value_table.unwrapping_get_items();
+  if (pairs.size() > 0) {
     // Here we apply a complicated transformation to the column indices, which
     // changes the implied ordering of the pairs, so we must do it before sorting.
 
@@ -308,7 +300,7 @@ void cpc_compressor<A>::compress_sliding_flavor(const cpc_sketch_alloc<A>& sourc
     const uint8_t offset = source.window_offset;
     if (offset > 56) throw std::out_of_range("offset out of range");
 
-    for (size_t i = 0; i < num_pairs; i++) {
+    for (size_t i = 0; i < pairs.size(); i++) {
       const uint32_t row_col = pairs[i];
       const size_t row = row_col >> 6;
       uint8_t col = row_col & 63;
@@ -321,7 +313,7 @@ void cpc_compressor<A>::compress_sliding_flavor(const cpc_sketch_alloc<A>& sourc
     }
 
     std::sort(pairs.begin(), pairs.end());
-    compress_surprising_values(pairs.data(), num_pairs, source.get_lg_k(), result);
+    compress_surprising_values(pairs.data(), pairs.size(), source.get_lg_k(), result);
   }
 }
 
@@ -334,7 +326,7 @@ void cpc_compressor<A>::uncompress_sliding_flavor(const compressed_state& source
     target.table = u32_table<A>(2, 6 + lg_k);
   } else {
     if (source.table_data_ptr == nullptr) throw std::logic_error("table is expected");
-    uint32_t* pairs = uncompress_surprising_values(source.table_data_ptr.get(), source.table_data_words, num_pairs, lg_k);
+    vector_u32<A> pairs = uncompress_surprising_values(source.table_data_ptr.get(), source.table_data_words, num_pairs, lg_k);
 
     const uint8_t pseudo_phase = determine_pseudo_phase(lg_k, num_coupons);
     if (pseudo_phase >= 16) throw std::logic_error("pseudo phase >= 16");
@@ -354,8 +346,7 @@ void cpc_compressor<A>::uncompress_sliding_flavor(const compressed_state& source
       pairs[i] = (row << 6) | col;
     }
 
-    target.table = u32_table<A>::make_from_pairs(pairs, num_pairs, lg_k);
-    AllocU32<A>().deallocate(pairs, num_pairs);
+    target.table = u32_table<A>::make_from_pairs(pairs.data(), num_pairs, lg_k);
   }
 }
 
@@ -380,11 +371,11 @@ void cpc_compressor<A>::compress_surprising_values(const uint32_t* pairs, size_t
 }
 
 template<typename A>
-uint32_t* cpc_compressor<A>::uncompress_surprising_values(const uint32_t* data, size_t data_words, size_t num_pairs, uint8_t lg_k) const {
+vector_u32<A> cpc_compressor<A>::uncompress_surprising_values(const uint32_t* data, size_t data_words, size_t num_pairs, uint8_t lg_k) const {
   const size_t k = 1 << lg_k;
-  uint32_t* pairs = AllocU32<A>().allocate(num_pairs);
+  vector_u32<A> pairs(num_pairs);
   const uint8_t num_base_bits = golomb_choose_number_of_base_bits(k + num_pairs, num_pairs);
-  low_level_uncompress_pairs(pairs, num_pairs, num_base_bits, data, data_words);
+  low_level_uncompress_pairs(pairs.data(), num_pairs, num_base_bits, data, data_words);
   return pairs;
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org