You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2019/11/05 23:20:42 UTC

[incubator-datasketches-cpp] 03/04: return vector from unwrapping_get_items

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch cpc_template
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git

commit 12bb6b7a4e4b94f733e96596916fcf8dc095f59f
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Tue Nov 5 13:46:02 2019 -0800

    return vector from unwrapping_get_items
---
 cpc/include/cpc_common.hpp          |  5 ++++-
 cpc/include/cpc_compressor_impl.hpp | 28 ++++++++++++----------------
 cpc/include/u32_table.hpp           |  8 ++++----
 cpc/include/u32_table_impl.hpp      | 11 +++++------
 4 files changed, 25 insertions(+), 27 deletions(-)

diff --git a/cpc/include/cpc_common.hpp b/cpc/include/cpc_common.hpp
index e372136..6b81d0f 100644
--- a/cpc/include/cpc_common.hpp
+++ b/cpc/include/cpc_common.hpp
@@ -23,7 +23,6 @@
 #include <memory>
 
 #include "MurmurHash3.h"
-#include "u32_table.hpp"
 
 namespace datasketches {
 
@@ -41,8 +40,12 @@ template<typename A> using AllocU32 = typename std::allocator_traits<A>::templat
 template<typename A> using AllocU64 = typename std::allocator_traits<A>::template rebind_alloc<uint64_t>;
 
 template<typename A> using vector_u8 = std::vector<uint8_t, AllocU8<A>>;
+template<typename A> using vector_u32 = std::vector<uint32_t, AllocU32<A>>;
 template<typename A> using vector_u64 = std::vector<uint64_t, AllocU64<A>>;
 
+// forward declaration
+template<typename A> class u32_table;
+
 struct compressed_state {
   u32_ptr_with_deleter table_data_ptr;
   uint32_t table_data_words;
diff --git a/cpc/include/cpc_compressor_impl.hpp b/cpc/include/cpc_compressor_impl.hpp
index 2a8bba2..8ee5d58 100644
--- a/cpc/include/cpc_compressor_impl.hpp
+++ b/cpc/include/cpc_compressor_impl.hpp
@@ -186,11 +186,10 @@ void cpc_compressor<A>::uncompress(const compressed_state& source, uncompressed_
 template<typename A>
 void cpc_compressor<A>::compress_sparse_flavor(const cpc_sketch_alloc<A>& source, compressed_state* result) const {
   if (source.sliding_window.size() > 0) throw std::logic_error("unexpected sliding window");
-  uint32_t* pairs = source.surprising_value_table.unwrapping_get_items();
+  vector_u32<A> pairs = source.surprising_value_table.unwrapping_get_items();
   const size_t num_pairs = source.surprising_value_table.get_num_items();
-  std::sort(pairs, &pairs[num_pairs]);
-  compress_surprising_values(pairs, num_pairs, source.get_lg_k(), result);
-  AllocU32<A>().deallocate(pairs, num_pairs);
+  std::sort(pairs.begin(), pairs.end());
+  compress_surprising_values(pairs.data(), num_pairs, source.get_lg_k(), result);
 }
 
 template<typename A>
@@ -209,21 +208,20 @@ void cpc_compressor<A>::compress_hybrid_flavor(const cpc_sketch_alloc<A>& source
   if (source.sliding_window.size() == 0) throw std::logic_error("no sliding window");
   if (source.window_offset != 0) throw std::logic_error("window_offset != 0");
   const size_t k = 1 << source.get_lg_k();
-  uint32_t* pairs_from_table = source.surprising_value_table.unwrapping_get_items();
+  vector_u32<A> pairs_from_table = source.surprising_value_table.unwrapping_get_items();
   const size_t num_pairs_from_table = source.surprising_value_table.get_num_items();
-  if (num_pairs_from_table > 0) std::sort(pairs_from_table, &pairs_from_table[num_pairs_from_table]);
+  if (num_pairs_from_table > 0) std::sort(pairs_from_table.begin(), pairs_from_table.end());
   const size_t num_pairs_from_window = source.get_num_coupons() - num_pairs_from_table; // because the window offset is zero
 
   uint32_t* all_pairs = tricky_get_pairs_from_window(source.sliding_window.data(), k, num_pairs_from_window, num_pairs_from_table);
 
   u32_table<A>::merge(
-      pairs_from_table, 0, num_pairs_from_table,
+      pairs_from_table.data(), 0, num_pairs_from_table,
       all_pairs, num_pairs_from_table, num_pairs_from_window,
       all_pairs, 0
   );  // note the overlapping subarray trick
 
   compress_surprising_values(all_pairs, source.get_num_coupons(), source.get_lg_k(), result);
-  if (pairs_from_table != nullptr) AllocU32<A>().deallocate(pairs_from_table, num_pairs_from_table);
   AllocU32<A>().deallocate(all_pairs, source.get_num_coupons());
 }
 
@@ -259,7 +257,7 @@ void cpc_compressor<A>::compress_pinned_flavor(const cpc_sketch_alloc<A>& source
   compress_sliding_window(source.sliding_window.data(), source.get_lg_k(), source.get_num_coupons(), result);
   const size_t num_pairs = source.surprising_value_table.get_num_items();
   if (num_pairs > 0) {
-    uint32_t* pairs = source.surprising_value_table.unwrapping_get_items();
+    vector_u32<A> pairs = source.surprising_value_table.unwrapping_get_items();
 
     // Here we subtract 8 from the column indices. Because they are stored in the low 6 bits
     // of each row_col pair, and because no column index is less than 8 for a "Pinned" sketch,
@@ -271,9 +269,8 @@ void cpc_compressor<A>::compress_pinned_flavor(const cpc_sketch_alloc<A>& source
       pairs[i] -= 8;
     }
 
-    std::sort(pairs, &pairs[num_pairs]);
-    compress_surprising_values(pairs, num_pairs, source.get_lg_k(), result);
-    AllocU32<A>().deallocate(pairs, num_pairs);
+    std::sort(pairs.begin(), pairs.end());
+    compress_surprising_values(pairs.data(), num_pairs, source.get_lg_k(), result);
   }
 }
 
@@ -300,7 +297,7 @@ void cpc_compressor<A>::compress_sliding_flavor(const cpc_sketch_alloc<A>& sourc
   compress_sliding_window(source.sliding_window.data(), source.get_lg_k(), source.get_num_coupons(), result);
   const size_t num_pairs = source.surprising_value_table.get_num_items();
   if (num_pairs > 0) {
-    uint32_t* pairs = source.surprising_value_table.unwrapping_get_items();
+    vector_u32<A> pairs = source.surprising_value_table.unwrapping_get_items();
 
     // Here we apply a complicated transformation to the column indices, which
     // changes the implied ordering of the pairs, so we must do it before sorting.
@@ -323,9 +320,8 @@ void cpc_compressor<A>::compress_sliding_flavor(const cpc_sketch_alloc<A>& sourc
       pairs[i] = (row << 6) | col;
     }
 
-    std::sort(pairs, &pairs[num_pairs]);
-    compress_surprising_values(pairs, num_pairs, source.get_lg_k(), result);
-    AllocU32<A>().deallocate(pairs, num_pairs);
+    std::sort(pairs.begin(), pairs.end());
+    compress_surprising_values(pairs.data(), num_pairs, source.get_lg_k(), result);
   }
 }
 
diff --git a/cpc/include/u32_table.hpp b/cpc/include/u32_table.hpp
index a7f1093..beb6f82 100644
--- a/cpc/include/u32_table.hpp
+++ b/cpc/include/u32_table.hpp
@@ -25,6 +25,8 @@
 // This is a highly specialized hash table that was designed
 // to be a part of the library's CPC sketch implementation
 
+#include "cpc_common.hpp"
+
 namespace datasketches {
 
 static const uint64_t U32_TABLE_UPSIZE_NUMER = 3LL;
@@ -52,7 +54,7 @@ public:
 
   static u32_table make_from_pairs(const uint32_t* pairs, size_t num_pairs, uint8_t lg_k);
 
-  uint32_t* unwrapping_get_items() const;
+  vector_u32<A> unwrapping_get_items() const;
 
   static void merge(
     const uint32_t* arr_a, size_t start_a, size_t length_a, // input
@@ -61,13 +63,11 @@ public:
   );
 
 private:
-  typedef typename std::allocator_traits<A>::template rebind_alloc<uint32_t> AllocU32;
-  typedef std::vector<uint32_t, AllocU32> vector_u32;
 
   uint8_t lg_size; // log2 of number of slots
   uint8_t num_valid_bits;
   size_t num_items;
-  vector_u32 slots;
+  vector_u32<A> slots;
 
   inline size_t lookup(uint32_t item) const;
   inline void must_insert(uint32_t item);
diff --git a/cpc/include/u32_table_impl.hpp b/cpc/include/u32_table_impl.hpp
index 08316ed..269fa91 100644
--- a/cpc/include/u32_table_impl.hpp
+++ b/cpc/include/u32_table_impl.hpp
@@ -151,8 +151,8 @@ void u32_table<A>::rebuild(uint8_t new_lg_size) {
   const size_t old_size = 1 << lg_size;
   const size_t new_size = 1 << new_lg_size;
   if (new_size <= num_items) throw std::logic_error("new_size <= num_items");
-  vector_u32 old_slots = std::move(slots);
-  slots = vector_u32(new_size, UINT32_MAX);
+  vector_u32<A> old_slots = std::move(slots);
+  slots = vector_u32<A>(new_size, UINT32_MAX);
   lg_size = new_lg_size;
   for (size_t i = 0; i < old_size; i++) {
     if (old_slots[i] != UINT32_MAX) {
@@ -167,12 +167,11 @@ void u32_table<A>::rebuild(uint8_t new_lg_size) {
 // the load factor would have to be over 90 percent before this would fail frequently,
 // and even then the subsequent sort would fix things up.
 // The result is nearly sorted, so make sure to use an efficient sort for that case
-// This is for internal use, so deallocation is on the caller.
 template<typename A>
-uint32_t* u32_table<A>::unwrapping_get_items() const {
-  if (num_items == 0) return nullptr;
+vector_u32<A> u32_table<A>::unwrapping_get_items() const {
+  if (num_items == 0) return vector_u32<A>();
   const size_t table_size = 1 << lg_size;
-  uint32_t* result = AllocU32().allocate(num_items);
+  vector_u32<A> result(num_items);
   size_t i = 0;
   size_t l = 0;
   size_t r = num_items - 1;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org