You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2019/11/05 23:20:42 UTC
[incubator-datasketches-cpp] 03/04: return vector from
unwrapping_get_items
This is an automated email from the ASF dual-hosted git repository.
alsay pushed a commit to branch cpc_template
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git
commit 12bb6b7a4e4b94f733e96596916fcf8dc095f59f
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Tue Nov 5 13:46:02 2019 -0800
return vector from unwrapping_get_items
---
cpc/include/cpc_common.hpp | 5 ++++-
cpc/include/cpc_compressor_impl.hpp | 28 ++++++++++++----------------
cpc/include/u32_table.hpp | 8 ++++----
cpc/include/u32_table_impl.hpp | 11 +++++------
4 files changed, 25 insertions(+), 27 deletions(-)
diff --git a/cpc/include/cpc_common.hpp b/cpc/include/cpc_common.hpp
index e372136..6b81d0f 100644
--- a/cpc/include/cpc_common.hpp
+++ b/cpc/include/cpc_common.hpp
@@ -23,7 +23,6 @@
#include <memory>
#include "MurmurHash3.h"
-#include "u32_table.hpp"
namespace datasketches {
@@ -41,8 +40,12 @@ template<typename A> using AllocU32 = typename std::allocator_traits<A>::templat
template<typename A> using AllocU64 = typename std::allocator_traits<A>::template rebind_alloc<uint64_t>;
template<typename A> using vector_u8 = std::vector<uint8_t, AllocU8<A>>;
+template<typename A> using vector_u32 = std::vector<uint32_t, AllocU32<A>>;
template<typename A> using vector_u64 = std::vector<uint64_t, AllocU64<A>>;
+// forward declaration
+template<typename A> class u32_table;
+
struct compressed_state {
u32_ptr_with_deleter table_data_ptr;
uint32_t table_data_words;
diff --git a/cpc/include/cpc_compressor_impl.hpp b/cpc/include/cpc_compressor_impl.hpp
index 2a8bba2..8ee5d58 100644
--- a/cpc/include/cpc_compressor_impl.hpp
+++ b/cpc/include/cpc_compressor_impl.hpp
@@ -186,11 +186,10 @@ void cpc_compressor<A>::uncompress(const compressed_state& source, uncompressed_
template<typename A>
void cpc_compressor<A>::compress_sparse_flavor(const cpc_sketch_alloc<A>& source, compressed_state* result) const {
if (source.sliding_window.size() > 0) throw std::logic_error("unexpected sliding window");
- uint32_t* pairs = source.surprising_value_table.unwrapping_get_items();
+ vector_u32<A> pairs = source.surprising_value_table.unwrapping_get_items();
const size_t num_pairs = source.surprising_value_table.get_num_items();
- std::sort(pairs, &pairs[num_pairs]);
- compress_surprising_values(pairs, num_pairs, source.get_lg_k(), result);
- AllocU32<A>().deallocate(pairs, num_pairs);
+ std::sort(pairs.begin(), pairs.end());
+ compress_surprising_values(pairs.data(), num_pairs, source.get_lg_k(), result);
}
template<typename A>
@@ -209,21 +208,20 @@ void cpc_compressor<A>::compress_hybrid_flavor(const cpc_sketch_alloc<A>& source
if (source.sliding_window.size() == 0) throw std::logic_error("no sliding window");
if (source.window_offset != 0) throw std::logic_error("window_offset != 0");
const size_t k = 1 << source.get_lg_k();
- uint32_t* pairs_from_table = source.surprising_value_table.unwrapping_get_items();
+ vector_u32<A> pairs_from_table = source.surprising_value_table.unwrapping_get_items();
const size_t num_pairs_from_table = source.surprising_value_table.get_num_items();
- if (num_pairs_from_table > 0) std::sort(pairs_from_table, &pairs_from_table[num_pairs_from_table]);
+ if (num_pairs_from_table > 0) std::sort(pairs_from_table.begin(), pairs_from_table.end());
const size_t num_pairs_from_window = source.get_num_coupons() - num_pairs_from_table; // because the window offset is zero
uint32_t* all_pairs = tricky_get_pairs_from_window(source.sliding_window.data(), k, num_pairs_from_window, num_pairs_from_table);
u32_table<A>::merge(
- pairs_from_table, 0, num_pairs_from_table,
+ pairs_from_table.data(), 0, num_pairs_from_table,
all_pairs, num_pairs_from_table, num_pairs_from_window,
all_pairs, 0
); // note the overlapping subarray trick
compress_surprising_values(all_pairs, source.get_num_coupons(), source.get_lg_k(), result);
- if (pairs_from_table != nullptr) AllocU32<A>().deallocate(pairs_from_table, num_pairs_from_table);
AllocU32<A>().deallocate(all_pairs, source.get_num_coupons());
}
@@ -259,7 +257,7 @@ void cpc_compressor<A>::compress_pinned_flavor(const cpc_sketch_alloc<A>& source
compress_sliding_window(source.sliding_window.data(), source.get_lg_k(), source.get_num_coupons(), result);
const size_t num_pairs = source.surprising_value_table.get_num_items();
if (num_pairs > 0) {
- uint32_t* pairs = source.surprising_value_table.unwrapping_get_items();
+ vector_u32<A> pairs = source.surprising_value_table.unwrapping_get_items();
// Here we subtract 8 from the column indices. Because they are stored in the low 6 bits
// of each row_col pair, and because no column index is less than 8 for a "Pinned" sketch,
@@ -271,9 +269,8 @@ void cpc_compressor<A>::compress_pinned_flavor(const cpc_sketch_alloc<A>& source
pairs[i] -= 8;
}
- std::sort(pairs, &pairs[num_pairs]);
- compress_surprising_values(pairs, num_pairs, source.get_lg_k(), result);
- AllocU32<A>().deallocate(pairs, num_pairs);
+ std::sort(pairs.begin(), pairs.end());
+ compress_surprising_values(pairs.data(), num_pairs, source.get_lg_k(), result);
}
}
@@ -300,7 +297,7 @@ void cpc_compressor<A>::compress_sliding_flavor(const cpc_sketch_alloc<A>& sourc
compress_sliding_window(source.sliding_window.data(), source.get_lg_k(), source.get_num_coupons(), result);
const size_t num_pairs = source.surprising_value_table.get_num_items();
if (num_pairs > 0) {
- uint32_t* pairs = source.surprising_value_table.unwrapping_get_items();
+ vector_u32<A> pairs = source.surprising_value_table.unwrapping_get_items();
// Here we apply a complicated transformation to the column indices, which
// changes the implied ordering of the pairs, so we must do it before sorting.
@@ -323,9 +320,8 @@ void cpc_compressor<A>::compress_sliding_flavor(const cpc_sketch_alloc<A>& sourc
pairs[i] = (row << 6) | col;
}
- std::sort(pairs, &pairs[num_pairs]);
- compress_surprising_values(pairs, num_pairs, source.get_lg_k(), result);
- AllocU32<A>().deallocate(pairs, num_pairs);
+ std::sort(pairs.begin(), pairs.end());
+ compress_surprising_values(pairs.data(), num_pairs, source.get_lg_k(), result);
}
}
diff --git a/cpc/include/u32_table.hpp b/cpc/include/u32_table.hpp
index a7f1093..beb6f82 100644
--- a/cpc/include/u32_table.hpp
+++ b/cpc/include/u32_table.hpp
@@ -25,6 +25,8 @@
// This is a highly specialized hash table that was designed
// to be a part of the library's CPC sketch implementation
+#include "cpc_common.hpp"
+
namespace datasketches {
static const uint64_t U32_TABLE_UPSIZE_NUMER = 3LL;
@@ -52,7 +54,7 @@ public:
static u32_table make_from_pairs(const uint32_t* pairs, size_t num_pairs, uint8_t lg_k);
- uint32_t* unwrapping_get_items() const;
+ vector_u32<A> unwrapping_get_items() const;
static void merge(
const uint32_t* arr_a, size_t start_a, size_t length_a, // input
@@ -61,13 +63,11 @@ public:
);
private:
- typedef typename std::allocator_traits<A>::template rebind_alloc<uint32_t> AllocU32;
- typedef std::vector<uint32_t, AllocU32> vector_u32;
uint8_t lg_size; // log2 of number of slots
uint8_t num_valid_bits;
size_t num_items;
- vector_u32 slots;
+ vector_u32<A> slots;
inline size_t lookup(uint32_t item) const;
inline void must_insert(uint32_t item);
diff --git a/cpc/include/u32_table_impl.hpp b/cpc/include/u32_table_impl.hpp
index 08316ed..269fa91 100644
--- a/cpc/include/u32_table_impl.hpp
+++ b/cpc/include/u32_table_impl.hpp
@@ -151,8 +151,8 @@ void u32_table<A>::rebuild(uint8_t new_lg_size) {
const size_t old_size = 1 << lg_size;
const size_t new_size = 1 << new_lg_size;
if (new_size <= num_items) throw std::logic_error("new_size <= num_items");
- vector_u32 old_slots = std::move(slots);
- slots = vector_u32(new_size, UINT32_MAX);
+ vector_u32<A> old_slots = std::move(slots);
+ slots = vector_u32<A>(new_size, UINT32_MAX);
lg_size = new_lg_size;
for (size_t i = 0; i < old_size; i++) {
if (old_slots[i] != UINT32_MAX) {
@@ -167,12 +167,11 @@ void u32_table<A>::rebuild(uint8_t new_lg_size) {
// the load factor would have to be over 90 percent before this would fail frequently,
// and even then the subsequent sort would fix things up.
// The result is nearly sorted, so make sure to use an efficient sort for that case
-// This is for internal use, so deallocation is on the caller.
template<typename A>
-uint32_t* u32_table<A>::unwrapping_get_items() const {
- if (num_items == 0) return nullptr;
+vector_u32<A> u32_table<A>::unwrapping_get_items() const {
+ if (num_items == 0) return vector_u32<A>();
const size_t table_size = 1 << lg_size;
- uint32_t* result = AllocU32().allocate(num_items);
+ vector_u32<A> result(num_items);
size_t i = 0;
size_t l = 0;
size_t r = num_items - 1;
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org