You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2020/06/27 01:10:49 UTC
[incubator-datasketches-cpp] branch tuple_sketch updated: removed serde from templates
This is an automated email from the ASF dual-hosted git repository.
alsay pushed a commit to branch tuple_sketch
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git
The following commit(s) were added to refs/heads/tuple_sketch by this push:
new 3eec4da removed serde from templates
3eec4da is described below
commit 3eec4da88087112591bccbd802070122fe7b4672
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Fri Jun 26 18:10:39 2020 -0700
removed serde from templates
---
common/include/common_defs.hpp | 4 +
common/include/serde.hpp | 30 ++--
tuple/include/theta_union_experimental.hpp | 4 -
tuple/include/tuple_intersection.hpp | 5 +-
tuple/include/tuple_intersection_impl.hpp | 12 +-
tuple/include/tuple_sketch.hpp | 65 ++++---
tuple/include/tuple_sketch_impl.hpp | 260 +++++++++++++++++-----------
tuple/include/tuple_union.hpp | 11 +-
tuple/include/tuple_union_impl.hpp | 20 +--
tuple/test/tuple_sketch_allocation_test.cpp | 2 +-
tuple/test/tuple_sketch_test.cpp | 29 +++-
11 files changed, 267 insertions(+), 175 deletions(-)
diff --git a/common/include/common_defs.hpp b/common/include/common_defs.hpp
index 4ddcb43..ffb3f19 100644
--- a/common/include/common_defs.hpp
+++ b/common/include/common_defs.hpp
@@ -31,6 +31,10 @@ static const uint64_t DEFAULT_SEED = 9001;
template<typename A> using AllocChar = typename std::allocator_traits<A>::template rebind_alloc<char>;
template<typename A> using string = std::basic_string<char, std::char_traits<char>, AllocChar<A>>;
+// utility function to hide unused compiler warning
+// usually has no additional cost
+template<typename T> void unused(T&&...) {}
+
// common helping functions
// TODO: find a better place for them
diff --git a/common/include/serde.hpp b/common/include/serde.hpp
index d610adb..b8231c9 100644
--- a/common/include/serde.hpp
+++ b/common/include/serde.hpp
@@ -33,13 +33,13 @@ namespace datasketches {
// serialize and deserialize
template<typename T, typename Enable = void> struct serde {
// stream serialization
- void serialize(std::ostream& os, const T* items, unsigned num);
- void deserialize(std::istream& is, T* items, unsigned num); // items allocated but not initialized
+ void serialize(std::ostream& os, const T* items, unsigned num) const;
+ void deserialize(std::istream& is, T* items, unsigned num) const; // items allocated but not initialized
// raw bytes serialization
- size_t size_of_item(const T& item);
- size_t serialize(void* ptr, size_t capacity, const T* items, unsigned num);
- size_t deserialize(const void* ptr, size_t capacity, T* items, unsigned num); // items allocated but not initialized
+ size_t size_of_item(const T& item) const;
+ size_t serialize(void* ptr, size_t capacity, const T* items, unsigned num) const;
+ size_t deserialize(const void* ptr, size_t capacity, T* items, unsigned num) const; // items allocated but not initialized
};
// serde for all fixed-size arithmetic types (int and float of different sizes)
@@ -47,7 +47,7 @@ template<typename T, typename Enable = void> struct serde {
// with LongsSketch and ItemsSketch<Long> with ArrayOfLongsSerDe in Java
template<typename T>
struct serde<T, typename std::enable_if<std::is_arithmetic<T>::value>::type> {
- void serialize(std::ostream& os, const T* items, unsigned num) {
+ void serialize(std::ostream& os, const T* items, unsigned num) const {
bool failure = false;
try {
os.write((char*)items, sizeof(T) * num);
@@ -58,7 +58,7 @@ struct serde<T, typename std::enable_if<std::is_arithmetic<T>::value>::type> {
throw std::runtime_error("error writing to std::ostream with " + std::to_string(num) + " items");
}
}
- void deserialize(std::istream& is, T* items, unsigned num) {
+ void deserialize(std::istream& is, T* items, unsigned num) const {
bool failure = false;
try {
is.read((char*)items, sizeof(T) * num);
@@ -70,16 +70,16 @@ struct serde<T, typename std::enable_if<std::is_arithmetic<T>::value>::type> {
}
}
- size_t size_of_item(const T&) {
+ size_t size_of_item(const T&) const {
return sizeof(T);
}
- size_t serialize(void* ptr, size_t capacity, const T* items, unsigned num) {
+ size_t serialize(void* ptr, size_t capacity, const T* items, unsigned num) const {
const size_t bytes_written = sizeof(T) * num;
check_memory_size(bytes_written, capacity);
memcpy(ptr, items, bytes_written);
return bytes_written;
}
- size_t deserialize(const void* ptr, size_t capacity, T* items, unsigned num) {
+ size_t deserialize(const void* ptr, size_t capacity, T* items, unsigned num) const {
const size_t bytes_read = sizeof(T) * num;
check_memory_size(bytes_read, capacity);
memcpy(items, ptr, bytes_read);
@@ -94,7 +94,7 @@ struct serde<T, typename std::enable_if<std::is_arithmetic<T>::value>::type> {
// which may be too wasteful. Treat this as an example.
template<>
struct serde<std::string> {
- void serialize(std::ostream& os, const std::string* items, unsigned num) {
+ void serialize(std::ostream& os, const std::string* items, unsigned num) const {
unsigned i = 0;
bool failure = false;
try {
@@ -110,7 +110,7 @@ struct serde<std::string> {
throw std::runtime_error("error writing to std::ostream at item " + std::to_string(i));
}
}
- void deserialize(std::istream& is, std::string* items, unsigned num) {
+ void deserialize(std::istream& is, std::string* items, unsigned num) const {
unsigned i = 0;
bool failure = false;
try {
@@ -138,10 +138,10 @@ struct serde<std::string> {
throw std::runtime_error("error reading from std::istream at item " + std::to_string(i));
}
}
- size_t size_of_item(const std::string& item) {
+ size_t size_of_item(const std::string& item) const {
return sizeof(uint32_t) + item.size();
}
- size_t serialize(void* ptr, size_t capacity, const std::string* items, unsigned num) {
+ size_t serialize(void* ptr, size_t capacity, const std::string* items, unsigned num) const {
size_t bytes_written = 0;
for (unsigned i = 0; i < num; ++i) {
const uint32_t length = items[i].size();
@@ -155,7 +155,7 @@ struct serde<std::string> {
}
return bytes_written;
}
- size_t deserialize(const void* ptr, size_t capacity, std::string* items, unsigned num) {
+ size_t deserialize(const void* ptr, size_t capacity, std::string* items, unsigned num) const {
size_t bytes_read = 0;
unsigned i = 0;
bool failure = false;
diff --git a/tuple/include/theta_union_experimental.hpp b/tuple/include/theta_union_experimental.hpp
index 73c44f4..a394152 100644
--- a/tuple/include/theta_union_experimental.hpp
+++ b/tuple/include/theta_union_experimental.hpp
@@ -29,10 +29,6 @@ namespace datasketches {
// experimental theta union derived from the same base as tuple union
-// utility function to hide unused compiler warning
-// usually has no additional cost
-template<typename T> void unused(T&&...) {}
-
struct pass_through_policy {
uint64_t operator()(uint64_t internal_entry, uint64_t incoming_entry) const {
unused(incoming_entry);
diff --git a/tuple/include/tuple_intersection.hpp b/tuple/include/tuple_intersection.hpp
index 7bada6f..2086c14 100644
--- a/tuple/include/tuple_intersection.hpp
+++ b/tuple/include/tuple_intersection.hpp
@@ -38,7 +38,6 @@ struct example_intersection_policy {
template<
typename Summary,
typename Policy,
- typename SerDe = serde<Summary>,
typename Allocator = std::allocator<Summary>
>
class tuple_intersection {
@@ -46,8 +45,8 @@ public:
using Entry = std::pair<uint64_t, Summary>;
using AllocEntry = typename std::allocator_traits<Allocator>::template rebind_alloc<Entry>;
using ExtractKey = pair_extract_key<uint64_t, Summary>;
- using Sketch = tuple_sketch<Summary, SerDe, Allocator>;
- using CompactSketch = compact_tuple_sketch<Summary, SerDe, Allocator>;
+ using Sketch = tuple_sketch<Summary, Allocator>;
+ using CompactSketch = compact_tuple_sketch<Summary, Allocator>;
// reformulate the external policy that operates on Summary
// in terms of operations on Entry
diff --git a/tuple/include/tuple_intersection_impl.hpp b/tuple/include/tuple_intersection_impl.hpp
index 1f166b7..f554255 100644
--- a/tuple/include/tuple_intersection_impl.hpp
+++ b/tuple/include/tuple_intersection_impl.hpp
@@ -19,18 +19,18 @@
namespace datasketches {
-template<typename S, typename P, typename SD, typename A>
-tuple_intersection<S, P, SD, A>::tuple_intersection(uint64_t seed, const P& policy):
+template<typename S, typename P, typename A>
+tuple_intersection<S, P, A>::tuple_intersection(uint64_t seed, const P& policy):
state_(seed, internal_policy(policy))
{}
-template<typename S, typename P, typename SD, typename A>
-void tuple_intersection<S, P, SD, A>::update(const Sketch& sketch) {
+template<typename S, typename P, typename A>
+void tuple_intersection<S, P, A>::update(const Sketch& sketch) {
state_.update(sketch);
}
-template<typename S, typename P, typename SD, typename A>
-auto tuple_intersection<S, P, SD, A>::get_result(bool ordered) const -> CompactSketch {
+template<typename S, typename P, typename A>
+auto tuple_intersection<S, P, A>::get_result(bool ordered) const -> CompactSketch {
return state_.get_result(ordered);
}
diff --git a/tuple/include/tuple_sketch.hpp b/tuple/include/tuple_sketch.hpp
index 2015dd4..ef17f5a 100644
--- a/tuple/include/tuple_sketch.hpp
+++ b/tuple/include/tuple_sketch.hpp
@@ -28,13 +28,12 @@
namespace datasketches {
// forward-declarations
-template<typename S, typename SD, typename A> class tuple_sketch;
-template<typename S, typename U, typename P, typename SD, typename A> class update_tuple_sketch;
-template<typename S, typename SD, typename A> class compact_tuple_sketch;
+template<typename S, typename A> class tuple_sketch;
+template<typename S, typename U, typename P, typename A> class update_tuple_sketch;
+template<typename S, typename A> class compact_tuple_sketch;
template<
typename Summary,
- typename SerDe = serde<Summary>,
typename Allocator = std::allocator<Summary>
>
class tuple_sketch {
@@ -188,12 +187,11 @@ template<
typename Summary,
typename Update = Summary,
typename Policy = default_update_policy<Summary, Update>,
- typename SerDe = serde<Summary>,
typename Allocator = std::allocator<Summary>
>
-class update_tuple_sketch: public tuple_sketch<Summary, SerDe, Allocator> {
+class update_tuple_sketch: public tuple_sketch<Summary, Allocator> {
public:
- using Base = tuple_sketch<Summary, SerDe, Allocator>;
+ using Base = tuple_sketch<Summary, Allocator>;
using Entry = typename Base::Entry;
using AllocEntry = typename std::allocator_traits<Allocator>::template rebind_alloc<Entry>;
using ExtractKey = pair_extract_key<uint64_t, Summary>;
@@ -340,7 +338,7 @@ public:
* @param ordered optional flag to specify if ordered sketch should be produced
* @return compact sketch
*/
- compact_tuple_sketch<Summary, SerDe, Allocator> compact(bool ordered = true) const;
+ compact_tuple_sketch<Summary, Allocator> compact(bool ordered = true) const;
virtual const_iterator begin() const;
virtual const_iterator end() const;
@@ -390,12 +388,11 @@ private:
template<
typename Summary,
- typename SerDe = serde<Summary>,
typename Allocator = std::allocator<Summary>
>
-class compact_tuple_sketch: public tuple_sketch<Summary, SerDe, Allocator> {
+class compact_tuple_sketch: public tuple_sketch<Summary, Allocator> {
public:
- using Base = tuple_sketch<Summary, SerDe, Allocator>;
+ using Base = tuple_sketch<Summary, Allocator>;
using Entry = typename Base::Entry;
using AllocEntry = typename std::allocator_traits<Allocator>::template rebind_alloc<Entry>;
using const_iterator = typename Base::const_iterator;
@@ -403,6 +400,7 @@ public:
using vector_bytes = std::vector<uint8_t, AllocBytes>;
using ExtractKey = pair_extract_key<uint64_t, Summary>;
using comparator = compare_by_key<Entry, ExtractKey>;
+ using AllocU64 = typename std::allocator_traits<Allocator>::template rebind_alloc<uint64_t>;
static const uint8_t SKETCH_TYPE = 3;
@@ -421,8 +419,11 @@ public:
virtual uint16_t get_seed_hash() const;
virtual string<Allocator> to_string(bool print_items = false) const;
- void serialize(std::ostream& os) const;
- vector_bytes serialize(unsigned header_size_bytes = 0) const;
+ template<typename SerDe = serde<Summary>>
+ void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;
+
+ template<typename SerDe = serde<Summary>>
+ vector_bytes serialize(unsigned header_size_bytes = 0, const SerDe& sd = SerDe()) const;
virtual const_iterator begin() const;
virtual const_iterator end() const;
@@ -442,7 +443,8 @@ public:
* @param seed the seed for the hash function that was used to create the sketch
* @return an instance of the sketch
*/
- static compact_tuple_sketch deserialize(const void* bytes, size_t size, uint64_t seed = DEFAULT_SEED);
+ template<typename SerDe = serde<Summary>>
+ static compact_tuple_sketch deserialize(const void* bytes, size_t size, uint64_t seed = DEFAULT_SEED, const SerDe& sd = SerDe());
// TODO: try to hide this
compact_tuple_sketch(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta, std::vector<Entry, AllocEntry>&& entries);
@@ -459,25 +461,40 @@ private:
* This version is for fixed-size arithmetic types (integral and floating point).
* @return size in bytes needed to serialize summaries in this sketch
*/
- template<typename SS = Summary, typename std::enable_if<std::is_arithmetic<SS>::value, int>::type = 0>
- size_t get_serialized_size_summaries_bytes() const;
+ template<typename SerDe, typename SS = Summary, typename std::enable_if<std::is_arithmetic<SS>::value, int>::type = 0>
+ size_t get_serialized_size_summaries_bytes(const SerDe& sd) const;
/**
* Computes size needed to serialize summaries in the sketch.
* This version is for all other types and can be expensive since every item needs to be looked at.
* @return size in bytes needed to serialize summaries in this sketch
*/
- template<typename SS = Summary, typename std::enable_if<!std::is_arithmetic<SS>::value, int>::type = 0>
- size_t get_serialized_size_summaries_bytes() const;
-
-// static compact_tuple_sketch<Summary, SerDe, Allocator> internal_deserialize(std::istream& is, uint8_t preamble_longs, uint8_t flags_byte, uint16_t seed_hash);
-// static compact_tuple_sketch<Summary, SerDe, Allocator> internal_deserialize(const void* bytes, size_t size, uint8_t preamble_longs, uint8_t flags_byte, uint16_t seed_hash);
+ template<typename SerDe, typename SS = Summary, typename std::enable_if<!std::is_arithmetic<SS>::value, int>::type = 0>
+ size_t get_serialized_size_summaries_bytes(const SerDe& sd) const;
+
+ // for deserialize
+ class deleter_of_summaries {
+ public:
+ deleter_of_summaries(uint32_t num, bool destroy): num(num), destroy(destroy) {}
+ void set_destroy(bool destroy) { this->destroy = destroy; }
+ void operator() (Summary* ptr) const {
+ if (ptr != nullptr) {
+ if (destroy) {
+ for (uint32_t i = 0; i < num; ++i) ptr[i].~Summary();
+ }
+ Allocator().deallocate(ptr, num);
+ }
+ }
+ private:
+ uint32_t num;
+ bool destroy;
+ };
};
// builder
-template<typename S, typename U, typename P, typename SD, typename A>
-class update_tuple_sketch<S, U, P, SD, A>::builder: public theta_base_builder<builder> {
+template<typename S, typename U, typename P, typename A>
+class update_tuple_sketch<S, U, P, A>::builder: public theta_base_builder<builder> {
public:
/**
* Creates an instance of the builder with default parameters.
@@ -488,7 +505,7 @@ public:
* This is to create an instance of the sketch with predefined parameters.
* @return an instance of the sketch
*/
- update_tuple_sketch<S, U, P, SD, A> build() const;
+ update_tuple_sketch<S, U, P, A> build() const;
private:
P policy_;
diff --git a/tuple/include/tuple_sketch_impl.hpp b/tuple/include/tuple_sketch_impl.hpp
index c58af34..f504d4d 100644
--- a/tuple/include/tuple_sketch_impl.hpp
+++ b/tuple/include/tuple_sketch_impl.hpp
@@ -23,146 +23,146 @@
namespace datasketches {
-template<typename S, typename SD, typename A>
-bool tuple_sketch<S, SD, A>::is_estimation_mode() const {
+template<typename S, typename A>
+bool tuple_sketch<S, A>::is_estimation_mode() const {
return get_theta64() < theta_constants::MAX_THETA && !is_empty();
}
-template<typename S, typename SD, typename A>
-double tuple_sketch<S, SD, A>::get_theta() const {
+template<typename S, typename A>
+double tuple_sketch<S, A>::get_theta() const {
return static_cast<double>(get_theta64()) / theta_constants::MAX_THETA;
}
-template<typename S, typename SD, typename A>
-double tuple_sketch<S, SD, A>::get_estimate() const {
+template<typename S, typename A>
+double tuple_sketch<S, A>::get_estimate() const {
return get_num_retained() / get_theta();
}
-template<typename S, typename SD, typename A>
-double tuple_sketch<S, SD, A>::get_lower_bound(uint8_t num_std_devs) const {
+template<typename S, typename A>
+double tuple_sketch<S, A>::get_lower_bound(uint8_t num_std_devs) const {
if (!is_estimation_mode()) return get_num_retained();
return binomial_bounds::get_lower_bound(get_num_retained(), get_theta(), num_std_devs);
}
-template<typename S, typename SD, typename A>
-double tuple_sketch<S, SD, A>::get_upper_bound(uint8_t num_std_devs) const {
+template<typename S, typename A>
+double tuple_sketch<S, A>::get_upper_bound(uint8_t num_std_devs) const {
if (!is_estimation_mode()) return get_num_retained();
return binomial_bounds::get_upper_bound(get_num_retained(), get_theta(), num_std_devs);
}
// update sketch
-template<typename S, typename U, typename P, typename SD, typename A>
-update_tuple_sketch<S, U, P, SD, A>::update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const P& policy):
+template<typename S, typename U, typename P, typename A>
+update_tuple_sketch<S, U, P, A>::update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const P& policy):
policy_(policy),
map_(lg_cur_size, lg_nom_size, rf, p, seed)
{}
-template<typename S, typename U, typename P, typename SD, typename A>
-bool update_tuple_sketch<S, U, P, SD, A>::is_empty() const {
+template<typename S, typename U, typename P, typename A>
+bool update_tuple_sketch<S, U, P, A>::is_empty() const {
return map_.is_empty_;
}
-template<typename S, typename U, typename P, typename SD, typename A>
-bool update_tuple_sketch<S, U, P, SD, A>::is_ordered() const {
+template<typename S, typename U, typename P, typename A>
+bool update_tuple_sketch<S, U, P, A>::is_ordered() const {
return false;
}
-template<typename S, typename U, typename P, typename SD, typename A>
-uint64_t update_tuple_sketch<S, U, P, SD, A>::get_theta64() const {
+template<typename S, typename U, typename P, typename A>
+uint64_t update_tuple_sketch<S, U, P, A>::get_theta64() const {
return map_.theta_;
}
-template<typename S, typename U, typename P, typename SD, typename A>
-uint32_t update_tuple_sketch<S, U, P, SD, A>::get_num_retained() const {
+template<typename S, typename U, typename P, typename A>
+uint32_t update_tuple_sketch<S, U, P, A>::get_num_retained() const {
return map_.num_entries_;
}
-template<typename S, typename U, typename P, typename SD, typename A>
-uint16_t update_tuple_sketch<S, U, P, SD, A>::get_seed_hash() const {
+template<typename S, typename U, typename P, typename A>
+uint16_t update_tuple_sketch<S, U, P, A>::get_seed_hash() const {
return compute_seed_hash(map_.seed_);
}
-template<typename S, typename U, typename P, typename SD, typename A>
-uint8_t update_tuple_sketch<S, U, P, SD, A>::get_lg_k() const {
+template<typename S, typename U, typename P, typename A>
+uint8_t update_tuple_sketch<S, U, P, A>::get_lg_k() const {
return map_.lg_nom_size_;
}
-template<typename S, typename U, typename P, typename SD, typename A>
-auto update_tuple_sketch<S, U, P, SD, A>::get_rf() const -> resize_factor {
+template<typename S, typename U, typename P, typename A>
+auto update_tuple_sketch<S, U, P, A>::get_rf() const -> resize_factor {
return map_.rf_;
}
-template<typename S, typename U, typename P, typename SD, typename A>
+template<typename S, typename U, typename P, typename A>
template<typename UU>
-void update_tuple_sketch<S, U, P, SD, A>::update(const std::string& key, UU&& value) {
+void update_tuple_sketch<S, U, P, A>::update(const std::string& key, UU&& value) {
if (key.empty()) return;
update(key.c_str(), key.length(), std::forward<UU>(value));
}
-template<typename S, typename U, typename P, typename SD, typename A>
+template<typename S, typename U, typename P, typename A>
template<typename UU>
-void update_tuple_sketch<S, U, P, SD, A>::update(uint64_t key, UU&& value) {
+void update_tuple_sketch<S, U, P, A>::update(uint64_t key, UU&& value) {
update(&key, sizeof(key), std::forward<UU>(value));
}
-template<typename S, typename U, typename P, typename SD, typename A>
+template<typename S, typename U, typename P, typename A>
template<typename UU>
-void update_tuple_sketch<S, U, P, SD, A>::update(int64_t key, UU&& value) {
+void update_tuple_sketch<S, U, P, A>::update(int64_t key, UU&& value) {
update(&key, sizeof(key), std::forward<UU>(value));
}
-template<typename S, typename U, typename P, typename SD, typename A>
+template<typename S, typename U, typename P, typename A>
template<typename UU>
-void update_tuple_sketch<S, U, P, SD, A>::update(uint32_t key, UU&& value) {
+void update_tuple_sketch<S, U, P, A>::update(uint32_t key, UU&& value) {
update(static_cast<int32_t>(key), std::forward<UU>(value));
}
-template<typename S, typename U, typename P, typename SD, typename A>
+template<typename S, typename U, typename P, typename A>
template<typename UU>
-void update_tuple_sketch<S, U, P, SD, A>::update(int32_t key, UU&& value) {
+void update_tuple_sketch<S, U, P, A>::update(int32_t key, UU&& value) {
update(static_cast<int64_t>(key), std::forward<UU>(value));
}
-template<typename S, typename U, typename P, typename SD, typename A>
+template<typename S, typename U, typename P, typename A>
template<typename UU>
-void update_tuple_sketch<S, U, P, SD, A>::update(uint16_t key, UU&& value) {
+void update_tuple_sketch<S, U, P, A>::update(uint16_t key, UU&& value) {
update(static_cast<int16_t>(key), std::forward<UU>(value));
}
-template<typename S, typename U, typename P, typename SD, typename A>
+template<typename S, typename U, typename P, typename A>
template<typename UU>
-void update_tuple_sketch<S, U, P, SD, A>::update(int16_t key, UU&& value) {
+void update_tuple_sketch<S, U, P, A>::update(int16_t key, UU&& value) {
update(static_cast<int64_t>(key), std::forward<UU>(value));
}
-template<typename S, typename U, typename P, typename SD, typename A>
+template<typename S, typename U, typename P, typename A>
template<typename UU>
-void update_tuple_sketch<S, U, P, SD, A>::update(uint8_t key, UU&& value) {
+void update_tuple_sketch<S, U, P, A>::update(uint8_t key, UU&& value) {
update(static_cast<int8_t>(key), std::forward<UU>(value));
}
-template<typename S, typename U, typename P, typename SD, typename A>
+template<typename S, typename U, typename P, typename A>
template<typename UU>
-void update_tuple_sketch<S, U, P, SD, A>::update(double key, UU&& value) {
+void update_tuple_sketch<S, U, P, A>::update(double key, UU&& value) {
update(canonical_double(key), std::forward<UU>(value));
}
-template<typename S, typename U, typename P, typename SD, typename A>
+template<typename S, typename U, typename P, typename A>
template<typename UU>
-void update_tuple_sketch<S, U, P, SD, A>::update(float key, UU&& value) {
+void update_tuple_sketch<S, U, P, A>::update(float key, UU&& value) {
update(static_cast<double>(key), std::forward<UU>(value));
}
-template<typename S, typename U, typename P, typename SD, typename A>
+template<typename S, typename U, typename P, typename A>
template<typename UU>
-void update_tuple_sketch<S, U, P, SD, A>::update(int8_t key, UU&& value) {
+void update_tuple_sketch<S, U, P, A>::update(int8_t key, UU&& value) {
update(static_cast<int64_t>(key), std::forward<UU>(value));
}
-template<typename S, typename U, typename P, typename SD, typename A>
+template<typename S, typename U, typename P, typename A>
template<typename UU>
-void update_tuple_sketch<S, U, P, SD, A>::update(const void* key, size_t length, UU&& value) {
+void update_tuple_sketch<S, U, P, A>::update(const void* key, size_t length, UU&& value) {
const uint64_t hash = map_.hash_and_screen(key, length);
if (hash == 0) return;
auto result = map_.find(hash);
@@ -175,13 +175,13 @@ void update_tuple_sketch<S, U, P, SD, A>::update(const void* key, size_t length,
}
}
-template<typename S, typename U, typename P, typename SD, typename A>
-void update_tuple_sketch<S, U, P, SD, A>::trim() {
+template<typename S, typename U, typename P, typename A>
+void update_tuple_sketch<S, U, P, A>::trim() {
map_.trim();
}
-template<typename S, typename U, typename P, typename SD, typename A>
-string<A> update_tuple_sketch<S, U, P, SD, A>::to_string(bool detail) const {
+template<typename S, typename U, typename P, typename A>
+string<A> update_tuple_sketch<S, U, P, A>::to_string(bool detail) const {
std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
auto type = typeid(*this).name();
os << "sizeof(" << type << ")=" << sizeof(*this) << std::endl;
@@ -197,25 +197,25 @@ string<A> update_tuple_sketch<S, U, P, SD, A>::to_string(bool detail) const {
return os.str();
}
-template<typename S, typename U, typename P, typename SD, typename A>
-auto update_tuple_sketch<S, U, P, SD, A>::begin() const -> const_iterator {
+template<typename S, typename U, typename P, typename A>
+auto update_tuple_sketch<S, U, P, A>::begin() const -> const_iterator {
return const_iterator(map_.entries_, 1 << map_.lg_cur_size_, 0);
}
-template<typename S, typename U, typename P, typename SD, typename A>
-auto update_tuple_sketch<S, U, P, SD, A>::end() const -> const_iterator {
+template<typename S, typename U, typename P, typename A>
+auto update_tuple_sketch<S, U, P, A>::end() const -> const_iterator {
return const_iterator(nullptr, 0, 1 << map_.lg_cur_size_);
}
-template<typename S, typename U, typename P, typename SD, typename A>
-compact_tuple_sketch<S, SD, A> update_tuple_sketch<S, U, P, SD, A>::compact(bool ordered) const {
- return compact_tuple_sketch<S, SD, A>(*this, ordered);
+template<typename S, typename U, typename P, typename A>
+compact_tuple_sketch<S, A> update_tuple_sketch<S, U, P, A>::compact(bool ordered) const {
+ return compact_tuple_sketch<S, A>(*this, ordered);
}
// compact sketch
-template<typename S, typename SD, typename A>
-compact_tuple_sketch<S, SD, A>::compact_tuple_sketch(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta, std::vector<Entry, AllocEntry>&& entries):
+template<typename S, typename A>
+compact_tuple_sketch<S, A>::compact_tuple_sketch(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta, std::vector<Entry, AllocEntry>&& entries):
is_empty_(is_empty),
is_ordered_(is_ordered),
seed_hash_(seed_hash),
@@ -223,8 +223,8 @@ theta_(theta),
entries_(std::move(entries))
{}
-template<typename S, typename SD, typename A>
-compact_tuple_sketch<S, SD, A>::compact_tuple_sketch(const Base& other, bool ordered):
+template<typename S, typename A>
+compact_tuple_sketch<S, A>::compact_tuple_sketch(const Base& other, bool ordered):
is_empty_(other.is_empty()),
is_ordered_(other.is_ordered() || ordered),
seed_hash_(other.get_seed_hash()),
@@ -236,33 +236,33 @@ entries_()
if (ordered && !other.is_ordered()) std::sort(entries_.begin(), entries_.end(), comparator());
}
-template<typename S, typename SD, typename A>
-bool compact_tuple_sketch<S, SD, A>::is_empty() const {
+template<typename S, typename A>
+bool compact_tuple_sketch<S, A>::is_empty() const {
return is_empty_;
}
-template<typename S, typename SD, typename A>
-bool compact_tuple_sketch<S, SD, A>::is_ordered() const {
+template<typename S, typename A>
+bool compact_tuple_sketch<S, A>::is_ordered() const {
return is_ordered_;
}
-template<typename S, typename SD, typename A>
-uint64_t compact_tuple_sketch<S, SD, A>::get_theta64() const {
+template<typename S, typename A>
+uint64_t compact_tuple_sketch<S, A>::get_theta64() const {
return theta_;
}
-template<typename S, typename SD, typename A>
-uint32_t compact_tuple_sketch<S, SD, A>::get_num_retained() const {
+template<typename S, typename A>
+uint32_t compact_tuple_sketch<S, A>::get_num_retained() const {
return entries_.size();
}
-template<typename S, typename SD, typename A>
-uint16_t compact_tuple_sketch<S, SD, A>::get_seed_hash() const {
+template<typename S, typename A>
+uint16_t compact_tuple_sketch<S, A>::get_seed_hash() const {
return seed_hash_;
}
-template<typename S, typename SD, typename A>
-string<A> compact_tuple_sketch<S, SD, A>::to_string(bool detail) const {
+template<typename S, typename A>
+string<A> compact_tuple_sketch<S, A>::to_string(bool detail) const {
std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
os << "### Compact Tuple sketch summary:" << std::endl;
auto type = typeid(*this).name();
@@ -293,33 +293,38 @@ string<A> compact_tuple_sketch<S, SD, A>::to_string(bool detail) const {
}
// implementation for fixed-size arithmetic types (integral and floating point)
-template<typename S, typename SD, typename A>
-template<typename SS, typename std::enable_if<std::is_arithmetic<SS>::value, int>::type>
-size_t compact_tuple_sketch<S, SD, A>::get_serialized_size_summaries_bytes() const {
+template<typename S, typename A>
+template<typename SD, typename SS, typename std::enable_if<std::is_arithmetic<SS>::value, int>::type>
+size_t compact_tuple_sketch<S, A>::get_serialized_size_summaries_bytes(const SD& sd) const {
+ unused(sd);
return entries_.size() * sizeof(SS);
}
// implementation for all other types (non-arithmetic)
-template<typename S, typename SD, typename A>
-template<typename SS, typename std::enable_if<!std::is_arithmetic<SS>::value, int>::type>
-size_t compact_tuple_sketch<S, SD, A>::get_serialized_size_summaries_bytes() const {
+template<typename S, typename A>
+template<typename SD, typename SS, typename std::enable_if<!std::is_arithmetic<SS>::value, int>::type>
+size_t compact_tuple_sketch<S, A>::get_serialized_size_summaries_bytes(const SD& sd) const {
size_t size = 0;
for (const auto& it: entries_) {
- size += SD().size_of_item(it.second);
+ size += sd.size_of_item(it.second);
}
return size;
}
-template<typename S, typename SD, typename A>
-void compact_tuple_sketch<S, SD, A>::serialize(std::ostream& os) const {
+template<typename S, typename A>
+template<typename SD>
+void compact_tuple_sketch<S, A>::serialize(std::ostream& os, const SD& sd) const {
+ unused(os);
+ unused(sd);
}
-template<typename S, typename SD, typename A>
-auto compact_tuple_sketch<S, SD, A>::serialize(unsigned header_size_bytes) const -> vector_bytes {
+template<typename S, typename A>
+template<typename SD>
+auto compact_tuple_sketch<S, A>::serialize(unsigned header_size_bytes, const SD& sd) const -> vector_bytes {
const bool is_single_item = entries_.size() == 1 && !this->is_estimation_mode();
const uint8_t preamble_longs = this->is_empty() || is_single_item ? 1 : this->is_estimation_mode() ? 3 : 2;
const size_t size = header_size_bytes + sizeof(uint64_t) * preamble_longs
- + sizeof(uint64_t) * entries_.size() + get_serialized_size_summaries_bytes();
+ + sizeof(uint64_t) * entries_.size() + get_serialized_size_summaries_bytes(sd);
vector_bytes bytes(size);
uint8_t* ptr = bytes.data() + header_size_bytes;
const uint8_t* end_ptr = ptr + size;
@@ -354,30 +359,89 @@ auto compact_tuple_sketch<S, SD, A>::serialize(unsigned header_size_bytes) const
ptr += copy_to_mem(&it.first, ptr, sizeof(uint64_t));
}
for (const auto& it: entries_) {
- ptr += SD().serialize(ptr, end_ptr - ptr, &it.second, 1);
+ ptr += sd.serialize(ptr, end_ptr - ptr, &it.second, 1);
}
}
return bytes;
}
-template<typename S, typename SD, typename A>
-auto compact_tuple_sketch<S, SD, A>::begin() const -> const_iterator {
+template<typename S, typename A>
+template<typename SD>
+compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(const void* bytes, size_t size, uint64_t seed, const SD& sd) {
+ ensure_minimum_memory(size, 8);
+ const char* ptr = static_cast<const char*>(bytes);
+ const char* base = ptr;
+ uint8_t preamble_longs;
+ ptr += copy_from_mem(ptr, &preamble_longs, sizeof(preamble_longs));
+ uint8_t serial_version;
+ ptr += copy_from_mem(ptr, &serial_version, sizeof(serial_version));
+ uint8_t type;
+ ptr += copy_from_mem(ptr, &type, sizeof(type));
+ uint16_t unused16;
+ ptr += copy_from_mem(ptr, &unused16, sizeof(unused16));
+ uint8_t flags_byte;
+ ptr += copy_from_mem(ptr, &flags_byte, sizeof(flags_byte));
+ uint16_t seed_hash;
+ ptr += copy_from_mem(ptr, &seed_hash, sizeof(seed_hash));
+// theta_sketch_alloc<A>::check_sketch_type(type, SKETCH_TYPE);
+// theta_sketch_alloc<A>::check_serial_version(serial_version, theta_sketch_alloc<A>::SERIAL_VERSION);
+ const bool is_empty = flags_byte & (1 << Base::flags::IS_EMPTY);
+// if (!is_empty) theta_sketch_alloc<A>::check_seed_hash(seed_hash, theta_sketch_alloc<A>::get_seed_hash(seed));
+
+ uint64_t theta = theta_constants::MAX_THETA;
+ uint32_t num_entries = 0;
+
+ if (!is_empty) {
+ if (preamble_longs == 1) {
+ num_entries = 1;
+ } else {
+ ensure_minimum_memory(size, 8); // read the first prelong before this method
+ ptr += copy_from_mem(ptr, &num_entries, sizeof(num_entries));
+ uint32_t unused32;
+ ptr += copy_from_mem(ptr, &unused32, sizeof(unused32));
+ if (preamble_longs > 2) {
+ ensure_minimum_memory(size, (preamble_longs - 1) << 3);
+ ptr += copy_from_mem(ptr, &theta, sizeof(theta));
+ }
+ }
+ }
+ const size_t keys_size_bytes = sizeof(uint64_t) * num_entries;
+ //check_memory_size(ptr - base + keys_size_bytes, size);
+ ensure_minimum_memory(size, ptr - base + keys_size_bytes);
+ std::vector<Entry, AllocEntry> entries;
+ if (!is_empty) {
+ entries.reserve(num_entries);
+ std::vector<uint64_t, AllocU64> keys(num_entries);
+ ptr += copy_from_mem(ptr, keys.data(), keys_size_bytes);
+ std::unique_ptr<S, deleter_of_summaries> summaries(A().allocate(num_entries), deleter_of_summaries(num_entries, false));
+ ptr += sd.deserialize(ptr, base + size - ptr, summaries.get(), num_entries);
+ summaries.get_deleter().set_destroy(true); // serde did not throw, so the items must be constructed
+ for (size_t i = 0; i < num_entries; ++i) {
+ entries.push_back(Entry(keys[i], std::move(summaries.get()[i])));
+ }
+ }
+ const bool is_ordered = flags_byte & (1 << Base::flags::IS_ORDERED);
+ return compact_tuple_sketch(is_empty, is_ordered, seed_hash, theta, std::move(entries));
+}
+
+template<typename S, typename A>
+auto compact_tuple_sketch<S, A>::begin() const -> const_iterator {
return const_iterator(entries_.data(), entries_.size(), 0);
}
-template<typename S, typename SD, typename A>
-auto compact_tuple_sketch<S, SD, A>::end() const -> const_iterator {
+template<typename S, typename A>
+auto compact_tuple_sketch<S, A>::end() const -> const_iterator {
return const_iterator(nullptr, 0, entries_.size());
}
// builder
-template<typename S, typename U, typename P, typename SD, typename A>
-update_tuple_sketch<S, U, P, SD, A>::builder::builder(const P& policy):
+template<typename S, typename U, typename P, typename A>
+update_tuple_sketch<S, U, P, A>::builder::builder(const P& policy):
policy_(policy) {}
-template<typename S, typename U, typename P, typename SD, typename A>
-update_tuple_sketch<S, U, P, SD, A> update_tuple_sketch<S, U, P, SD, A>::builder::build() const {
+template<typename S, typename U, typename P, typename A>
+auto update_tuple_sketch<S, U, P, A>::builder::build() const -> update_tuple_sketch {
return update_tuple_sketch(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->seed_, policy_);
}
diff --git a/tuple/include/tuple_union.hpp b/tuple/include/tuple_union.hpp
index 1169b72..bbb0593 100644
--- a/tuple/include/tuple_union.hpp
+++ b/tuple/include/tuple_union.hpp
@@ -37,7 +37,6 @@ struct default_union_policy {
template<
typename Summary,
typename Policy = default_union_policy<Summary>,
- typename SerDe = serde<Summary>,
typename Allocator = std::allocator<Summary>
>
class tuple_union {
@@ -45,8 +44,8 @@ public:
using Entry = std::pair<uint64_t, Summary>;
using AllocEntry = typename std::allocator_traits<Allocator>::template rebind_alloc<Entry>;
using ExtractKey = pair_extract_key<uint64_t, Summary>;
- using Sketch = tuple_sketch<Summary, SerDe, Allocator>;
- using CompactSketch = compact_tuple_sketch<Summary, SerDe, Allocator>;
+ using Sketch = tuple_sketch<Summary, Allocator>;
+ using CompactSketch = compact_tuple_sketch<Summary, Allocator>;
using resize_factor = theta_constants::resize_factor;
// reformulate the external policy that operates on Summary
@@ -85,8 +84,8 @@ private:
tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const Policy& policy);
};
-template<typename S, typename P, typename SD, typename A>
-class tuple_union<S, P, SD, A>::builder: public theta_base_builder<builder> {
+template<typename S, typename P, typename A>
+class tuple_union<S, P, A>::builder: public theta_base_builder<builder> {
public:
/**
* Creates and instance of the builder with default parameters.
@@ -97,7 +96,7 @@ public:
* This is to create an instance of the union with predefined parameters.
* @return an instance of the union
*/
- tuple_union<S, P, SD, A> build() const;
+ tuple_union build() const;
private:
P policy_;
diff --git a/tuple/include/tuple_union_impl.hpp b/tuple/include/tuple_union_impl.hpp
index 5d58296..aa45a31 100644
--- a/tuple/include/tuple_union_impl.hpp
+++ b/tuple/include/tuple_union_impl.hpp
@@ -19,27 +19,27 @@
namespace datasketches {
-template<typename S, typename P, typename SD, typename A>
-tuple_union<S, P, SD, A>::tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const P& policy):
+template<typename S, typename P, typename A>
+tuple_union<S, P, A>::tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const P& policy):
state_(lg_cur_size, lg_nom_size, rf, p, seed, internal_policy(policy))
{}
-template<typename S, typename P, typename SD, typename A>
-void tuple_union<S, P, SD, A>::update(const Sketch& sketch) {
+template<typename S, typename P, typename A>
+void tuple_union<S, P, A>::update(const Sketch& sketch) {
state_.update(sketch);
}
-template<typename S, typename P, typename SD, typename A>
-auto tuple_union<S, P, SD, A>::get_result(bool ordered) const -> CompactSketch {
+template<typename S, typename P, typename A>
+auto tuple_union<S, P, A>::get_result(bool ordered) const -> CompactSketch {
return state_.get_result(ordered);
}
-template<typename S, typename P, typename SD, typename A>
-tuple_union<S, P, SD, A>::builder::builder(const P& policy):
+template<typename S, typename P, typename A>
+tuple_union<S, P, A>::builder::builder(const P& policy):
policy_(policy) {}
-template<typename S, typename P, typename SD, typename A>
-auto tuple_union<S, P, SD, A>::builder::build() const -> tuple_union {
+template<typename S, typename P, typename A>
+auto tuple_union<S, P, A>::builder::build() const -> tuple_union {
return tuple_union(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->seed_, policy_);
}
diff --git a/tuple/test/tuple_sketch_allocation_test.cpp b/tuple/test/tuple_sketch_allocation_test.cpp
index 227c9bd..efbacc4 100644
--- a/tuple/test/tuple_sketch_allocation_test.cpp
+++ b/tuple/test/tuple_sketch_allocation_test.cpp
@@ -26,7 +26,7 @@
namespace datasketches {
using update_tuple_sketch_int_alloc =
- update_tuple_sketch<int, int, default_update_policy<int, int>, serde<int>, test_allocator<int>>;
+ update_tuple_sketch<int, int, default_update_policy<int, int>, test_allocator<int>>;
TEST_CASE("tuple sketch with test allocator: exact mode", "[tuple_sketch]") {
test_allocator_total_bytes = 0;
diff --git a/tuple/test/tuple_sketch_test.cpp b/tuple/test/tuple_sketch_test.cpp
index 684379d..2117eb7 100644
--- a/tuple/test/tuple_sketch_test.cpp
+++ b/tuple/test/tuple_sketch_test.cpp
@@ -75,7 +75,7 @@ TEST_CASE("tuple sketch float: exact mode", "[tuple_sketch]") {
update_sketch.update(1, 1);
update_sketch.update(2, 2);
update_sketch.update(1, 1);
- std::cout << update_sketch.to_string(true);
+// std::cout << update_sketch.to_string(true);
REQUIRE(!update_sketch.is_empty());
REQUIRE(!update_sketch.is_estimation_mode());
REQUIRE(update_sketch.get_estimate() == 2);
@@ -92,7 +92,7 @@ TEST_CASE("tuple sketch float: exact mode", "[tuple_sketch]") {
REQUIRE(count == 2);
auto compact_sketch = update_sketch.compact();
- std::cout << compact_sketch.to_string(true);
+// std::cout << compact_sketch.to_string(true);
REQUIRE(!compact_sketch.is_empty());
REQUIRE(!compact_sketch.is_estimation_mode());
REQUIRE(compact_sketch.get_estimate() == 2);
@@ -107,6 +107,18 @@ TEST_CASE("tuple sketch float: exact mode", "[tuple_sketch]") {
++count;
}
REQUIRE(count == 2);
+
+ auto bytes = compact_sketch.serialize();
+ auto deserialized_sketch = compact_tuple_sketch<float>::deserialize(bytes.data(), bytes.size());
+ REQUIRE(!deserialized_sketch.is_empty());
+ REQUIRE(!deserialized_sketch.is_estimation_mode());
+ REQUIRE(deserialized_sketch.get_estimate() == 2);
+ REQUIRE(deserialized_sketch.get_lower_bound(1) == 2);
+ REQUIRE(deserialized_sketch.get_upper_bound(1) == 2);
+ REQUIRE(deserialized_sketch.get_theta() == 1);
+ REQUIRE(deserialized_sketch.get_num_retained() == 2);
+ REQUIRE(deserialized_sketch.is_ordered());
+// std::cout << deserialized_sketch.to_string(true);
}
template<typename T>
@@ -119,7 +131,7 @@ private:
T initial_value;
};
-typedef update_tuple_sketch<float, float, max_value_policy<float>> max_float_update_tuple_sketch;
+using max_float_update_tuple_sketch = update_tuple_sketch<float, float, max_value_policy<float>>;
TEST_CASE("tuple sketch: float, custom policy", "[tuple_sketch]") {
auto update_sketch = max_float_update_tuple_sketch::builder(max_value_policy<float>(5)).build();
@@ -128,7 +140,7 @@ TEST_CASE("tuple sketch: float, custom policy", "[tuple_sketch]") {
update_sketch.update(2, 10);
update_sketch.update(3, 3);
update_sketch.update(3, 7);
- std::cout << update_sketch.to_string(true);
+// std::cout << update_sketch.to_string(true);
int count = 0;
float sum = 0;
for (const auto& entry: update_sketch) {
@@ -154,12 +166,12 @@ struct test_type_replace_policy {
};
TEST_CASE("tuple sketch: test type with replace policy", "[tuple_sketch]") {
- auto sketch = update_tuple_sketch<test_type, test_type, test_type_replace_policy, test_type_serde>::builder().build();
+ auto sketch = update_tuple_sketch<test_type, test_type, test_type_replace_policy>::builder().build();
test_type a(1);
sketch.update(1, a); // this should copy
sketch.update(2, 2); // this should move
sketch.update(1, 2); // this should move
- std::cout << sketch.to_string(true);
+// std::cout << sketch.to_string(true);
REQUIRE(sketch.get_num_retained() == 2);
for (const auto& entry: sketch) {
REQUIRE(entry.second.get_value() == 2);
@@ -181,14 +193,15 @@ TEST_CASE("tuple sketch: array of doubles", "[tuple_sketch]") {
using three_doubles_update_tuple_sketch = update_tuple_sketch<three_doubles, three_doubles, three_doubles_update_policy>;
auto update_sketch = three_doubles_update_tuple_sketch::builder().build();
update_sketch.update(1, three_doubles(1, 2, 3));
- std::cout << update_sketch.to_string(true);
+// std::cout << update_sketch.to_string(true);
const auto& entry = *update_sketch.begin();
REQUIRE(std::get<0>(entry.second) == 1.0);
REQUIRE(std::get<1>(entry.second) == 2.0);
REQUIRE(std::get<2>(entry.second) == 3.0);
auto compact_sketch = update_sketch.compact();
- std::cout << compact_sketch.to_string(true);
+// std::cout << compact_sketch.to_string(true);
+ REQUIRE(compact_sketch.get_num_retained() == 1);
}
TEST_CASE("tuple sketch: float, update with different types of keys", "[tuple_sketch]") {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org