You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2024/03/12 19:09:50 UTC
(datasketches-cpp) 01/01: added get_serialized_size_bytes()
This is an automated email from the ASF dual-hosted git repository.
alsay pushed a commit to branch tdigest
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git
commit a66776302077bd49705f0e945b62514a14ffebad
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Tue Mar 12 12:09:37 2024 -0700
added get_serialized_size_bytes()
---
tdigest/include/tdigest.hpp | 13 ++++++++++---
tdigest/include/tdigest_impl.hpp | 18 +++++++++++++-----
2 files changed, 23 insertions(+), 8 deletions(-)
diff --git a/tdigest/include/tdigest.hpp b/tdigest/include/tdigest.hpp
index d0c32d6..aaa8ae5 100644
--- a/tdigest/include/tdigest.hpp
+++ b/tdigest/include/tdigest.hpp
@@ -20,8 +20,9 @@
#ifndef _TDIGEST_HPP_
#define _TDIGEST_HPP_
-#include <type_traits>
+#include <cstddef>
#include <limits>
+#include <type_traits>
#include "common_defs.hpp"
@@ -165,6 +166,12 @@ public:
*/
string<Allocator> to_string(bool print_centroids = false) const;
+ /**
+ * Computes the size in bytes needed to serialize the current state.
+ * @return size in bytes needed to serialize this tdigest
+ */
+ size_t get_serialized_size_bytes() const;
+
/**
* This method serializes t-Digest into a given stream in a binary form
* @param os output stream
@@ -222,12 +229,12 @@ private:
enum flags { IS_EMPTY, IS_SINGLE_VALUE, REVERSE_MERGE };
bool is_single_value() const;
+ uint8_t get_preamble_longs() const;
+ void merge_buffered();
// for deserialize
tdigest(bool reverse_merge, uint16_t k, T min, T max, vector_centroid&& centroids, uint64_t total_weight_, const Allocator& allocator);
- void merge_buffered();
-
static double weighted_average(double x1, double w1, double x2, double w2);
// for compatibility with format of the reference implementation
diff --git a/tdigest/include/tdigest_impl.hpp b/tdigest/include/tdigest_impl.hpp
index 1a48f88..c61e8d3 100644
--- a/tdigest/include/tdigest_impl.hpp
+++ b/tdigest/include/tdigest_impl.hpp
@@ -311,16 +311,24 @@ void tdigest<T, A>::serialize(std::ostream& os) const {
write(os, centroids_.data(), centroids_.size() * sizeof(centroid));
}
+template<typename T, typename A>
+uint8_t tdigest<T, A>::get_preamble_longs() const { // returns the preamble length for the current state, counted in 64-bit longs
+ return is_empty() || is_single_value() ? PREAMBLE_LONGS_EMPTY_OR_SINGLE : PREAMBLE_LONGS_MULTIPLE; // empty and single-value states share one preamble constant; any other state uses PREAMBLE_LONGS_MULTIPLE
+}
+
+template<typename T, typename A>
+size_t tdigest<T, A>::get_serialized_size_bytes() const { // returns preamble bytes plus a state-dependent payload; NOTE(review): unlike serialize(), this does not call merge_buffered() first - confirm whether pending buffered values can change the result
+ return get_preamble_longs() * sizeof(uint64_t) + // preamble is counted in 64-bit longs, so convert to bytes
+ (is_empty() ? 0 : (is_single_value() ? sizeof(T) : sizeof(T) * 2 + sizeof(centroid) * centroids_.size())); // empty: no payload; single value: one T; otherwise two T values (presumably min and max - confirm against serialize()) plus every centroid
+}
+
template<typename T, typename A>
auto tdigest<T, A>::serialize(unsigned header_size_bytes) const -> vector_bytes {
const_cast<tdigest*>(this)->merge_buffered(); // side effect
- const uint8_t preamble_longs = is_empty() || is_single_value() ? PREAMBLE_LONGS_EMPTY_OR_SINGLE : PREAMBLE_LONGS_MULTIPLE;
- const size_t size_bytes = preamble_longs * sizeof(uint64_t) +
- (is_empty() ? 0 : (is_single_value() ? sizeof(T) : sizeof(T) * 2 + sizeof(centroid) * centroids_.size()));
- vector_bytes bytes(size_bytes, 0, allocator_);
+ vector_bytes bytes(get_serialized_size_bytes(), 0, allocator_);
uint8_t* ptr = bytes.data() + header_size_bytes;
- *ptr++ = preamble_longs;
+ *ptr++ = get_preamble_longs();
*ptr++ = SERIAL_VERSION;
*ptr++ = SKETCH_TYPE;
ptr += copy_to_mem(k_, ptr);
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org