You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2024/03/12 19:09:50 UTC

(datasketches-cpp) 01/01: added get_serialized_size_bytes()

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch tdigest
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git

commit a66776302077bd49705f0e945b62514a14ffebad
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Tue Mar 12 12:09:37 2024 -0700

    added get_serialized_size_bytes()
---
 tdigest/include/tdigest.hpp      | 13 ++++++++++---
 tdigest/include/tdigest_impl.hpp | 18 +++++++++++++-----
 2 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/tdigest/include/tdigest.hpp b/tdigest/include/tdigest.hpp
index d0c32d6..aaa8ae5 100644
--- a/tdigest/include/tdigest.hpp
+++ b/tdigest/include/tdigest.hpp
@@ -20,8 +20,9 @@
 #ifndef _TDIGEST_HPP_
 #define _TDIGEST_HPP_
 
-#include <type_traits>
+#include <cstddef>
 #include <limits>
+#include <type_traits>
 
 #include "common_defs.hpp"
 
@@ -165,6 +166,12 @@ public:
    */
   string<Allocator> to_string(bool print_centroids = false) const;
 
+  /**
+   * Computes size needed to serialize the current state.
+   * @return size in bytes needed to serialize this tdigest
+   */
+  size_t get_serialized_size_bytes() const;
+
   /**
    * This method serializes t-Digest into a given stream in a binary form
    * @param os output stream
@@ -222,12 +229,12 @@ private:
   enum flags { IS_EMPTY, IS_SINGLE_VALUE, REVERSE_MERGE };
 
   bool is_single_value() const;
+  uint8_t get_preamble_longs() const;
+  void merge_buffered();
 
   // for deserialize
   tdigest(bool reverse_merge, uint16_t k, T min, T max, vector_centroid&& centroids, uint64_t total_weight_, const Allocator& allocator);
 
-  void merge_buffered();
-
   static double weighted_average(double x1, double w1, double x2, double w2);
 
   // for compatibility with format of the reference implementation
diff --git a/tdigest/include/tdigest_impl.hpp b/tdigest/include/tdigest_impl.hpp
index 1a48f88..c61e8d3 100644
--- a/tdigest/include/tdigest_impl.hpp
+++ b/tdigest/include/tdigest_impl.hpp
@@ -311,16 +311,24 @@ void tdigest<T, A>::serialize(std::ostream& os) const {
   write(os, centroids_.data(), centroids_.size() * sizeof(centroid));
 }
 
+template<typename T, typename A>
+uint8_t tdigest<T, A>::get_preamble_longs() const { // number of preamble longs for the current state
+  return is_empty() || is_single_value() ? PREAMBLE_LONGS_EMPTY_OR_SINGLE : PREAMBLE_LONGS_MULTIPLE; // empty and single-value states share one preamble length
+}
+
+template<typename T, typename A>
+size_t tdigest<T, A>::get_serialized_size_bytes() const { // NOTE(review): unlike serialize(), this does not call merge_buffered() first -- confirm the result is correct while values are still buffered
+  return get_preamble_longs() * sizeof(uint64_t) + // preamble, counted in 64-bit words
+      (is_empty() ? 0 : (is_single_value() ? sizeof(T) : sizeof(T) * 2 + sizeof(centroid) * centroids_.size())); // payload: nothing when empty, one T for a single value, otherwise two T values plus every entry of centroids_
+}
+
 template<typename T, typename A>
 auto tdigest<T, A>::serialize(unsigned header_size_bytes) const -> vector_bytes {
   const_cast<tdigest*>(this)->merge_buffered(); // side effect
-  const uint8_t preamble_longs = is_empty() || is_single_value() ? PREAMBLE_LONGS_EMPTY_OR_SINGLE : PREAMBLE_LONGS_MULTIPLE;
-  const size_t size_bytes = preamble_longs * sizeof(uint64_t) +
-      (is_empty() ? 0 : (is_single_value() ? sizeof(T) : sizeof(T) * 2 + sizeof(centroid) * centroids_.size()));
-  vector_bytes bytes(size_bytes, 0, allocator_);
+  vector_bytes bytes(get_serialized_size_bytes(), 0, allocator_);
   uint8_t* ptr = bytes.data() + header_size_bytes;
 
-  *ptr++ = preamble_longs;
+  *ptr++ = get_preamble_longs();
   *ptr++ = SERIAL_VERSION;
   *ptr++ = SKETCH_TYPE;
   ptr += copy_to_mem(k_, ptr);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org