You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2020/11/10 22:35:51 UTC

[incubator-datasketches-cpp] branch req_sketch updated: copy and move, serialization fixes, tests

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch req_sketch
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git


The following commit(s) were added to refs/heads/req_sketch by this push:
     new de2e6cb  copy and move, serialization fixes, tests
de2e6cb is described below

commit de2e6cb107e6f38e870690461bb3c1485ed36a6b
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Tue Nov 10 14:35:40 2020 -0800

    copy and move, serialization fixes, tests
---
 req/include/req_sketch.hpp      |  5 ++-
 req/include/req_sketch_impl.hpp | 87 +++++++++++++++++++++++++++++++++--------
 req/test/req_sketch_test.cpp    | 62 ++++++++++++++++++++++++++---
 3 files changed, 130 insertions(+), 24 deletions(-)

diff --git a/req/include/req_sketch.hpp b/req/include/req_sketch.hpp
index 117182a..2c25958 100755
--- a/req/include/req_sketch.hpp
+++ b/req/include/req_sketch.hpp
@@ -40,7 +40,10 @@ public:
 
   explicit req_sketch(uint16_t k, const Allocator& allocator = Allocator());
   ~req_sketch();
-  // TODO: copy, move, assign
+  req_sketch(const req_sketch& other);
+  req_sketch(req_sketch&& other) noexcept;
+  req_sketch& operator=(const req_sketch& other);
+  req_sketch& operator=(req_sketch&& other);
 
   /**
    * Returns true if this sketch is empty.
diff --git a/req/include/req_sketch_impl.hpp b/req/include/req_sketch_impl.hpp
index bf2ce7b..57fec03 100755
--- a/req/include/req_sketch_impl.hpp
+++ b/req/include/req_sketch_impl.hpp
@@ -51,6 +51,63 @@ req_sketch<T, H, C, S, A>::~req_sketch() {
 }
 
 template<typename T, bool H, typename C, typename S, typename A>
+req_sketch<T, H, C, S, A>::req_sketch(const req_sketch& other): // copy constructor: member-wise copy plus clones of the min/max items
+allocator_(other.allocator_),
+k_(other.k_),
+max_nom_size_(other.max_nom_size_),
+num_retained_(other.num_retained_),
+n_(other.n_),
+compactors_(other.compactors_),
+min_value_(nullptr), // stays null when other has no min item
+max_value_(nullptr) // stays null when other has no max item
+{ // clone the items through this sketch's allocator_ (copied from other above), not a default-constructed A
+  if (other.min_value_ != nullptr) min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
+  if (other.max_value_ != nullptr) max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
+}
+
+template<typename T, bool H, typename C, typename S, typename A>
+req_sketch<T, H, C, S, A>::req_sketch(req_sketch&& other) noexcept : // move constructor: takes over other's contents
+allocator_(std::move(other.allocator_)),
+k_(other.k_),
+max_nom_size_(other.max_nom_size_),
+num_retained_(other.num_retained_),
+n_(other.n_),
+compactors_(std::move(other.compactors_)),
+min_value_(other.min_value_), // take other's min item pointer as-is; the item itself is not copied
+max_value_(other.max_value_) // take other's max item pointer as-is; the item itself is not copied
+{
+  other.min_value_ = nullptr; // other must no longer refer to the items this sketch now holds
+  other.max_value_ = nullptr;
+}
+
+template<typename T, bool H, typename C, typename S, typename A>
+req_sketch<T, H, C, S, A>& req_sketch<T, H, C, S, A>::operator=(const req_sketch& other) { // copy assignment via copy-and-swap
+  req_sketch copy(other); // build the full copy first; no member of *this has been touched if this throws
+  std::swap(allocator_, copy.allocator_);
+  std::swap(k_, copy.k_);
+  std::swap(max_nom_size_, copy.max_nom_size_);
+  std::swap(num_retained_, copy.num_retained_);
+  std::swap(n_, copy.n_);
+  std::swap(compactors_, copy.compactors_);
+  std::swap(min_value_, copy.min_value_);
+  std::swap(max_value_, copy.max_value_); // after the swaps, copy holds the previous state of *this
+  return *this;
+}
+
+template<typename T, bool H, typename C, typename S, typename A>
+req_sketch<T, H, C, S, A>& req_sketch<T, H, C, S, A>::operator=(req_sketch&& other) { // move assignment: swaps every member with other
+  std::swap(allocator_, other.allocator_); // NOTE(review): not declared noexcept, unlike the move constructor
+  std::swap(k_, other.k_);
+  std::swap(max_nom_size_, other.max_nom_size_);
+  std::swap(num_retained_, other.num_retained_);
+  std::swap(n_, other.n_);
+  std::swap(compactors_, other.compactors_);
+  std::swap(min_value_, other.min_value_);
+  std::swap(max_value_, other.max_value_); // other now holds the previous state of *this
+  return *this;
+}
+
+template<typename T, bool H, typename C, typename S, typename A>
 bool req_sketch<T, H, C, S, A>::is_empty() const {
   return n_ == 0;
 }
@@ -121,7 +178,6 @@ const T& req_sketch<T, H, C, S, A>::get_quantile(double rank) const {
   if ((rank < 0.0) || (rank > 1.0)) {
     throw std::invalid_argument("Rank cannot be less than zero or greater than 1.0");
   }
-  // TODO: min and max
   if (!compactors_[0].is_sorted()) {
     const_cast<req_compactor<T, H, C, A>&>(compactors_[0]).sort(); // allow this side effect
   }
@@ -135,8 +191,8 @@ const T& req_sketch<T, H, C, S, A>::get_quantile(double rank) const {
 
 template<typename T, bool H, typename C, typename S, typename A>
 void req_sketch<T, H, C, S, A>::serialize(std::ostream& os) const {
-  const uint8_t preamble_longs = 1;
-  write(os, preamble_longs);
+  const uint8_t preamble_ints = is_estimation_mode() ? 4 : 2;
+  write(os, preamble_ints);
   const uint8_t serial_version = SERIAL_VERSION;
   write(os, serial_version);
   const uint8_t family = FAMILY;
@@ -144,7 +200,7 @@ void req_sketch<T, H, C, S, A>::serialize(std::ostream& os) const {
   const bool is_single_item = n_ == 1;
   const uint8_t flags_byte(
       (is_empty() ? 1 << flags::IS_EMPTY : 0)
-      | (H ? 1 << flags::IS_HIGH_RANK : 0)
+    | (H ? 1 << flags::IS_HIGH_RANK : 0)
     | (compactors_[0].is_sorted() ? 1 << flags::IS_LEVEL_ZERO_SORTED : 0)
     | (is_single_item ? 1 << flags::IS_SINGLE_ITEM : 0)
   );
@@ -169,13 +225,13 @@ void req_sketch<T, H, C, S, A>::serialize(std::ostream& os) const {
 
 template<typename T, bool H, typename C, typename S, typename A>
 req_sketch<T, H, C, S, A> req_sketch<T, H, C, S, A>::deserialize(std::istream& is, const A& allocator) {
-  const auto preamble_longs = read<uint8_t>(is);
+  const auto preamble_ints = read<uint8_t>(is);
   const auto serial_version = read<uint8_t>(is);
   const auto family_id = read<uint8_t>(is);
   const auto flags_byte = read<uint8_t>(is);
   const auto k = read<uint16_t>(is);
   const auto num_levels = read<uint8_t>(is);
-  const auto unused = read<uint8_t>(is);
+  read<uint8_t>(is); // unused byte
 
   // TODO: checks
 
@@ -183,9 +239,6 @@ req_sketch<T, H, C, S, A> req_sketch<T, H, C, S, A>::deserialize(std::istream& i
   const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
   if (is_empty) return req_sketch(k, allocator);
 
-  uint64_t n = 1;
-  if (num_levels > 1) n = read<uint64_t>(is);
-
   A alloc(allocator);
   auto item_buffer_deleter = [&alloc](T* ptr) { alloc.deallocate(ptr, 1); };
   std::unique_ptr<T, decltype(item_buffer_deleter)> min_value_buffer(alloc.allocate(1), item_buffer_deleter);
@@ -194,7 +247,12 @@ req_sketch<T, H, C, S, A> req_sketch<T, H, C, S, A>::deserialize(std::istream& i
   std::unique_ptr<T, item_deleter> max_value(nullptr, item_deleter(allocator));
 
   const bool is_single_item = flags_byte & (1 << flags::IS_SINGLE_ITEM);
+  const bool is_level_0_sorted = flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED);
+  std::vector<Compactor, AllocCompactor> compactors(allocator);
+
+  uint64_t n = 1;
   if (num_levels > 1) {
+    n = read<uint64_t>(is);
     S().deserialize(is, min_value_buffer.get(), 1);
     // serde call did not throw, repackage with destrtuctor
     min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
@@ -203,9 +261,6 @@ req_sketch<T, H, C, S, A> req_sketch<T, H, C, S, A>::deserialize(std::istream& i
     max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
   }
 
-  const bool is_level_0_sorted = flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED);
-  std::vector<Compactor, AllocCompactor> compactors(allocator);
-  std::unique_ptr<T, decltype(item_buffer_deleter)> item_buffer(alloc.allocate(1), item_buffer_deleter);
   if (is_single_item) {
     S().deserialize(is, min_value_buffer.get(), 1);
     // serde call did not throw, repackage with destrtuctor
@@ -215,6 +270,9 @@ req_sketch<T, H, C, S, A> req_sketch<T, H, C, S, A>::deserialize(std::istream& i
     max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
     compactors.push_back(req_compactor<T, H, C, A>(1, k, allocator, min_value.get(), is_level_0_sorted));
   } else {
+    for (size_t i = 0; i < num_levels; ++i) {
+      compactors.push_back(req_compactor<T, H, C, A>::deserialize(is, S(), allocator, i == 0 ? is_level_0_sorted : true));
+    }
     if (num_levels == 1) {
       const auto& items = compactors[0].get_items();
       n = items.size();
@@ -232,11 +290,6 @@ req_sketch<T, H, C, S, A> req_sketch<T, H, C, S, A>::deserialize(std::istream& i
       new (max_value_buffer.get()) T(*max_it);
       // copy did not throw, repackage with destrtuctor
       max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
-    } else {
-      for (size_t i = 0; i < num_levels; ++i) {
-        auto compactor = req_compactor<T, H, C, A>::deserialize(is, S(), allocator, i == 0 ? is_level_0_sorted : true);
-        compactors.push_back(std::move(compactor));
-      }
     }
   }
 
diff --git a/req/test/req_sketch_test.cpp b/req/test/req_sketch_test.cpp
index 20fd34b..4520855 100755
--- a/req/test/req_sketch_test.cpp
+++ b/req/test/req_sketch_test.cpp
@@ -134,21 +134,71 @@ TEST_CASE("req sketch: estimation mode", "[req_sketch]") {
   REQUIRE(sketch.get_max_value() == n - 1);
 }
 
+TEST_CASE("req sketch: stream serialize-deserialize empty", "[req_sketch]") { // stream round trip of a sketch with no updates
+  req_sketch<float, true> sketch(100);
+
+  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
+  sketch.serialize(s);
+  auto sketch2 = req_sketch<float, true>::deserialize(s);
+  REQUIRE(s.tellg() == s.tellp()); // read position equals write position: every written byte was consumed
+  REQUIRE(sketch2.is_empty() == sketch.is_empty());
+  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
+  REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
+  REQUIRE(sketch2.get_n() == sketch.get_n());
+  REQUIRE(std::isnan(sketch2.get_min_value())); // the test expects NaN for min/max of an empty float sketch
+  REQUIRE(std::isnan(sketch2.get_max_value()));
+}
+
+TEST_CASE("req sketch: stream serialize-deserialize single item", "[req_sketch]") { // stream round trip after exactly one update
+  req_sketch<float, true> sketch(100);
+  sketch.update(1); // the only item fed to the sketch
+
+  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
+  sketch.serialize(s);
+  auto sketch2 = req_sketch<float, true>::deserialize(s);
+  REQUIRE(s.tellg() == s.tellp()); // read position equals write position: every written byte was consumed
+  REQUIRE(sketch2.is_empty() == sketch.is_empty());
+  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
+  REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
+  REQUIRE(sketch2.get_n() == sketch.get_n());
+  REQUIRE(sketch2.get_min_value() == sketch.get_min_value()); // min/max must match the original sketch after the round trip
+  REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
+}
+
+TEST_CASE("req sketch: stream serialize-deserialize exact mode", "[req_sketch]") { // stream round trip of a sketch that is not in estimation mode
+  req_sketch<float, true> sketch(100);
+  const size_t n = 50;
+  for (size_t i = 0; i < n; ++i) sketch.update(i); // feed the values 0 .. n-1
+  REQUIRE_FALSE(sketch.is_estimation_mode()); // precondition of this test: these updates must not trigger estimation mode
+
+  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
+  sketch.serialize(s);
+  auto sketch2 = req_sketch<float, true>::deserialize(s);
+  REQUIRE(s.tellg() == s.tellp()); // read position equals write position: every written byte was consumed
+  REQUIRE(sketch2.is_empty() == sketch.is_empty());
+  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
+  REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
+  REQUIRE(sketch2.get_n() == sketch.get_n());
+  REQUIRE(sketch2.get_min_value() == sketch.get_min_value()); // min/max must match the original sketch after the round trip
+  REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
+}
+
 TEST_CASE("req sketch: stream serialize-deserialize estimation mode", "[req_sketch]") {
   req_sketch<float, true> sketch(100);
   const size_t n = 100000;
   for (size_t i = 0; i < n; ++i) sketch.update(i);
+  REQUIRE(sketch.is_estimation_mode());
 
   std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
   sketch.serialize(s);
   auto sketch2 = req_sketch<float, true>::deserialize(s);
   REQUIRE(s.tellg() == s.tellp());
-  REQUIRE(sketch.is_empty() == sketch2.is_empty());
-  REQUIRE(sketch.is_estimation_mode() == sketch2.is_estimation_mode());
-  REQUIRE(sketch.get_num_retained() == sketch2.get_num_retained());
-  REQUIRE(sketch.get_n() == sketch2.get_n());
-  REQUIRE(sketch.get_min_value() == sketch2.get_min_value());
-  REQUIRE(sketch.get_max_value() == sketch2.get_max_value());
+  REQUIRE(sketch2.is_empty() == sketch.is_empty());
+  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
+  REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
+  REQUIRE(sketch2.get_n() == sketch.get_n());
+  REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
+  REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
 }
 
 } /* namespace datasketches */


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org