You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2020/11/17 21:54:36 UTC

[incubator-datasketches-cpp] branch req_sketch updated: serialization compatibility

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch req_sketch
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git


The following commit(s) were added to refs/heads/req_sketch by this push:
     new 458b94d  serialization compatibility
458b94d is described below

commit 458b94da5d3b1e2d9440b27ebb2971f821e243ef
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Tue Nov 17 13:49:13 2020 -0800

    serialization compatibility
---
 req/include/req_sketch.hpp                  |   2 +-
 req/include/req_sketch_impl.hpp             |  21 ++++----
 req/test/CMakeLists.txt                     |   4 +-
 req/test/req_float_empty_from_java.sk       | Bin 0 -> 8 bytes
 req/test/req_float_estimation_from_java.sk  | Bin 0 -> 11872 bytes
 req/test/req_float_single_item_from_java.sk | Bin 0 -> 12 bytes
 req/test/req_sketch_test.cpp                |  74 +++++++++++++++++++++++++++-
 7 files changed, 86 insertions(+), 15 deletions(-)

diff --git a/req/include/req_sketch.hpp b/req/include/req_sketch.hpp
index 2ca578a..aa17b2e 100755
--- a/req/include/req_sketch.hpp
+++ b/req/include/req_sketch.hpp
@@ -178,7 +178,7 @@ private:
   static const uint8_t SERIAL_VERSION = 1;
   static const uint8_t FAMILY = 17;
   static const size_t PREAMBLE_SIZE_BYTES = 8;
-  enum flags { RESERVED1, RESERVED2, IS_EMPTY, IS_HIGH_RANK, IS_LEVEL_ZERO_SORTED, IS_SINGLE_ITEM };
+  enum flags { RESERVED1, RESERVED2, IS_EMPTY, IS_HIGH_RANK, RAW_ITEMS, IS_LEVEL_ZERO_SORTED };
 
   uint8_t get_num_levels() const;
   void grow();
diff --git a/req/include/req_sketch_impl.hpp b/req/include/req_sketch_impl.hpp
index bf0b701..542a989 100755
--- a/req/include/req_sketch_impl.hpp
+++ b/req/include/req_sketch_impl.hpp
@@ -256,12 +256,12 @@ void req_sketch<T, H, C, S, A>::serialize(std::ostream& os) const {
   write(os, serial_version);
   const uint8_t family = FAMILY;
   write(os, family);
-  const bool is_single_item = n_ == 1;
+  const bool raw_items = n_ <= req_constants::MIN_K;
   const uint8_t flags_byte(
       (is_empty() ? 1 << flags::IS_EMPTY : 0)
     | (H ? 1 << flags::IS_HIGH_RANK : 0)
+    | (raw_items ? 1 << flags::RAW_ITEMS : 0)
     | (compactors_[0].is_sorted() ? 1 << flags::IS_LEVEL_ZERO_SORTED : 0)
-    | (is_single_item ? 1 << flags::IS_SINGLE_ITEM : 0)
   );
   write(os, flags_byte);
   write(os, k_);
@@ -275,7 +275,7 @@ void req_sketch<T, H, C, S, A>::serialize(std::ostream& os) const {
     S().serialize(os, min_value_, 1);
     S().serialize(os, max_value_, 1);
   }
-  if (is_single_item) {
+  if (raw_items) {
     S().serialize(os, min_value_, 1);
   } else {
     for (const auto& compactor: compactors_) compactor.serialize(os, S());
@@ -295,12 +295,12 @@ auto req_sketch<T, H, C, S, A>::serialize(unsigned header_size_bytes) const -> v
   ptr += copy_to_mem(serial_version, ptr);
   const uint8_t family = FAMILY;
   ptr += copy_to_mem(family, ptr);
-  const bool is_single_item = n_ == 1;
+  const bool raw_items = n_ <= req_constants::MIN_K;
   const uint8_t flags_byte(
       (is_empty() ? 1 << flags::IS_EMPTY : 0)
     | (H ? 1 << flags::IS_HIGH_RANK : 0)
+    | (raw_items ? 1 << flags::RAW_ITEMS : 0)
     | (compactors_[0].is_sorted() ? 1 << flags::IS_LEVEL_ZERO_SORTED : 0)
-    | (is_single_item ? 1 << flags::IS_SINGLE_ITEM : 0)
   );
   ptr += copy_to_mem(flags_byte, ptr);
   ptr += copy_to_mem(k_, ptr);
@@ -314,7 +314,7 @@ auto req_sketch<T, H, C, S, A>::serialize(unsigned header_size_bytes) const -> v
       ptr += S().serialize(ptr, end_ptr - ptr, min_value_, 1);
       ptr += S().serialize(ptr, end_ptr - ptr, max_value_, 1);
     }
-    if (is_single_item) {
+    if (raw_items) {
       ptr += S().serialize(ptr, end_ptr - ptr, min_value_, 1);
     } else {
       for (const auto& compactor: compactors_) ptr += compactor.serialize(ptr, end_ptr - ptr, S());
@@ -333,6 +333,7 @@ req_sketch<T, H, C, S, A> req_sketch<T, H, C, S, A>::deserialize(std::istream& i
   const auto num_levels = read<uint8_t>(is);
   read<uint8_t>(is); // unused byte
 
+  std::cout << "flags=" << std::hex << ((int)flags_byte) << "\n";
   // TODO: checks
 
   if (!is.good()) throw std::runtime_error("error reading from std::istream");
@@ -346,7 +347,7 @@ req_sketch<T, H, C, S, A> req_sketch<T, H, C, S, A>::deserialize(std::istream& i
   std::unique_ptr<T, item_deleter> min_value(nullptr, item_deleter(allocator));
   std::unique_ptr<T, item_deleter> max_value(nullptr, item_deleter(allocator));
 
-  const bool is_single_item = flags_byte & (1 << flags::IS_SINGLE_ITEM);
+  const bool raw_items = flags_byte & (1 << flags::RAW_ITEMS);
   const bool is_level_0_sorted = flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED);
   std::vector<Compactor, AllocCompactor> compactors(allocator);
 
@@ -361,7 +362,7 @@ req_sketch<T, H, C, S, A> req_sketch<T, H, C, S, A>::deserialize(std::istream& i
     max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
   }
 
-  if (is_single_item) {
+  if (raw_items) {
     S().deserialize(is, min_value_buffer.get(), 1);
     // serde call did not throw, repackage with destrtuctor
     min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
@@ -430,7 +431,7 @@ req_sketch<T, H, C, S, A> req_sketch<T, H, C, S, A>::deserialize(const void* byt
   std::unique_ptr<T, item_deleter> min_value(nullptr, item_deleter(allocator));
   std::unique_ptr<T, item_deleter> max_value(nullptr, item_deleter(allocator));
 
-  const bool is_single_item = flags_byte & (1 << flags::IS_SINGLE_ITEM);
+  const bool raw_items = flags_byte & (1 << flags::RAW_ITEMS);
   const bool is_level_0_sorted = flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED);
   std::vector<Compactor, AllocCompactor> compactors(allocator);
 
@@ -445,7 +446,7 @@ req_sketch<T, H, C, S, A> req_sketch<T, H, C, S, A>::deserialize(const void* byt
     max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
   }
 
-  if (is_single_item) {
+  if (raw_items) {
     ptr += S().deserialize(ptr, end_ptr - ptr, min_value_buffer.get(), 1);
     // serde call did not throw, repackage with destrtuctor
     min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
diff --git a/req/test/CMakeLists.txt b/req/test/CMakeLists.txt
index 42a1509..d9bc645 100755
--- a/req/test/CMakeLists.txt
+++ b/req/test/CMakeLists.txt
@@ -24,8 +24,8 @@ set_target_properties(req_test PROPERTIES
   CXX_STANDARD_REQUIRED YES
 )
 
-file(TO_CMAKE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" THETA_TEST_BINARY_PATH)
-string(APPEND THETA_TEST_BINARY_PATH "/")
+file(TO_CMAKE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" REQ_TEST_BINARY_PATH)
+string(APPEND REQ_TEST_BINARY_PATH "/")
 target_compile_definitions(req_test
   PRIVATE
     TEST_BINARY_INPUT_PATH="${REQ_TEST_BINARY_PATH}"
diff --git a/req/test/req_float_empty_from_java.sk b/req/test/req_float_empty_from_java.sk
new file mode 100644
index 0000000..9b24bcc
Binary files /dev/null and b/req/test/req_float_empty_from_java.sk differ
diff --git a/req/test/req_float_estimation_from_java.sk b/req/test/req_float_estimation_from_java.sk
new file mode 100644
index 0000000..d063b41
Binary files /dev/null and b/req/test/req_float_estimation_from_java.sk differ
diff --git a/req/test/req_float_single_item_from_java.sk b/req/test/req_float_single_item_from_java.sk
new file mode 100644
index 0000000..774db9f
Binary files /dev/null and b/req/test/req_float_single_item_from_java.sk differ
diff --git a/req/test/req_sketch_test.cpp b/req/test/req_sketch_test.cpp
index ec1fb6f..2953a8a 100755
--- a/req/test/req_sketch_test.cpp
+++ b/req/test/req_sketch_test.cpp
@@ -21,14 +21,16 @@
 
 #include <req_sketch.hpp>
 
+#include <fstream>
+#include <sstream>
 #include <limits>
 
 namespace datasketches {
 
 #ifdef TEST_BINARY_INPUT_PATH
-const std::string inputPath = TEST_BINARY_INPUT_PATH;
+const std::string input_path = TEST_BINARY_INPUT_PATH;
 #else
-const std::string inputPath = "test/";
+const std::string input_path = "test/";
 #endif
 
 TEST_CASE("req sketch: empty", "[req_sketch]") {
@@ -268,6 +270,74 @@ TEST_CASE("req sketch: byte serialize-deserialize estimation mode", "[req_sketch
   REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
 }
 
+TEST_CASE("req sketch: serialize deserialize stream and bytes equivalence", "[req_sketch]") { // checks that stream and byte-array serialization agree and both round-trip
+  req_sketch<float, true> sketch(100);
+  const size_t n = 100000;
+  for (size_t i = 0; i < n; ++i) sketch.update(i); // feed the values 0 .. n-1
+  REQUIRE(sketch.is_estimation_mode());
+
+  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
+  sketch.serialize(s); // stream form
+  auto bytes = sketch.serialize(); // byte-array form
+  REQUIRE(bytes.size() == static_cast<size_t>(s.tellp())); // both forms must have the same length
+  for (size_t i = 0; i < bytes.size(); ++i) {
+    REQUIRE(((char*)bytes.data())[i] == (char)s.get()); // byte-by-byte comparison of the two forms
+  }
+
+  s.seekg(0); // rewind the read position before deserializing from the stream
+  auto sketch1 = req_sketch<float, true>::deserialize(s); // round trip through the stream
+  auto sketch2 = req_sketch<float, true>::deserialize(bytes.data(), bytes.size()); // round trip through the byte array
+  REQUIRE(bytes.size() == static_cast<size_t>(s.tellg())); // read position after stream deserialize must equal the serialized size
+  REQUIRE(sketch2.is_empty() == sketch1.is_empty()); // the only assertion that involves sketch1
+  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode()); // from here on sketch2 is compared against the original sketch
+  REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
+  REQUIRE(sketch2.get_n() == sketch.get_n());
+  REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
+  REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
+}
+
+TEST_CASE("req sketch: stream deserialize from Java - empty", "[req_sketch]") { // deserializes the binary fixture req_float_empty_from_java.sk and expects an empty sketch
+  std::ifstream is;
+  is.exceptions(std::ios::failbit | std::ios::badbit); // make open/read failures throw instead of being silently ignored
+  is.open(input_path + "req_float_empty_from_java.sk", std::ios::binary);
+  auto sketch = req_sketch<float, true>::deserialize(is);
+  std::cout << sketch.to_string(); // NOTE(review): prints on every run; <iostream> is not among the includes added by this diff - confirm it is available transitively
+  REQUIRE(sketch.is_empty());
+  REQUIRE_FALSE(sketch.is_estimation_mode());
+  REQUIRE(sketch.get_n() == 0);
+  REQUIRE(sketch.get_num_retained() == 0);
+  REQUIRE(std::isnan(sketch.get_min_value())); // an empty float sketch is expected to report NaN as min; NOTE(review): std::isnan is declared in <cmath> - confirm it is included
+  REQUIRE(std::isnan(sketch.get_max_value())); // and NaN as max
+}
+
+TEST_CASE("req sketch: stream deserialize from Java - single item", "[req_sketch]") { // deserializes the binary fixture req_float_single_item_from_java.sk and expects exactly one item
+  std::ifstream is;
+  is.exceptions(std::ios::failbit | std::ios::badbit); // make open/read failures throw instead of being silently ignored
+  is.open(input_path + "req_float_single_item_from_java.sk", std::ios::binary);
+  auto sketch = req_sketch<float, true>::deserialize(is);
+  std::cout << sketch.to_string(); // NOTE(review): prints on every run; confirm this output is wanted in the test log
+  REQUIRE_FALSE(sketch.is_empty());
+  REQUIRE_FALSE(sketch.is_estimation_mode());
+  REQUIRE(sketch.get_n() == 1);
+  REQUIRE(sketch.get_num_retained() == 1);
+  REQUIRE(sketch.get_min_value() == 1); // with a single item, min and max are both expected to equal 1
+  REQUIRE(sketch.get_max_value() == 1);
+}
+
+TEST_CASE("req sketch: stream deserialize from Java - estimation mode", "[req_sketch]") { // deserializes the binary fixture req_float_estimation_from_java.sk and expects a sketch in estimation mode
+  std::ifstream is;
+  is.exceptions(std::ios::failbit | std::ios::badbit); // make open/read failures throw instead of being silently ignored
+  is.open(input_path + "req_float_estimation_from_java.sk", std::ios::binary);
+  auto sketch = req_sketch<float, true>::deserialize(is);
+  std::cout << sketch.to_string(); // NOTE(review): prints on every run; confirm this output is wanted in the test log
+  REQUIRE_FALSE(sketch.is_empty());
+  REQUIRE(sketch.is_estimation_mode());
+  REQUIRE(sketch.get_n() == 10000); // expected values below are pinned to the fixture file; presumably it was built from the values 0..9999 - TODO confirm
+  REQUIRE(sketch.get_num_retained() == 2942);
+  REQUIRE(sketch.get_min_value() == 0);
+  REQUIRE(sketch.get_max_value() == 9999);
+}
+
 TEST_CASE("req sketch: merge", "[req_sketch]") {
   req_sketch<float, true> sketch1(100);
   for (size_t i = 0; i < 1000; ++i) sketch1.update(i);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org