You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2020/11/17 21:54:36 UTC
[incubator-datasketches-cpp] branch req_sketch updated:
serialization compatibility
This is an automated email from the ASF dual-hosted git repository.
alsay pushed a commit to branch req_sketch
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git
The following commit(s) were added to refs/heads/req_sketch by this push:
new 458b94d serialization compatibility
458b94d is described below
commit 458b94da5d3b1e2d9440b27ebb2971f821e243ef
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Tue Nov 17 13:49:13 2020 -0800
serialization compatibility
---
req/include/req_sketch.hpp | 2 +-
req/include/req_sketch_impl.hpp | 21 ++++----
req/test/CMakeLists.txt | 4 +-
req/test/req_float_empty_from_java.sk | Bin 0 -> 8 bytes
req/test/req_float_estimation_from_java.sk | Bin 0 -> 11872 bytes
req/test/req_float_single_item_from_java.sk | Bin 0 -> 12 bytes
req/test/req_sketch_test.cpp | 74 +++++++++++++++++++++++++++-
7 files changed, 86 insertions(+), 15 deletions(-)
diff --git a/req/include/req_sketch.hpp b/req/include/req_sketch.hpp
index 2ca578a..aa17b2e 100755
--- a/req/include/req_sketch.hpp
+++ b/req/include/req_sketch.hpp
@@ -178,7 +178,7 @@ private:
static const uint8_t SERIAL_VERSION = 1;
static const uint8_t FAMILY = 17;
static const size_t PREAMBLE_SIZE_BYTES = 8;
- enum flags { RESERVED1, RESERVED2, IS_EMPTY, IS_HIGH_RANK, IS_LEVEL_ZERO_SORTED, IS_SINGLE_ITEM };
+ enum flags { RESERVED1, RESERVED2, IS_EMPTY, IS_HIGH_RANK, RAW_ITEMS, IS_LEVEL_ZERO_SORTED };
uint8_t get_num_levels() const;
void grow();
diff --git a/req/include/req_sketch_impl.hpp b/req/include/req_sketch_impl.hpp
index bf0b701..542a989 100755
--- a/req/include/req_sketch_impl.hpp
+++ b/req/include/req_sketch_impl.hpp
@@ -256,12 +256,12 @@ void req_sketch<T, H, C, S, A>::serialize(std::ostream& os) const {
write(os, serial_version);
const uint8_t family = FAMILY;
write(os, family);
- const bool is_single_item = n_ == 1;
+ const bool raw_items = n_ <= req_constants::MIN_K;
const uint8_t flags_byte(
(is_empty() ? 1 << flags::IS_EMPTY : 0)
| (H ? 1 << flags::IS_HIGH_RANK : 0)
+ | (raw_items ? 1 << flags::RAW_ITEMS : 0)
| (compactors_[0].is_sorted() ? 1 << flags::IS_LEVEL_ZERO_SORTED : 0)
- | (is_single_item ? 1 << flags::IS_SINGLE_ITEM : 0)
);
write(os, flags_byte);
write(os, k_);
@@ -275,7 +275,7 @@ void req_sketch<T, H, C, S, A>::serialize(std::ostream& os) const {
S().serialize(os, min_value_, 1);
S().serialize(os, max_value_, 1);
}
- if (is_single_item) {
+ if (raw_items) {
S().serialize(os, min_value_, 1);
} else {
for (const auto& compactor: compactors_) compactor.serialize(os, S());
@@ -295,12 +295,12 @@ auto req_sketch<T, H, C, S, A>::serialize(unsigned header_size_bytes) const -> v
ptr += copy_to_mem(serial_version, ptr);
const uint8_t family = FAMILY;
ptr += copy_to_mem(family, ptr);
- const bool is_single_item = n_ == 1;
+ const bool raw_items = n_ <= req_constants::MIN_K;
const uint8_t flags_byte(
(is_empty() ? 1 << flags::IS_EMPTY : 0)
| (H ? 1 << flags::IS_HIGH_RANK : 0)
+ | (raw_items ? 1 << flags::RAW_ITEMS : 0)
| (compactors_[0].is_sorted() ? 1 << flags::IS_LEVEL_ZERO_SORTED : 0)
- | (is_single_item ? 1 << flags::IS_SINGLE_ITEM : 0)
);
ptr += copy_to_mem(flags_byte, ptr);
ptr += copy_to_mem(k_, ptr);
@@ -314,7 +314,7 @@ auto req_sketch<T, H, C, S, A>::serialize(unsigned header_size_bytes) const -> v
ptr += S().serialize(ptr, end_ptr - ptr, min_value_, 1);
ptr += S().serialize(ptr, end_ptr - ptr, max_value_, 1);
}
- if (is_single_item) {
+ if (raw_items) {
ptr += S().serialize(ptr, end_ptr - ptr, min_value_, 1);
} else {
for (const auto& compactor: compactors_) ptr += compactor.serialize(ptr, end_ptr - ptr, S());
@@ -333,6 +333,7 @@ req_sketch<T, H, C, S, A> req_sketch<T, H, C, S, A>::deserialize(std::istream& i
const auto num_levels = read<uint8_t>(is);
read<uint8_t>(is); // unused byte
+ std::cout << "flags=" << std::hex << ((int)flags_byte) << "\n";
// TODO: checks
if (!is.good()) throw std::runtime_error("error reading from std::istream");
@@ -346,7 +347,7 @@ req_sketch<T, H, C, S, A> req_sketch<T, H, C, S, A>::deserialize(std::istream& i
std::unique_ptr<T, item_deleter> min_value(nullptr, item_deleter(allocator));
std::unique_ptr<T, item_deleter> max_value(nullptr, item_deleter(allocator));
- const bool is_single_item = flags_byte & (1 << flags::IS_SINGLE_ITEM);
+ const bool raw_items = flags_byte & (1 << flags::RAW_ITEMS);
const bool is_level_0_sorted = flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED);
std::vector<Compactor, AllocCompactor> compactors(allocator);
@@ -361,7 +362,7 @@ req_sketch<T, H, C, S, A> req_sketch<T, H, C, S, A>::deserialize(std::istream& i
max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
}
- if (is_single_item) {
+ if (raw_items) {
S().deserialize(is, min_value_buffer.get(), 1);
// serde call did not throw, repackage with destructor
min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
@@ -430,7 +431,7 @@ req_sketch<T, H, C, S, A> req_sketch<T, H, C, S, A>::deserialize(const void* byt
std::unique_ptr<T, item_deleter> min_value(nullptr, item_deleter(allocator));
std::unique_ptr<T, item_deleter> max_value(nullptr, item_deleter(allocator));
- const bool is_single_item = flags_byte & (1 << flags::IS_SINGLE_ITEM);
+ const bool raw_items = flags_byte & (1 << flags::RAW_ITEMS);
const bool is_level_0_sorted = flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED);
std::vector<Compactor, AllocCompactor> compactors(allocator);
@@ -445,7 +446,7 @@ req_sketch<T, H, C, S, A> req_sketch<T, H, C, S, A>::deserialize(const void* byt
max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
}
- if (is_single_item) {
+ if (raw_items) {
ptr += S().deserialize(ptr, end_ptr - ptr, min_value_buffer.get(), 1);
// serde call did not throw, repackage with destructor
min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
diff --git a/req/test/CMakeLists.txt b/req/test/CMakeLists.txt
index 42a1509..d9bc645 100755
--- a/req/test/CMakeLists.txt
+++ b/req/test/CMakeLists.txt
@@ -24,8 +24,8 @@ set_target_properties(req_test PROPERTIES
CXX_STANDARD_REQUIRED YES
)
-file(TO_CMAKE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" THETA_TEST_BINARY_PATH)
-string(APPEND THETA_TEST_BINARY_PATH "/")
+file(TO_CMAKE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" REQ_TEST_BINARY_PATH)
+string(APPEND REQ_TEST_BINARY_PATH "/")
target_compile_definitions(req_test
PRIVATE
TEST_BINARY_INPUT_PATH="${REQ_TEST_BINARY_PATH}"
diff --git a/req/test/req_float_empty_from_java.sk b/req/test/req_float_empty_from_java.sk
new file mode 100644
index 0000000..9b24bcc
Binary files /dev/null and b/req/test/req_float_empty_from_java.sk differ
diff --git a/req/test/req_float_estimation_from_java.sk b/req/test/req_float_estimation_from_java.sk
new file mode 100644
index 0000000..d063b41
Binary files /dev/null and b/req/test/req_float_estimation_from_java.sk differ
diff --git a/req/test/req_float_single_item_from_java.sk b/req/test/req_float_single_item_from_java.sk
new file mode 100644
index 0000000..774db9f
Binary files /dev/null and b/req/test/req_float_single_item_from_java.sk differ
diff --git a/req/test/req_sketch_test.cpp b/req/test/req_sketch_test.cpp
index ec1fb6f..2953a8a 100755
--- a/req/test/req_sketch_test.cpp
+++ b/req/test/req_sketch_test.cpp
@@ -21,14 +21,16 @@
#include <req_sketch.hpp>
+#include <fstream>
+#include <sstream>
#include <limits>
namespace datasketches {
#ifdef TEST_BINARY_INPUT_PATH
-const std::string inputPath = TEST_BINARY_INPUT_PATH;
+const std::string input_path = TEST_BINARY_INPUT_PATH;
#else
-const std::string inputPath = "test/";
+const std::string input_path = "test/";
#endif
TEST_CASE("req sketch: empty", "[req_sketch]") {
@@ -268,6 +270,74 @@ TEST_CASE("req sketch: byte serialize-deserialize estimation mode", "[req_sketch
REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
}
+TEST_CASE("req sketch: serialize deserialize stream and bytes equivalence", "[req_sketch]") { // checks that stream and byte-vector serialization of one sketch agree
+ req_sketch<float, true> sketch(100);
+ const size_t n = 100000;
+ for (size_t i = 0; i < n; ++i) sketch.update(i); // feed the values 0 .. n-1
+ REQUIRE(sketch.is_estimation_mode());
+
+ std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
+ sketch.serialize(s); // stream form
+ auto bytes = sketch.serialize(); // in-memory form of the same sketch
+ REQUIRE(bytes.size() == static_cast<size_t>(s.tellp())); // both forms must have the same length
+ for (size_t i = 0; i < bytes.size(); ++i) {
+ REQUIRE(((char*)bytes.data())[i] == (char)s.get()); // and the same content, compared byte by byte
+ }
+
+ s.seekg(0); // rewind
+ auto sketch1 = req_sketch<float, true>::deserialize(s); // restored from the stream
+ auto sketch2 = req_sketch<float, true>::deserialize(bytes.data(), bytes.size()); // restored from the byte image
+ REQUIRE(bytes.size() == static_cast<size_t>(s.tellg())); // stream read position after deserialize must equal the byte image length
+ REQUIRE(sketch2.is_empty() == sketch1.is_empty()); // NOTE(review): this is the only check involving sketch1; the checks below compare sketch2 with the original
+ REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
+ REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
+ REQUIRE(sketch2.get_n() == sketch.get_n());
+ REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
+ REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
+}
+
+TEST_CASE("req sketch: stream deserialize from Java - empty", "[req_sketch]") { // reads a stored sketch image from a file and expects an empty sketch
+ std::ifstream is;
+ is.exceptions(std::ios::failbit | std::ios::badbit); // make open/read failures throw rather than go unnoticed
+ is.open(input_path + "req_float_empty_from_java.sk", std::ios::binary); // presumably written by the Java implementation, per the file name
+ auto sketch = req_sketch<float, true>::deserialize(is);
+ std::cout << sketch.to_string(); // prints the sketch's string form to stdout
+ REQUIRE(sketch.is_empty());
+ REQUIRE_FALSE(sketch.is_estimation_mode());
+ REQUIRE(sketch.get_n() == 0);
+ REQUIRE(sketch.get_num_retained() == 0);
+ REQUIRE(std::isnan(sketch.get_min_value())); // an empty float sketch is expected to report NaN as its minimum
+ REQUIRE(std::isnan(sketch.get_max_value())); // and NaN as its maximum
+}
+
+TEST_CASE("req sketch: stream deserialize from Java - single item", "[req_sketch]") { // reads a stored sketch image from a file and expects exactly one item
+ std::ifstream is;
+ is.exceptions(std::ios::failbit | std::ios::badbit); // make open/read failures throw rather than go unnoticed
+ is.open(input_path + "req_float_single_item_from_java.sk", std::ios::binary); // presumably written by the Java implementation, per the file name
+ auto sketch = req_sketch<float, true>::deserialize(is);
+ std::cout << sketch.to_string(); // prints the sketch's string form to stdout
+ REQUIRE_FALSE(sketch.is_empty());
+ REQUIRE_FALSE(sketch.is_estimation_mode());
+ REQUIRE(sketch.get_n() == 1);
+ REQUIRE(sketch.get_num_retained() == 1);
+ REQUIRE(sketch.get_min_value() == 1); // the stored item is expected to be 1, so min and max coincide
+ REQUIRE(sketch.get_max_value() == 1);
+}
+
+TEST_CASE("req sketch: stream deserialize from Java - estimation mode", "[req_sketch]") { // reads a stored sketch image from a file and expects an estimation-mode sketch
+ std::ifstream is;
+ is.exceptions(std::ios::failbit | std::ios::badbit); // make open/read failures throw rather than go unnoticed
+ is.open(input_path + "req_float_estimation_from_java.sk", std::ios::binary); // presumably written by the Java implementation, per the file name
+ auto sketch = req_sketch<float, true>::deserialize(is);
+ std::cout << sketch.to_string(); // prints the sketch's string form to stdout
+ REQUIRE_FALSE(sketch.is_empty());
+ REQUIRE(sketch.is_estimation_mode());
+ REQUIRE(sketch.get_n() == 10000);
+ REQUIRE(sketch.get_num_retained() == 2942); // exact retained count expected from this particular stored file
+ REQUIRE(sketch.get_min_value() == 0); // presumably the file was built from the values 0 .. 9999 - TODO confirm against the generator
+ REQUIRE(sketch.get_max_value() == 9999);
+}
+
TEST_CASE("req sketch: merge", "[req_sketch]") {
req_sketch<float, true> sketch1(100);
for (size_t i = 0; i < 1000; ++i) sketch1.update(i);
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org