You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by jm...@apache.org on 2022/04/05 18:54:22 UTC
[datasketches-cpp] branch quantiles updated: remove unnecessary move, sort BB as side-effect on serialize despite const method since always compact
This is an automated email from the ASF dual-hosted git repository.
jmalkin pushed a commit to branch quantiles
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git
The following commit(s) were added to refs/heads/quantiles by this push:
new 093e33e remove unnecessary move, sort BB as side-effect on serialize despite const method since always compact
093e33e is described below
commit 093e33e06e7b73ad346113338c04ecdd599745a4
Author: Jon Malkin <jm...@users.noreply.github.com>
AuthorDate: Tue Apr 5 11:54:11 2022 -0700
remove unnecessary move, sort BB as side-effect on serialize despite const method since always compact
---
quantiles/include/quantiles_sketch_impl.hpp | 16 ++++++--
quantiles/test/quantiles_sketch_test.cpp | 60 +++++++----------------------
2 files changed, 25 insertions(+), 51 deletions(-)
diff --git a/quantiles/include/quantiles_sketch_impl.hpp b/quantiles/include/quantiles_sketch_impl.hpp
index 7cfb919..84f2867 100644
--- a/quantiles/include/quantiles_sketch_impl.hpp
+++ b/quantiles/include/quantiles_sketch_impl.hpp
@@ -174,10 +174,14 @@ void quantiles_sketch<T, C, A>::serialize(std::ostream& os, const SerDe& serde)
const uint8_t family = FAMILY;
write(os, family);
+ // side-effect: sort base buffer since always compact
+ // can't set is_sorted_ since const method
+ std::sort(const_cast<Level&>(base_buffer_).begin(), const_cast<Level&>(base_buffer_).end(), C());
+
// empty, ordered, compact are valid flags
const uint8_t flags_byte(
(is_empty() ? 1 << flags::IS_EMPTY : 0)
- | (is_sorted_ ? 1 << flags::IS_SORTED : 0)
+ | (1 << flags::IS_SORTED) // always sorted as side effect noted above
| (1 << flags::IS_COMPACT) // always compact -- could be optional for numeric types?
);
write(os, flags_byte);
@@ -218,11 +222,15 @@ auto quantiles_sketch<T, C, A>::serialize(unsigned header_size_bytes, const SerD
const uint8_t family = FAMILY;
ptr += copy_to_mem(family, ptr);
+ // side-effect: sort base buffer since always compact
+ // can't set is_sorted_ since const method
+ std::sort(const_cast<Level&>(base_buffer_).begin(), const_cast<Level&>(base_buffer_).end(), C());
+
// empty, ordered, compact are valid flags
const uint8_t flags_byte(
(is_empty() ? 1 << flags::IS_EMPTY : 0)
- | (is_sorted_ ? 1 << flags::IS_SORTED : 0)
- | (1 << flags::IS_COMPACT) // always compact -- could be optional for numeric types?
+ | (1 << flags::IS_SORTED) // always sorted as side effect noted above
+ | (1 << flags::IS_COMPACT) // always compact
);
ptr += copy_to_mem(flags_byte, ptr);
ptr += copy_to_mem(k_, ptr);
@@ -345,7 +353,7 @@ auto quantiles_sketch<T, C, A>::deserialize_array(std::istream& is, uint32_t num
level.insert(level.begin(),
std::make_move_iterator(items.get()),
std::make_move_iterator(items.get() + num_items));
- return std::move(level);
+ return level;
}
template<typename T, typename C, typename A>
diff --git a/quantiles/test/quantiles_sketch_test.cpp b/quantiles/test/quantiles_sketch_test.cpp
index 218167d..677da9e 100644
--- a/quantiles/test/quantiles_sketch_test.cpp
+++ b/quantiles/test/quantiles_sketch_test.cpp
@@ -274,20 +274,7 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
}
}
}
- /*
- SECTION("deserialize from java") {
- std::ifstream is;
- is.exceptions(std::ios::failbit | std::ios::badbit);
- is.open(testBinaryInputPath + "kll_sketch_from_java.sk", std::ios::binary);
- auto sketch = kll_float_sketch::deserialize(is, test_allocator<float>(0));
- REQUIRE_FALSE(sketch.is_empty());
- REQUIRE(sketch.is_estimation_mode());
- REQUIRE(sketch.get_n() == 1000000);
- REQUIRE(sketch.get_num_retained() == 614);
- REQUIRE(sketch.get_min_value() == 0.0);
- REQUIRE(sketch.get_max_value() == 999999.0);
- }
-*/
+
SECTION("stream serialize deserialize empty") {
quantiles_float_sketch sketch(200, 0);
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
@@ -358,20 +345,7 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
REQUIRE(sketch2.get_rank(1) == 0.0);
REQUIRE(sketch2.get_rank(2) == 1.0);
}
-/*
- SECTION("deserialize one item v1") {
- std::ifstream is;
- is.exceptions(std::ios::failbit | std::ios::badbit);
- is.open(testBinaryInputPath + "kll_sketch_float_one_item_v1.sk", std::ios::binary);
- auto sketch = quantiles_float_sketch::deserialize(is, serde<float>(), test_allocator<float>(0));
- REQUIRE_FALSE(sketch.is_empty());
- REQUIRE_FALSE(sketch.is_estimation_mode());
- REQUIRE(sketch.get_n() == 1);
- REQUIRE(sketch.get_num_retained() == 1);
- REQUIRE(sketch.get_min_value() == 1.0);
- REQUIRE(sketch.get_max_value() == 1.0);
- }
-*/
+
SECTION("stream serialize deserialize three items") {
quantiles_float_sketch sketch(200, 0);
sketch.update(1.0f);
@@ -745,30 +719,22 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
}
/*
SECTION("max serialized size arithmetic type") {
- REQUIRE(kll_sketch<float>::get_max_serialized_size_bytes(200, 10) == 1968);
- REQUIRE(kll_sketch<float>::get_max_serialized_size_bytes(200, 100) == 2316);
- REQUIRE(kll_sketch<float>::get_max_serialized_size_bytes(200, 1000) == 2440);
- REQUIRE(kll_sketch<float>::get_max_serialized_size_bytes(200, 1000000) == 2800);
- REQUIRE(kll_sketch<float>::get_max_serialized_size_bytes(200, 1000000000) == 3160);
+ REQUIRE(quantiles_sketch<float>::get_max_serialized_size_bytes(128, 10) == 1968);
+ REQUIRE(quantiles_sketch<float>::get_max_serialized_size_bytes(128, 100) == 2316);
+ REQUIRE(quantiles_sketch<float>::get_max_serialized_size_bytes(128, 1000) == 2440);
+ REQUIRE(quantiles_sketch<float>::get_max_serialized_size_bytes(256, 1000000) == 2800);
+ REQUIRE(quantiles_sketch<float>::get_max_serialized_size_bytes(256, 1000000000) == 3160);
}
SECTION("max serialized size non-arithmetic type") {
- REQUIRE(kll_sketch<std::string>::get_max_serialized_size_bytes(200, 10, 4) == 1968);
- REQUIRE(kll_sketch<std::string>::get_max_serialized_size_bytes(200, 100, 4) == 2316);
- REQUIRE(kll_sketch<std::string>::get_max_serialized_size_bytes(200, 1000, 4) == 2440);
- REQUIRE(kll_sketch<std::string>::get_max_serialized_size_bytes(200, 1000000, 4) == 2800);
- REQUIRE(kll_sketch<std::string>::get_max_serialized_size_bytes(200, 1000000000, 4) == 3160);
- }
-
- SECTION("issue #236") {
- kll_sketch<int8_t> kll;
- kll.update(1);
- kll.update(2);
- kll.update(3);
- auto blob = kll.serialize();
- auto kll2 = kll_sketch<int8_t>::deserialize(blob.data(), blob.size());
+ REQUIRE(quantiles_sketch<std::string>::get_max_serialized_size_bytes(200, 10, 4) == 1968);
+ REQUIRE(quantiles_sketch<std::string>::get_max_serialized_size_bytes(200, 100, 4) == 2316);
+ REQUIRE(quantiles_sketch<std::string>::get_max_serialized_size_bytes(200, 1000, 4) == 2440);
+ REQUIRE(quantiles_sketch<std::string>::get_max_serialized_size_bytes(200, 1000000, 4) == 2800);
+ REQUIRE(quantiles_sketch<std::string>::get_max_serialized_size_bytes(200, 1000000000, 4) == 3160);
}
*/
+
// cleanup
if (test_allocator_total_bytes != 0) {
REQUIRE(test_allocator_total_bytes == 0);
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org