You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by jm...@apache.org on 2022/04/05 18:54:22 UTC

[datasketches-cpp] branch quantiles updated: remove unnecessary move, sort BB as side-effect on serialize despite const method since always compact

This is an automated email from the ASF dual-hosted git repository.

jmalkin pushed a commit to branch quantiles
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git


The following commit(s) were added to refs/heads/quantiles by this push:
     new 093e33e  remove unnecessary move, sort BB as side-effect on serialize despite const method since always compact
093e33e is described below

commit 093e33e06e7b73ad346113338c04ecdd599745a4
Author: Jon Malkin <jm...@users.noreply.github.com>
AuthorDate: Tue Apr 5 11:54:11 2022 -0700

    remove unnecessary move, sort BB as side-effect on serialize despite const method since always compact
---
 quantiles/include/quantiles_sketch_impl.hpp | 16 ++++++--
 quantiles/test/quantiles_sketch_test.cpp    | 60 +++++++----------------------
 2 files changed, 25 insertions(+), 51 deletions(-)

diff --git a/quantiles/include/quantiles_sketch_impl.hpp b/quantiles/include/quantiles_sketch_impl.hpp
index 7cfb919..84f2867 100644
--- a/quantiles/include/quantiles_sketch_impl.hpp
+++ b/quantiles/include/quantiles_sketch_impl.hpp
@@ -174,10 +174,14 @@ void quantiles_sketch<T, C, A>::serialize(std::ostream& os, const SerDe& serde)
   const uint8_t family = FAMILY;
   write(os, family);
 
+  // side-effect: sort base buffer since always compact
+  // can't set is_sorted_ since const method
+  std::sort(const_cast<Level&>(base_buffer_).begin(), const_cast<Level&>(base_buffer_).end(), C());
+
   // empty, ordered, compact are valid flags
   const uint8_t flags_byte(
       (is_empty() ? 1 << flags::IS_EMPTY : 0)
-    | (is_sorted_ ? 1 << flags::IS_SORTED : 0)
+    | (1 << flags::IS_SORTED) // always sorted as side effect noted above
     | (1 << flags::IS_COMPACT) // always compact -- could be optional for numeric types?
   );
   write(os, flags_byte);
@@ -218,11 +222,15 @@ auto quantiles_sketch<T, C, A>::serialize(unsigned header_size_bytes, const SerD
   const uint8_t family = FAMILY;
   ptr += copy_to_mem(family, ptr);
 
+  // side-effect: sort base buffer since always compact
+  // can't set is_sorted_ since const method
+  std::sort(const_cast<Level&>(base_buffer_).begin(), const_cast<Level&>(base_buffer_).end(), C());
+
   // empty, ordered, compact are valid flags
   const uint8_t flags_byte(
       (is_empty() ? 1 << flags::IS_EMPTY : 0)
-    | (is_sorted_ ? 1 << flags::IS_SORTED : 0)
-    | (1 << flags::IS_COMPACT) // always compact -- could be optional for numeric types?
+    | (1 << flags::IS_SORTED) // always sorted as side effect noted above
+    | (1 << flags::IS_COMPACT) // always compact
   );
   ptr += copy_to_mem(flags_byte, ptr);
   ptr += copy_to_mem(k_, ptr);
@@ -345,7 +353,7 @@ auto quantiles_sketch<T, C, A>::deserialize_array(std::istream& is, uint32_t num
   level.insert(level.begin(),
                std::make_move_iterator(items.get()),
                std::make_move_iterator(items.get() + num_items));
-  return std::move(level);
+  return level;
 }
 
 template<typename T, typename C, typename A>
diff --git a/quantiles/test/quantiles_sketch_test.cpp b/quantiles/test/quantiles_sketch_test.cpp
index 218167d..677da9e 100644
--- a/quantiles/test/quantiles_sketch_test.cpp
+++ b/quantiles/test/quantiles_sketch_test.cpp
@@ -274,20 +274,7 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
       }
     }
   }
-  /*
-  SECTION("deserialize from java") {
-    std::ifstream is;
-    is.exceptions(std::ios::failbit | std::ios::badbit);
-    is.open(testBinaryInputPath + "kll_sketch_from_java.sk", std::ios::binary);
-    auto sketch = kll_float_sketch::deserialize(is, test_allocator<float>(0));
-    REQUIRE_FALSE(sketch.is_empty());
-    REQUIRE(sketch.is_estimation_mode());
-    REQUIRE(sketch.get_n() == 1000000);
-    REQUIRE(sketch.get_num_retained() == 614);
-    REQUIRE(sketch.get_min_value() == 0.0);
-    REQUIRE(sketch.get_max_value() == 999999.0);
-  }
-*/
+
   SECTION("stream serialize deserialize empty") {
     quantiles_float_sketch sketch(200, 0);
     std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
@@ -358,20 +345,7 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
     REQUIRE(sketch2.get_rank(1) == 0.0);
     REQUIRE(sketch2.get_rank(2) == 1.0);
   }
-/*
-  SECTION("deserialize one item v1") {
-    std::ifstream is;
-    is.exceptions(std::ios::failbit | std::ios::badbit);
-    is.open(testBinaryInputPath + "kll_sketch_float_one_item_v1.sk", std::ios::binary);
-    auto sketch = quantiles_float_sketch::deserialize(is, serde<float>(), test_allocator<float>(0));
-    REQUIRE_FALSE(sketch.is_empty());
-    REQUIRE_FALSE(sketch.is_estimation_mode());
-    REQUIRE(sketch.get_n() == 1);
-    REQUIRE(sketch.get_num_retained() == 1);
-    REQUIRE(sketch.get_min_value() == 1.0);
-    REQUIRE(sketch.get_max_value() == 1.0);
-  }
-*/
+
   SECTION("stream serialize deserialize three items") {
     quantiles_float_sketch sketch(200, 0);
     sketch.update(1.0f);
@@ -745,30 +719,22 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
   }
 /*
   SECTION("max serialized size arithmetic type") {
-    REQUIRE(kll_sketch<float>::get_max_serialized_size_bytes(200, 10) == 1968);
-    REQUIRE(kll_sketch<float>::get_max_serialized_size_bytes(200, 100) == 2316);
-    REQUIRE(kll_sketch<float>::get_max_serialized_size_bytes(200, 1000) == 2440);
-    REQUIRE(kll_sketch<float>::get_max_serialized_size_bytes(200, 1000000) == 2800);
-    REQUIRE(kll_sketch<float>::get_max_serialized_size_bytes(200, 1000000000) == 3160);
+    REQUIRE(quantiles_sketch<float>::get_max_serialized_size_bytes(128, 10) == 1968);
+    REQUIRE(quantiles_sketch<float>::get_max_serialized_size_bytes(128, 100) == 2316);
+    REQUIRE(quantiles_sketch<float>::get_max_serialized_size_bytes(128, 1000) == 2440);
+    REQUIRE(quantiles_sketch<float>::get_max_serialized_size_bytes(256, 1000000) == 2800);
+    REQUIRE(quantiles_sketch<float>::get_max_serialized_size_bytes(256, 1000000000) == 3160);
   }
 
   SECTION("max serialized size non-arithmetic type") {
-    REQUIRE(kll_sketch<std::string>::get_max_serialized_size_bytes(200, 10, 4) == 1968);
-    REQUIRE(kll_sketch<std::string>::get_max_serialized_size_bytes(200, 100, 4) == 2316);
-    REQUIRE(kll_sketch<std::string>::get_max_serialized_size_bytes(200, 1000, 4) == 2440);
-    REQUIRE(kll_sketch<std::string>::get_max_serialized_size_bytes(200, 1000000, 4) == 2800);
-    REQUIRE(kll_sketch<std::string>::get_max_serialized_size_bytes(200, 1000000000, 4) == 3160);
-  }
-
-  SECTION("issue #236") {
-    kll_sketch<int8_t> kll;
-    kll.update(1);
-    kll.update(2);
-    kll.update(3);
-    auto blob = kll.serialize();
-    auto kll2 = kll_sketch<int8_t>::deserialize(blob.data(), blob.size());
+    REQUIRE(quantiles_sketch<std::string>::get_max_serialized_size_bytes(200, 10, 4) == 1968);
+    REQUIRE(quantiles_sketch<std::string>::get_max_serialized_size_bytes(200, 100, 4) == 2316);
+    REQUIRE(quantiles_sketch<std::string>::get_max_serialized_size_bytes(200, 1000, 4) == 2440);
+    REQUIRE(quantiles_sketch<std::string>::get_max_serialized_size_bytes(200, 1000000, 4) == 2800);
+    REQUIRE(quantiles_sketch<std::string>::get_max_serialized_size_bytes(200, 1000000000, 4) == 3160);
   }
 */
+
   // cleanup
   if (test_allocator_total_bytes != 0) {
     REQUIRE(test_allocator_total_bytes == 0);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org