You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by jm...@apache.org on 2022/04/05 23:49:30 UTC

[datasketches-cpp] branch quantiles updated: ensure k is power of 2 to match java

This is an automated email from the ASF dual-hosted git repository.

jmalkin pushed a commit to branch quantiles
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git


The following commit(s) were added to refs/heads/quantiles by this push:
     new 368f28a  ensure k is power of 2 to match java
368f28a is described below

commit 368f28a95952e65f98c457e7d27a5e172a66da38
Author: Jon Malkin <jm...@users.noreply.github.com>
AuthorDate: Tue Apr 5 16:49:14 2022 -0700

    ensure k is power of 2 to match java
---
 quantiles/include/quantiles_sketch.hpp      |  1 +
 quantiles/include/quantiles_sketch_impl.hpp | 15 +++++++++---
 quantiles/test/quantiles_sketch_test.cpp    | 36 +++++++++++++++--------------
 3 files changed, 32 insertions(+), 20 deletions(-)

diff --git a/quantiles/include/quantiles_sketch.hpp b/quantiles/include/quantiles_sketch.hpp
index 075dd51..ec3b475 100644
--- a/quantiles/include/quantiles_sketch.hpp
+++ b/quantiles/include/quantiles_sketch.hpp
@@ -502,6 +502,7 @@ private:
   template<typename SerDe>
   static std::pair<Level, size_t> deserialize_array(const void* bytes, size_t size, uint32_t num_items, uint32_t capcacity, const SerDe& serde, const Allocator& allocator);
 
+  static void check_k(uint16_t k);
   static void check_serial_version(uint8_t serial_version);
   static void check_header_validity(uint8_t preamble_longs, uint8_t flags_byte, uint8_t serial_version);
   static void check_family_id(uint8_t family_id);
diff --git a/quantiles/include/quantiles_sketch_impl.hpp b/quantiles/include/quantiles_sketch_impl.hpp
index 84f2867..f42d02f 100644
--- a/quantiles/include/quantiles_sketch_impl.hpp
+++ b/quantiles/include/quantiles_sketch_impl.hpp
@@ -43,9 +43,7 @@ min_value_(nullptr),
 max_value_(nullptr),
 is_sorted_(true)
 {
-  if (k < quantiles_constants::MIN_K || k > quantiles_constants::MAX_K) {
-    throw std::invalid_argument("K must be >= " + std::to_string(quantiles_constants::MIN_K) + " and <= " + std::to_string(quantiles_constants::MAX_K) + ": " + std::to_string(k));
-  }
+  check_k(k_);
   base_buffer_.reserve(2 * std::min(quantiles_constants::MIN_K, k));
 }
 
@@ -268,6 +266,7 @@ auto quantiles_sketch<T, C, A>::deserialize(std::istream &is, const SerDe& serde
   const auto k = read<uint16_t>(is);
   read<uint16_t>(is); // unused
 
+  check_k(k);
   check_serial_version(serial_version); // a little redundant with the next line, but explicit checks
   check_family_id(family_id);
   check_header_validity(preamble_longs, flags_byte, serial_version);
@@ -376,6 +375,7 @@ auto quantiles_sketch<T, C, A>::deserialize(const void* bytes, size_t size, cons
   uint16_t unused;
   ptr += copy_from_mem(ptr, unused);
 
+  check_k(k);
   check_serial_version(serial_version); // a little redundant with the next line, but explicit checks
   check_family_id(family_id);
   check_header_validity(preamble_longs, flags_byte, serial_version);
@@ -768,6 +768,15 @@ uint8_t quantiles_sketch<T, C, A>::compute_levels_needed(const uint16_t k, const
   return static_cast<uint8_t>(64U) - count_leading_zeros_in_u64(n / (2 * k));
 }
 
+template<typename T, typename C, typename A>
+void quantiles_sketch<T, C, A>::check_k(uint16_t k) {
+  if (k < quantiles_constants::MIN_K || k > quantiles_constants::MAX_K || (k & k - 1) != 0) {
+    throw std::invalid_argument("k must be a power of 2 that is >= "
+      + std::to_string(quantiles_constants::MIN_K) + " and <= "
+      + std::to_string(quantiles_constants::MAX_K) + ". Found: " + std::to_string(k));
+  }
+}
+
 template<typename T, typename C, typename A>
 void quantiles_sketch<T, C, A>::check_serial_version(uint8_t serial_version) {
   if (serial_version == SERIAL_VERSION || serial_version == SERIAL_VERSION_1 || serial_version == SERIAL_VERSION_2)
diff --git a/quantiles/test/quantiles_sketch_test.cpp b/quantiles/test/quantiles_sketch_test.cpp
index 847a761..3c17ef8 100644
--- a/quantiles/test/quantiles_sketch_test.cpp
+++ b/quantiles/test/quantiles_sketch_test.cpp
@@ -20,6 +20,7 @@
 #include <catch.hpp>
 #include <cmath>
 #include <sstream>
+#include <fstream>
 
 #include <quantiles_sketch.hpp>
 #include <test_allocator.hpp>
@@ -49,6 +50,7 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
     quantiles_float_sketch sketch1(quantiles_constants::MIN_K, 0); // this should work
     quantiles_float_sketch sketch2(quantiles_constants::MAX_K, 0); // this should work
     REQUIRE_THROWS_AS(new quantiles_float_sketch(quantiles_constants::MIN_K - 1, 0), std::invalid_argument);
+    REQUIRE_THROWS_AS(new quantiles_float_sketch(40, 0), std::invalid_argument); // not power of 2
     // MAX_K + 1 makes no sense because k is uint16_t
   }
 
@@ -75,7 +77,7 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
   }
 
   SECTION("get bad quantile") {
-    quantiles_float_sketch sketch(200, 0);
+    quantiles_float_sketch sketch(64, 0);
     sketch.update(0.0f); // has to be non-empty to reach the check
     REQUIRE_THROWS_AS(sketch.get_quantile(-1), std::invalid_argument);
   }
@@ -108,7 +110,7 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
   }
 
   SECTION("NaN") {
-    quantiles_float_sketch sketch(200, 0);
+    quantiles_float_sketch sketch(256, 0);
     sketch.update(std::numeric_limits<float>::quiet_NaN());
     REQUIRE(sketch.is_empty());
 
@@ -119,7 +121,7 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
 
 
   SECTION("sampling mode") {
-    const uint16_t k = 10;
+    const uint16_t k = 8;
     const uint32_t n = 16 * (2 * k) + 1;
     quantiles_float_sketch sk(k, 0);
     for (uint32_t i = 0; i < n; ++i) {
@@ -170,7 +172,7 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
   }
 
   SECTION("10 items") {
-    quantiles_float_sketch sketch(200, 0);
+    quantiles_float_sketch sketch(128, 0);
     sketch.update(1.0f);
     sketch.update(2.0f);
     sketch.update(3.0f);
@@ -188,7 +190,7 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
   }
 
   SECTION("100 items") {
-    quantiles_float_sketch sketch(200, 0);
+    quantiles_float_sketch sketch(128, 0);
     for (int i = 0; i < 100; ++i) sketch.update(static_cast<float>(i));
     REQUIRE(sketch.get_quantile(0) == 0);
     REQUIRE(sketch.get_quantile(0.01) == 1);
@@ -237,7 +239,7 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
       previous_quantile = quantile;
     }
 
-    std::cout << sketch.to_string();
+    //std::cout << sketch.to_string();
 
     uint32_t count = 0;
     uint64_t total_weight = 0;
@@ -250,7 +252,7 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
   }
 
   SECTION("consistency between get_rank and get_PMF/CDF") {
-    quantiles_float_sketch sketch(200, 0);
+    quantiles_float_sketch sketch(64, 0);
     const int n = 1000;
     float values[n];
     for (int i = 0; i < n; i++) {
@@ -276,7 +278,7 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
   }
 
   SECTION("stream serialize deserialize empty") {
-    quantiles_float_sketch sketch(200, 0);
+    quantiles_float_sketch sketch(128, 0);
     std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
     sketch.serialize(s);
     REQUIRE(static_cast<size_t>(s.tellp()) == sketch.get_serialized_size_bytes());
@@ -294,7 +296,7 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
   }
 
   SECTION("bytes serialize deserialize empty") {
-    quantiles_float_sketch sketch(200, 0);
+    quantiles_float_sketch sketch(256, 0);
     auto bytes = sketch.serialize();
     auto sketch2 = quantiles_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(), 0);
     REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
@@ -309,7 +311,7 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
   }
 
   SECTION("stream serialize deserialize one item") {
-    quantiles_float_sketch sketch(200, 0);
+    quantiles_float_sketch sketch(32, 0);
     sketch.update(1.0f);
     std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
     sketch.serialize(s);
@@ -329,7 +331,7 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
   }
 
   SECTION("bytes serialize deserialize one item") {
-    quantiles_float_sketch sketch(200, 0);
+    quantiles_float_sketch sketch(64, 0);
     sketch.update(1.0f);
     auto bytes = sketch.serialize();
     REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
@@ -347,7 +349,7 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
   }
 
   SECTION("stream serialize deserialize three items") {
-    quantiles_float_sketch sketch(200, 0);
+    quantiles_float_sketch sketch(128, 0);
     sketch.update(1.0f);
     sketch.update(2.0f);
     sketch.update(3.0f);
@@ -366,7 +368,7 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
   }
 
   SECTION("bytes serialize deserialize three items") {
-    quantiles_float_sketch sketch(200, 0);
+    quantiles_float_sketch sketch(128, 0);
     sketch.update(1.0f);
     sketch.update(2.0f);
     sketch.update(3.0f);
@@ -383,7 +385,7 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
   }
 
   SECTION("stream serialize deserialize many floats") {
-    quantiles_float_sketch sketch(200, 0);
+    quantiles_float_sketch sketch(128, 0);
     const int n = 1000;
     for (int i = 0; i < n; i++) sketch.update(static_cast<float>(i));
     std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
@@ -405,7 +407,7 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
     REQUIRE(sketch2.get_rank(static_cast<float>(n)) == sketch.get_rank(static_cast<float>(n)));
   }
   SECTION("bytes serialize deserialize many floats") {
-    quantiles_float_sketch sketch(200, 0);
+    quantiles_float_sketch sketch(128, 0);
     const int n = 1000;
     for (int i = 0; i < n; i++) sketch.update(static_cast<float>(i));
     auto bytes = sketch.serialize();
@@ -453,7 +455,7 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
   }
 
   SECTION("out of order split points, float") {
-    quantiles_float_sketch sketch(200, 0);
+    quantiles_float_sketch sketch(256, 0);
     sketch.update(0.0f); // has too be non-empty to reach the check
     float split_points[2] = {1, 0};
     REQUIRE_THROWS_AS(sketch.get_CDF(split_points, 2), std::invalid_argument);
@@ -467,7 +469,7 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
   }
 
   SECTION("NaN split point") {
-    quantiles_float_sketch sketch(200, 0);
+    quantiles_float_sketch sketch(512, 0);
     sketch.update(0.0f); // has too be non-empty to reach the check
     float split_points[1] = {std::numeric_limits<float>::quiet_NaN()};
     REQUIRE_THROWS_AS(sketch.get_CDF(split_points, 1), std::invalid_argument);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org