You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by jm...@apache.org on 2022/04/26 07:17:22 UTC

[datasketches-cpp] 01/03: improve test coverage

This is an automated email from the ASF dual-hosted git repository.

jmalkin pushed a commit to branch quantiles
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git

commit 8a5c65e302473a2cc339249bf2fe79ee1d278b8b
Author: Jon Malkin <jm...@users.noreply.github.com>
AuthorDate: Mon Apr 25 23:55:28 2022 -0700

    improve test coverage
---
 quantiles/include/quantiles_sketch_impl.hpp |  19 ----
 quantiles/test/quantiles_sketch_test.cpp    | 133 ++++++++++++++++++++++++++++
 2 files changed, 133 insertions(+), 19 deletions(-)

diff --git a/quantiles/include/quantiles_sketch_impl.hpp b/quantiles/include/quantiles_sketch_impl.hpp
index cf620d3..2065396 100644
--- a/quantiles/include/quantiles_sketch_impl.hpp
+++ b/quantiles/include/quantiles_sketch_impl.hpp
@@ -208,25 +208,6 @@ void quantiles_sketch<T, C, A>::merge(FwdSk&& other) {
     }
     *this = sk_copy;
   }
-
-/*
-  // update min/max values
-  // can't just check is_empty() since min/max might not have been set if
-  // there were no base buffer items added via update()
-  if (min_value_ == nullptr) {
-    min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
-  } else {
-    if (C()(*other.min_value_, *min_value_))
-      *min_value_ = conditional_forward<FwdSk>(*other.min_value_);
-  }
-
-  if (max_value_ == nullptr) {
-    max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
-  } else {
-    if (C()(*max_value_, *other.max_value_))
-      *max_value_ = conditional_forward<FwdSk>(*other.max_value_);
-  }
-  */
 }
 
 template<typename T, typename C, typename A>
diff --git a/quantiles/test/quantiles_sketch_test.cpp b/quantiles/test/quantiles_sketch_test.cpp
index 8860e4a..1c8764e 100644
--- a/quantiles/test/quantiles_sketch_test.cpp
+++ b/quantiles/test/quantiles_sketch_test.cpp
@@ -599,6 +599,139 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
     REQUIRE(sketch2.get_max_value() == 999999.0f);
   }
 
+  SECTION("merge: two empty") {
+    quantiles_float_sketch sk1(128, 0); // first ctor argument is the k checked via get_k() below
+    quantiles_float_sketch sk2(64, 0);
+    sk1.merge(sk2); // source passed as a non-const lvalue
+    REQUIRE(sk1.get_n() == 0);
+    REQUIRE(sk1.get_k() == 128); // expected: an empty source does not change the target's k
+    // same check in the opposite direction, this time through a const reference
+    sk2.merge(const_cast<const quantiles_float_sketch&>(sk1));
+    REQUIRE(sk2.get_n() == 0);
+    REQUIRE(sk2.get_k() == 64);
+  }
+
+  SECTION("merge: exact as input") {
+    const uint16_t k = 128;
+    quantiles_float_sketch sketch1(2 * k, 0); // merge target
+    quantiles_float_sketch sketch2(k, 0); // merge source
+
+    for (int i = 0; i < k / 2; i++) { // k/2 items into both sketches
+      sketch1.update(static_cast<float>(i));
+      sketch2.update(static_cast<float>(i));
+    }
+
+    for (int i = 0; i < 100 * k; i++) { // another 100 * k items, target only
+      sketch1.update(static_cast<float>(i));
+    }
+    // the source holds just k/2 items here; the target holds k/2 + 100 * k
+    sketch1.merge(sketch2);
+    REQUIRE(sketch1.get_n() == 101 * k); // k/2 + 100 * k + k/2
+    REQUIRE(sketch1.get_k() == 2 * k); // no reason to have shrunk
+    REQUIRE(sketch1.get_min_value() == 0.0f);
+    REQUIRE(sketch1.get_max_value() == static_cast<float>(100 * k - 1)); // last value of the second loop
+  }
+
+  SECTION("merge: src estimation, tgt exact, tgt.k > src.k") {
+    const uint16_t k = 128;
+    quantiles_float_sketch sketch1(2 * k, 0); // merge target, larger k
+    quantiles_float_sketch sketch2(k, 0); // merge source, smaller k
+
+    for (int i = 0; i < k / 2; i++) { // k/2 items into both sketches
+      sketch1.update(static_cast<float>(i));
+      sketch2.update(static_cast<float>(i));
+    }
+    // another 100 * k items go to the source only; the target stays at k/2 items
+    for (int i = 0; i < 100 * k; i++) {
+      sketch2.update(static_cast<float>(i));
+    }
+
+    sketch1.merge(sketch2);
+    REQUIRE(sketch1.get_n() == 101 * k); // k/2 + k/2 + 100 * k
+    REQUIRE(sketch1.get_k() == k); // target was built with 2 * k; it is expected to drop to the source's smaller k
+    REQUIRE(sketch1.get_min_value() == 0.0f);
+    REQUIRE(sketch1.get_max_value() == static_cast<float>(100 * k - 1)); // last value of the second loop
+  }
+
+  SECTION("merge: both estimation, tgt.k < src.k") {
+    const uint16_t k = 128;
+    quantiles_float_sketch sketch1(k, 0); // merge target, smaller k
+    quantiles_float_sketch sketch2(2 * k, 0); // merge source, larger k
+
+    for (int i = 0; i < 100 * k; i++) { // 100 * k items each, mirrored around zero
+      sketch1.update(static_cast<float>(i));
+      sketch2.update(static_cast<float>(-i));
+    }
+    // merged values span -(100 * k - 1) .. 100 * k - 1, so the median should be near 0
+    sketch1.merge(sketch2);
+    REQUIRE(sketch1.get_n() == 200 * k);
+    REQUIRE(sketch1.get_k() == k); // no reason to have shrunk
+    REQUIRE(sketch1.get_min_value() == static_cast<float>(-100 * k + 1));
+    REQUIRE(sketch1.get_max_value() == static_cast<float>(100 * k - 1));
+    REQUIRE(sketch1.get_quantile(0.5) == Approx(0.0).margin(100 * k * RANK_EPS_FOR_K_128)); // RANK_EPS_FOR_K_128 is defined outside this hunk; presumably the rank error bound for k = 128
+  }
+
+  SECTION("merge: src estimation, tgt exact, equal k") {
+    const uint16_t k = 128;
+    quantiles_float_sketch sketch1(k, 0); // merge target
+    quantiles_float_sketch sketch2(k, 0); // merge source
+
+    for (int i = 0; i < k / 2; i++) {
+      sketch1.update(static_cast<float>(i)); // target gets 0 .. k/2 - 1
+      sketch2.update(static_cast<float>(k - i - 1)); // source gets k - 1 down to k/2
+    }
+    // source additionally gets k .. 100 * k - 1, so together the sketches hold 0 .. 100 * k - 1 once each
+    for (int i = k; i < 100 * k; i++) {
+      sketch2.update(static_cast<float>(i));
+    }
+
+    sketch1.merge(sketch2);
+    REQUIRE(sketch1.get_n() == 100 * k); // k/2 + k/2 + 99 * k
+    REQUIRE(sketch1.get_k() == k);
+    REQUIRE(sketch1.get_min_value() == 0.0f);
+    REQUIRE(sketch1.get_max_value() == static_cast<float>(100 * k - 1));
+    float n = 100 * k - 1; // note: n here is the largest value inserted, not the item count
+    REQUIRE(sketch1.get_quantile(0.5) == Approx(n / 2).margin(n / 2 * RANK_EPS_FOR_K_128)); // median of 0 .. n; RANK_EPS_FOR_K_128 is defined outside this hunk
+  }
+
+  SECTION("merge: both estimation, no base buffer, same k") {
+    const uint16_t k = 128;
+    quantiles_float_sketch sketch1(k, 0); // merge target
+    quantiles_float_sketch sketch2(k, 0); // merge source
+
+    uint64_t n = 2 * k; // items per sketch; presumably chosen so the base buffer ends up empty, per the section title - confirm
+    for (uint64_t i = 0; i < n; i++) {
+      sketch1.update(static_cast<float>(i)); // target gets 0 .. n - 1
+      sketch2.update(static_cast<float>(2 * n - i - 1)); // source gets 2n - 1 down to n
+    }
+    // together the two sketches hold 0 .. 2n - 1 once each, so the median should be near n
+    sketch1.merge(sketch2);
+    REQUIRE(sketch1.get_n() == 2 * n);
+    REQUIRE(sketch1.get_k() == k);
+    REQUIRE(sketch1.get_min_value() == 0.0f);
+    REQUIRE(sketch1.get_max_value() == static_cast<float>(2 * n - 1));
+    REQUIRE(sketch1.get_quantile(0.5) == Approx(n).margin(n * RANK_EPS_FOR_K_128)); // RANK_EPS_FOR_K_128 is defined outside this hunk
+  }
+
+  SECTION("merge: both estimation, no base buffer, tgt.k < src.k") {
+    const uint16_t k = 128;
+    quantiles_float_sketch sketch1(k, 0); // merge target, smaller k
+    quantiles_float_sketch sketch2(2 * k, 0); // merge source, larger k
+
+    uint64_t n = 4 * k; // items per sketch; presumably chosen so neither base buffer holds items, per the section title - confirm
+    for (uint64_t i = 0; i < n; i++) {
+      sketch1.update(static_cast<float>(i)); // target gets 0 .. n - 1
+      sketch2.update(static_cast<float>(2 * n - i - 1)); // source gets 2n - 1 down to n
+    }
+    // together the two sketches hold 0 .. 2n - 1 once each, so the median should be near n
+    sketch1.merge(sketch2);
+    REQUIRE(sketch1.get_n() == 2 * n);
+    REQUIRE(sketch1.get_k() == k); // target keeps its own (smaller) k
+    REQUIRE(sketch1.get_min_value() == 0.0f);
+    REQUIRE(sketch1.get_max_value() == static_cast<float>(2 * n - 1));
+    REQUIRE(sketch1.get_quantile(0.5) == Approx(n).margin(n * RANK_EPS_FOR_K_128)); // RANK_EPS_FOR_K_128 is defined outside this hunk
+  }
+
   SECTION("sketch of ints") {
     quantiles_sketch<int> sketch;
     REQUIRE_THROWS_AS(sketch.get_quantile(0), std::runtime_error);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org