You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2020/07/23 03:27:51 UTC

[incubator-datasketches-cpp] 02/03: tests

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch tuple_sketch
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git

commit 67f90407590994e8d0a4a7b67ffcbef1ddbf0f2e
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Wed Jul 22 20:27:06 2020 -0700

    tests
---
 tuple/test/theta_sketch_experimental_test.cpp | 244 +++++++++++++++++++++++---
 1 file changed, 216 insertions(+), 28 deletions(-)

diff --git a/tuple/test/theta_sketch_experimental_test.cpp b/tuple/test/theta_sketch_experimental_test.cpp
index 0fb0286..e0ccdf3 100644
--- a/tuple/test/theta_sketch_experimental_test.cpp
+++ b/tuple/test/theta_sketch_experimental_test.cpp
@@ -17,40 +17,228 @@
  * under the License.
  */
 
-#include <iostream>
+#include <fstream>
+#include <sstream>
 
 #include <catch.hpp>
 #include <theta_sketch_experimental.hpp>
-#include <../../theta/include/theta_sketch.hpp>
 
 namespace datasketches {
 
-TEST_CASE("theta_sketch_experimental: basics ", "[theta_sketch]") {
-  auto update_sketch = update_theta_sketch_experimental<>::builder().build();
+#ifdef TEST_BINARY_INPUT_PATH
+const std::string inputPath = TEST_BINARY_INPUT_PATH; // prefix for the binary .sk fixture files when the macro is defined (presumably by the build system)
+#else
+const std::string inputPath = "test/"; // fallback prefix: a relative path, so it resolves against the working directory
+#endif
+
+using update_theta_sketch = update_theta_sketch_experimental<>; // short aliases for the experimental sketch types with default template arguments
+using compact_theta_sketch = compact_theta_sketch_experimental<>;
+
+TEST_CASE("theta sketch: empty", "[theta_sketch]") { // a sketch that received no updates, checked before and after compact()
+  update_theta_sketch update_sketch = update_theta_sketch::builder().build(); // builder defaults, nothing fed in
+  REQUIRE(update_sketch.is_empty());
+  REQUIRE_FALSE(update_sketch.is_estimation_mode());
+  REQUIRE(update_sketch.get_theta() == 1.0);
+  REQUIRE(update_sketch.get_estimate() == 0.0);
+  REQUIRE(update_sketch.get_lower_bound(1) == 0.0); // the argument is presumably the number of standard deviations -- confirm against the sketch API
+  REQUIRE(update_sketch.get_upper_bound(1) == 0.0);
+
+  compact_theta_sketch compact_sketch = update_sketch.compact(); // the compact form is expected to report the same state
+  REQUIRE(compact_sketch.is_empty());
+  REQUIRE_FALSE(compact_sketch.is_estimation_mode());
+  REQUIRE(compact_sketch.get_theta() == 1.0);
+  REQUIRE(compact_sketch.get_estimate() == 0.0);
+  REQUIRE(compact_sketch.get_lower_bound(1) == 0.0);
+  REQUIRE(compact_sketch.get_upper_bound(1) == 0.0);
+}
+
+TEST_CASE("theta sketch: non empty no retained keys", "[theta_sketch]") { // a sketch that was updated once but retains nothing
+  update_theta_sketch update_sketch = update_theta_sketch::builder().set_p(0.001).build(); // p is presumably the up-front sampling probability -- confirm against the builder API
+  update_sketch.update(1);
+  //std::cerr << update_sketch.to_string();
+  REQUIRE(update_sketch.get_num_retained() == 0); // the single update is expected not to be kept
+  REQUIRE_FALSE(update_sketch.is_empty()); // yet the sketch no longer counts as empty
+  REQUIRE(update_sketch.is_estimation_mode());
+  REQUIRE(update_sketch.get_estimate() == 0.0);
+  REQUIRE(update_sketch.get_lower_bound(1) == 0.0);
+  REQUIRE(update_sketch.get_upper_bound(1) > 0); // upper bound must be positive even though the estimate is 0
+
+  compact_theta_sketch compact_sketch = update_sketch.compact(); // same expectations for the compact form
+  REQUIRE(compact_sketch.get_num_retained() == 0);
+  REQUIRE_FALSE(compact_sketch.is_empty());
+  REQUIRE(compact_sketch.is_estimation_mode());
+  REQUIRE(compact_sketch.get_estimate() == 0.0);
+  REQUIRE(compact_sketch.get_lower_bound(1) == 0.0);
+  REQUIRE(compact_sketch.get_upper_bound(1) > 0);
+}
+
+TEST_CASE("theta sketch: single item", "[theta_sketch]") { // exactly one update with builder defaults
+  update_theta_sketch update_sketch = update_theta_sketch::builder().build();
+  update_sketch.update(1);
+  REQUIRE_FALSE(update_sketch.is_empty());
+  REQUIRE_FALSE(update_sketch.is_estimation_mode()); // one item is expected to keep the sketch in exact mode
+  REQUIRE(update_sketch.get_theta() == 1.0);
+  REQUIRE(update_sketch.get_estimate() == 1.0);
+  REQUIRE(update_sketch.get_lower_bound(1) == 1.0); // in exact mode both bounds are expected to equal the estimate
+  REQUIRE(update_sketch.get_upper_bound(1) == 1.0);
+
+  compact_theta_sketch compact_sketch = update_sketch.compact(); // same expectations for the compact form
+  REQUIRE_FALSE(compact_sketch.is_empty());
+  REQUIRE_FALSE(compact_sketch.is_estimation_mode());
+  REQUIRE(compact_sketch.get_theta() == 1.0);
+  REQUIRE(compact_sketch.get_estimate() == 1.0);
+  REQUIRE(compact_sketch.get_lower_bound(1) == 1.0);
+  REQUIRE(compact_sketch.get_upper_bound(1) == 1.0);
+}
+
+TEST_CASE("theta sketch: resize exact", "[theta_sketch]") { // 2000 distinct updates; per the test name this presumably forces internal resizing -- confirm
+  update_theta_sketch update_sketch = update_theta_sketch::builder().build();
+  for (int i = 0; i < 2000; i++) update_sketch.update(i); // distinct values 0..1999
+  REQUIRE_FALSE(update_sketch.is_empty());
+  REQUIRE_FALSE(update_sketch.is_estimation_mode()); // 2000 items are expected to stay in exact mode
+  REQUIRE(update_sketch.get_theta() == 1.0);
+  REQUIRE(update_sketch.get_estimate() == 2000.0); // exact count, hence the == comparison on doubles
+  REQUIRE(update_sketch.get_lower_bound(1) == 2000.0);
+  REQUIRE(update_sketch.get_upper_bound(1) == 2000.0);
+
+  compact_theta_sketch compact_sketch = update_sketch.compact(); // same expectations for the compact form
+  REQUIRE_FALSE(compact_sketch.is_empty());
+  REQUIRE_FALSE(compact_sketch.is_estimation_mode());
+  REQUIRE(compact_sketch.get_theta() == 1.0);
+  REQUIRE(compact_sketch.get_estimate() == 2000.0);
+  REQUIRE(compact_sketch.get_lower_bound(1) == 2000.0);
+  REQUIRE(compact_sketch.get_upper_bound(1) == 2000.0);
+}
+
+TEST_CASE("theta sketch: estimation", "[theta_sketch]") { // enough distinct updates to push the sketch into estimation mode
+  update_theta_sketch update_sketch = update_theta_sketch::builder().set_resize_factor(update_theta_sketch::resize_factor::X1).build(); // only the resize factor is overridden; lg_k stays at the builder default
+  const int n = 8000;
+  for (int i = 0; i < n; i++) update_sketch.update(i); // distinct values 0..n-1
+  //std::cerr << update_sketch.to_string();
+  REQUIRE_FALSE(update_sketch.is_empty());
+  REQUIRE(update_sketch.is_estimation_mode());
+  REQUIRE(update_sketch.get_theta() < 1.0);
+  REQUIRE(update_sketch.get_estimate() == Approx((double) n).margin(n * 0.01)); // absolute tolerance of 1% of n
+  REQUIRE(update_sketch.get_lower_bound(1) < n); // the true count n is expected to lie strictly inside the bounds
+  REQUIRE(update_sketch.get_upper_bound(1) > n);
+
+  const uint32_t k = 1 << update_theta_sketch::builder::DEFAULT_LG_K; // k = 2^DEFAULT_LG_K, matching the default lg_k used above
+  REQUIRE(update_sketch.get_num_retained() >= k); // before trim() the sketch may hold k or more entries
+  update_sketch.trim();
+  REQUIRE(update_sketch.get_num_retained() == k); // trim() is expected to cut the retained set down to exactly k
+
+  compact_theta_sketch compact_sketch = update_sketch.compact(); // compacted after trim()
+  REQUIRE_FALSE(compact_sketch.is_empty());
+  REQUIRE(compact_sketch.is_ordered()); // compact() with no arguments is expected to yield an ordered sketch
+  REQUIRE(compact_sketch.is_estimation_mode());
+  REQUIRE(compact_sketch.get_theta() < 1.0);
+  REQUIRE(compact_sketch.get_estimate() == Approx((double) n).margin(n * 0.01));
+  REQUIRE(compact_sketch.get_lower_bound(1) < n);
+  REQUIRE(compact_sketch.get_upper_bound(1) > n);
+}
+
+TEST_CASE("theta sketch: deserialize compact empty from java", "[theta_sketch]") { // reads a binary fixture; per its file name, an empty compact sketch written by Java
+  std::ifstream is;
+  is.exceptions(std::ios::failbit | std::ios::badbit); // set before open() so a missing or unreadable fixture throws
+  is.open(inputPath + "theta_compact_empty_from_java.sk", std::ios::binary);
+  auto sketch = compact_theta_sketch::deserialize(is);
+  REQUIRE(sketch.is_empty());
+  REQUIRE_FALSE(sketch.is_estimation_mode());
+  REQUIRE(sketch.get_num_retained() == 0);
+  REQUIRE(sketch.get_theta() == 1.0);
+  REQUIRE(sketch.get_estimate() == 0.0);
+  REQUIRE(sketch.get_lower_bound(1) == 0.0);
+  REQUIRE(sketch.get_upper_bound(1) == 0.0);
+}
+
+TEST_CASE("theta sketch: deserialize single item from java", "[theta_sketch]") { // reads a binary fixture; per its file name, a single-item compact sketch written by Java
+  std::ifstream is;
+  is.exceptions(std::ios::failbit | std::ios::badbit); // set before open() so a missing or unreadable fixture throws
+  is.open(inputPath + "theta_compact_single_item_from_java.sk", std::ios::binary);
+  auto sketch = compact_theta_sketch::deserialize(is);
+  REQUIRE_FALSE(sketch.is_empty());
+  REQUIRE_FALSE(sketch.is_estimation_mode());
+  REQUIRE(sketch.get_num_retained() == 1);
+  REQUIRE(sketch.get_theta() == 1.0);
+  REQUIRE(sketch.get_estimate() == 1.0);
+  REQUIRE(sketch.get_lower_bound(1) == 1.0);
+  REQUIRE(sketch.get_upper_bound(1) == 1.0);
+}
+
+TEST_CASE("theta sketch: deserialize compact estimation from java", "[theta_sketch]") { // reads an estimation-mode fixture and compares it with a sketch rebuilt here
+  std::ifstream is;
+  is.exceptions(std::ios::failbit | std::ios::badbit); // set before open() so a missing or unreadable fixture throws
+  is.open(inputPath + "theta_compact_estimation_from_java.sk", std::ios::binary);
+  auto sketch = compact_theta_sketch::deserialize(is);
+  REQUIRE_FALSE(sketch.is_empty());
+  REQUIRE(sketch.is_estimation_mode());
+  REQUIRE(sketch.is_ordered());
+  REQUIRE(sketch.get_num_retained() == 4342); // hard-coded expectations; presumably recorded from the Java run that wrote the fixture -- confirm
+  REQUIRE(sketch.get_theta() == Approx(0.531700444213199).margin(1e-10));
+  REQUIRE(sketch.get_estimate() == Approx(8166.25234614053).margin(1e-10));
+  REQUIRE(sketch.get_lower_bound(2) == Approx(7996.956955317471).margin(1e-10)); // bounds are queried with argument 2 here
+  REQUIRE(sketch.get_upper_bound(2) == Approx(8339.090301078124).margin(1e-10));
+
+  // the same construction process in Java must have produced exactly the same sketch
+  update_theta_sketch update_sketch = update_theta_sketch::builder().build();
+  const int n = 8192;
+  for (int i = 0; i < n; i++) update_sketch.update(i); // distinct values 0..n-1 with builder defaults
+  REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
+  REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
+  REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
+  REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10)); // bounds compared for arguments 1, 2 and 3
+  REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
+  REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
+  REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
+  REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
+  REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
+  compact_theta_sketch compact_sketch = update_sketch.compact();
+  // the sketches are ordered, so the iteration sequence must match exactly
+  auto iter = sketch.begin(); // advanced in lockstep with the range-for below; never compared against sketch.end()
+  for (const auto& key: compact_sketch) {
+    REQUIRE(*iter == key); // relies on the equal retained counts asserted above to keep iter in range
+    ++iter;
+  }
+}
+
+TEST_CASE("theta sketch: serialize deserialize stream and bytes equivalence", "[theta_sketch]") { // the stream and byte-buffer paths must produce and accept the same image
+  update_theta_sketch update_sketch = update_theta_sketch::builder().build();
+  const int n = 8192;
+  for (int i = 0; i < n; i++) update_sketch.update(i); // distinct values 0..n-1
+
+  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary); // in-memory stream, written first and read back below
+  update_sketch.compact().serialize(s); // stream overload
+  auto bytes = update_sketch.compact().serialize(); // byte-buffer overload, from a second compact() of the same sketch
+  REQUIRE(bytes.size() == static_cast<size_t>(s.tellp())); // the write position gives the number of bytes streamed
+  for (size_t i = 0; i < bytes.size(); ++i) {
+    REQUIRE(((char*)bytes.data())[i] == (char)s.get()); // byte-for-byte comparison; get() consumes the stream
+  }
+
+  s.seekg(0); // rewind
+  compact_theta_sketch deserialized_sketch1 = compact_theta_sketch::deserialize(s); // from the stream
+  compact_theta_sketch deserialized_sketch2 = compact_theta_sketch::deserialize(bytes.data(), bytes.size()); // from the byte buffer
+  REQUIRE(bytes.size() == static_cast<size_t>(s.tellg())); // stream deserialization must have consumed exactly the serialized length
+  REQUIRE(deserialized_sketch2.is_empty() == deserialized_sketch1.is_empty());
+  REQUIRE(deserialized_sketch2.is_ordered() == deserialized_sketch1.is_ordered());
+  REQUIRE(deserialized_sketch2.get_num_retained() == deserialized_sketch1.get_num_retained());
+  REQUIRE(deserialized_sketch2.get_theta() == deserialized_sketch1.get_theta()); // exact == on doubles: both come from the same bytes
+  REQUIRE(deserialized_sketch2.get_estimate() == deserialized_sketch1.get_estimate());
+  REQUIRE(deserialized_sketch2.get_lower_bound(1) == deserialized_sketch1.get_lower_bound(1));
+  REQUIRE(deserialized_sketch2.get_upper_bound(1) == deserialized_sketch1.get_upper_bound(1));
+  // the sketches are ordered, so the iteration sequence must match exactly
+  auto iter = deserialized_sketch1.begin(); // advanced in lockstep with the range-for below
+  for (auto key: deserialized_sketch2) {
+    REQUIRE(*iter == key); // relies on the equal retained counts asserted above to keep iter in range
+    ++iter;
+  }
+}
+
+TEST_CASE("theta sketch: deserialize compact single item buffer overrun", "[theta_sketch]") { // truncated input must raise std::out_of_range
+  update_theta_sketch update_sketch = update_theta_sketch::builder().build(); // the update(1) that follows is an unchanged context line of this patch
   update_sketch.update(1);
-  update_sketch.update(2);
-  REQUIRE(update_sketch.get_num_retained() == 2);
-  int count = 0;
-  for (const auto& entry: update_sketch) ++count;
-  REQUIRE(count == 2);
-
-  auto compact_sketch = update_sketch.compact();
-  REQUIRE(compact_sketch.get_num_retained() == 2);
-}
-
-//TEST_CASE("theta_sketch_experimental: compare with theta production", "[theta_sketch]") {
-//  auto test = theta_sketch_experimental<>::builder().build();
-//  update_theta_sketch prod = update_theta_sketch::builder().build();
-//
-//  for (int i = 0; i < 1000000; ++i) {
-//    test.update(i);
-//    prod.update(i);
-//  }
-//
-//  std::cout << "--- theta production vs experimental ---" << std::endl;
-//  std::cout << test.to_string(true);
-//  std::cout << "sizeof(update_theta_sketch)=" << sizeof(update_theta_sketch) << std::endl;
-//  std::cout << prod.to_string(true);
-//}
+  auto bytes = update_sketch.compact().serialize(); // serialized image of the single-item compact sketch
+  REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), 7), std::out_of_range); // only 7 bytes offered; presumably less than the fixed preamble -- confirm against the serialization format
+  REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range); // one byte short of the full image
+}
 
 } /* namespace datasketches */


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org