You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2020/07/14 21:04:17 UTC

[incubator-datasketches-cpp] branch tuple_sketch updated: intersection has_result and tests

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch tuple_sketch
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git


The following commit(s) were added to refs/heads/tuple_sketch by this push:
     new a3de326  intersection has_result and tests
a3de326 is described below

commit a3de3263af0874d3672dc801b5dd30eaea60ffb2
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Tue Jul 14 14:04:04 2020 -0700

    intersection has_result and tests
---
 tuple/include/theta_intersection_base.hpp      |   2 +
 tuple/include/theta_intersection_base_impl.hpp |   6 +
 tuple/include/tuple_intersection.hpp           |   6 +
 tuple/include/tuple_intersection_impl.hpp      |   5 +
 tuple/test/theta_union_experimental_test.cpp   |   3 -
 tuple/test/tuple_intersection_test.cpp         | 180 +++++++++++++++++++++++--
 6 files changed, 185 insertions(+), 17 deletions(-)

diff --git a/tuple/include/theta_intersection_base.hpp b/tuple/include/theta_intersection_base.hpp
index 2057646..e9a69ee 100644
--- a/tuple/include/theta_intersection_base.hpp
+++ b/tuple/include/theta_intersection_base.hpp
@@ -44,6 +44,8 @@ public:
 
   CompactSketch get_result(bool ordered = true) const;
 
+  bool has_result() const;
+
 private:
   Policy policy_;
   bool is_valid_;
diff --git a/tuple/include/theta_intersection_base_impl.hpp b/tuple/include/theta_intersection_base_impl.hpp
index 084392b..790873e 100644
--- a/tuple/include/theta_intersection_base_impl.hpp
+++ b/tuple/include/theta_intersection_base_impl.hpp
@@ -141,6 +141,7 @@ void theta_intersection_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
 
 template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
 CS theta_intersection_base<EN, EK, P, S, CS, A>::get_result(bool ordered) const {
+  if (!is_valid_) throw std::invalid_argument("calling get_result() before calling update() is undefined");
   std::vector<EN, A> entries_copy;
   if (num_entries_ > 0) {
     entries_copy.reserve(num_entries_);
@@ -150,4 +151,9 @@ CS theta_intersection_base<EN, EK, P, S, CS, A>::get_result(bool ordered) const
   return CS(is_empty_, ordered, seed_hash_, theta_, std::move(entries_copy));
 }
 
+template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
+bool theta_intersection_base<EN, EK, P, S, CS, A>::has_result() const {
+  return is_valid_;
+}
+
 } /* namespace datasketches */
diff --git a/tuple/include/tuple_intersection.hpp b/tuple/include/tuple_intersection.hpp
index bfab530..3d9bb0d 100644
--- a/tuple/include/tuple_intersection.hpp
+++ b/tuple/include/tuple_intersection.hpp
@@ -86,6 +86,12 @@ public:
    */
   CompactSketch get_result(bool ordered = true) const;
 
+  /**
+   * Returns true if the state of the intersection is defined (not infinite "universe").
+   * @return true if the state is valid
+   */
+  bool has_result() const;
+
 private:
   State state_;
 };
diff --git a/tuple/include/tuple_intersection_impl.hpp b/tuple/include/tuple_intersection_impl.hpp
index ddbde8e..b5a6050 100644
--- a/tuple/include/tuple_intersection_impl.hpp
+++ b/tuple/include/tuple_intersection_impl.hpp
@@ -35,4 +35,9 @@ auto tuple_intersection<S, P, A>::get_result(bool ordered) const -> CompactSketc
   return state_.get_result(ordered);
 }
 
+template<typename S, typename P, typename A>
+bool tuple_intersection<S, P, A>::has_result() const {
+  return state_.has_result();
+}
+
 } /* namespace datasketches */
diff --git a/tuple/test/theta_union_experimental_test.cpp b/tuple/test/theta_union_experimental_test.cpp
index cbe1412..d08a070 100644
--- a/tuple/test/theta_union_experimental_test.cpp
+++ b/tuple/test/theta_union_experimental_test.cpp
@@ -27,7 +27,6 @@
 namespace datasketches {
 
 TEST_CASE("theta_union_exeperimental") {
-  std::cout << "theta union test begin" << std::endl;
   auto update_sketch1 = theta_sketch_experimental<>::builder().build();
   update_sketch1.update(1);
   update_sketch1.update(2);
@@ -40,8 +39,6 @@ TEST_CASE("theta_union_exeperimental") {
   u.update(update_sketch1);
   u.update(update_sketch2);
   auto r = u.get_result();
-  std::cout << r.to_string(true);
-  std::cout << "theta union test end" << std::endl;
 }
 
 } /* namespace datasketches */
diff --git a/tuple/test/tuple_intersection_test.cpp b/tuple/test/tuple_intersection_test.cpp
index a629d61..00591e3 100644
--- a/tuple/test/tuple_intersection_test.cpp
+++ b/tuple/test/tuple_intersection_test.cpp
@@ -31,20 +31,172 @@ struct subtracting_intersection_policy {
   }
 };
 
-TEST_CASE("tuple_intersection float", "[tuple_intersection]") {
-  auto update_sketch1 = update_tuple_sketch<float>::builder().build();
-  update_sketch1.update(1, 1);
-  update_sketch1.update(2, 1);
-
-  auto update_sketch2 = update_tuple_sketch<float>::builder().build();
-  update_sketch2.update(1, 1);
-  update_sketch2.update(3, 1);
-
-  tuple_intersection<float, subtracting_intersection_policy<float>> is;
-  is.update(update_sketch1);
-  is.update(update_sketch2);
-  auto r = is.get_result();
-  REQUIRE(r.get_num_retained() == 1);
+using tuple_intersection_float = tuple_intersection<float, subtracting_intersection_policy<float>>;
+
+TEST_CASE("tuple intersection: invalid", "[tuple_intersection]") {
+  tuple_intersection_float intersection;
+  REQUIRE_FALSE(intersection.has_result());
+  REQUIRE_THROWS_AS(intersection.get_result(), std::invalid_argument);
+}
+
+TEST_CASE("tuple intersection: empty", "[tuple_intersection]") {
+  auto sketch = update_tuple_sketch<float>::builder().build();
+  tuple_intersection_float intersection;
+  intersection.update(sketch);
+  auto result = intersection.get_result();
+  REQUIRE(result.get_num_retained() == 0);
+  REQUIRE(result.is_empty());
+  REQUIRE_FALSE(result.is_estimation_mode());
+  REQUIRE(result.get_estimate() == 0.0);
+
+  intersection.update(sketch);
+  result = intersection.get_result();
+  REQUIRE(result.get_num_retained() == 0);
+  REQUIRE(result.is_empty());
+  REQUIRE_FALSE(result.is_estimation_mode());
+  REQUIRE(result.get_estimate() == 0.0);
+}
+
+TEST_CASE("tuple intersection: non empty no retained keys", "[tuple_intersection]") {
+  auto sketch = update_tuple_sketch<float>::builder().set_p(0.001).build();
+  sketch.update(1, 1);
+  tuple_intersection_float intersection;
+  intersection.update(sketch);
+  auto result = intersection.get_result();
+  REQUIRE(result.get_num_retained() == 0);
+  REQUIRE_FALSE(result.is_empty());
+  REQUIRE(result.is_estimation_mode());
+  REQUIRE(result.get_theta() == Approx(0.001).margin(1e-10));
+  REQUIRE(result.get_estimate() == 0.0);
+
+  intersection.update(sketch);
+  result = intersection.get_result();
+  REQUIRE(result.get_num_retained() == 0);
+  REQUIRE_FALSE(result.is_empty());
+  REQUIRE(result.is_estimation_mode());
+  REQUIRE(result.get_theta() == Approx(0.001).margin(1e-10));
+  REQUIRE(result.get_estimate() == 0.0);
+}
+
+TEST_CASE("tuple intersection: exact mode half overlap", "[tuple_intersection]") {
+  auto sketch1 = update_tuple_sketch<float>::builder().build();
+  int value = 0;
+  for (int i = 0; i < 1000; i++) sketch1.update(value++, 1);
+
+  auto sketch2 = update_tuple_sketch<float>::builder().build();
+  value = 500;
+  for (int i = 0; i < 1000; i++) sketch2.update(value++, 1);
+
+  { // unordered
+    tuple_intersection_float intersection;
+    intersection.update(sketch1);
+    intersection.update(sketch2);
+    auto result = intersection.get_result();
+    REQUIRE_FALSE(result.is_empty());
+    REQUIRE_FALSE(result.is_estimation_mode());
+    REQUIRE(result.get_estimate() == 500.0);
+  }
+  { // ordered
+    tuple_intersection_float intersection;
+    intersection.update(sketch1.compact());
+    intersection.update(sketch2.compact());
+    auto result = intersection.get_result();
+    REQUIRE_FALSE(result.is_empty());
+    REQUIRE_FALSE(result.is_estimation_mode());
+    REQUIRE(result.get_estimate() == 500.0);
+  }
+}
+
+TEST_CASE("tuple intersection: exact mode disjoint", "[tuple_intersection]") {
+  auto sketch1 = update_tuple_sketch<float>::builder().build();
+  int value = 0;
+  for (int i = 0; i < 1000; i++) sketch1.update(value++, 1);
+
+  auto sketch2 = update_tuple_sketch<float>::builder().build();
+  for (int i = 0; i < 1000; i++) sketch2.update(value++, 1);
+
+  { // unordered
+    tuple_intersection_float intersection;
+    intersection.update(sketch1);
+    intersection.update(sketch2);
+    auto result = intersection.get_result();
+    REQUIRE(result.is_empty());
+    REQUIRE_FALSE(result.is_estimation_mode());
+    REQUIRE(result.get_estimate() == 0.0);
+  }
+  { // ordered
+    tuple_intersection_float intersection;
+    intersection.update(sketch1.compact());
+    intersection.update(sketch2.compact());
+    auto result = intersection.get_result();
+    REQUIRE(result.is_empty());
+    REQUIRE_FALSE(result.is_estimation_mode());
+    REQUIRE(result.get_estimate() == 0.0);
+  }
+}
+
+TEST_CASE("tuple intersection: estimation mode half overlap", "[tuple_intersection]") {
+  auto sketch1 = update_tuple_sketch<float>::builder().build();
+  int value = 0;
+  for (int i = 0; i < 10000; i++) sketch1.update(value++, 1);
+
+  auto sketch2 = update_tuple_sketch<float>::builder().build();
+  value = 5000;
+  for (int i = 0; i < 10000; i++) sketch2.update(value++, 1);
+
+  { // unordered
+    tuple_intersection_float intersection;
+    intersection.update(sketch1);
+    intersection.update(sketch2);
+    auto result = intersection.get_result();
+    REQUIRE_FALSE(result.is_empty());
+    REQUIRE(result.is_estimation_mode());
+    REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02));
+  }
+  { // ordered
+    tuple_intersection_float intersection;
+    intersection.update(sketch1.compact());
+    intersection.update(sketch2.compact());
+    auto result = intersection.get_result();
+    REQUIRE_FALSE(result.is_empty());
+    REQUIRE(result.is_estimation_mode());
+    REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02));
+  }
+}
+
+TEST_CASE("tuple intersection: estimation mode disjoint", "[tuple_intersection]") {
+  auto sketch1 = update_tuple_sketch<float>::builder().build();
+  int value = 0;
+  for (int i = 0; i < 10000; i++) sketch1.update(value++, 1);
+
+  auto sketch2 = update_tuple_sketch<float>::builder().build();
+  for (int i = 0; i < 10000; i++) sketch2.update(value++, 1);
+
+  { // unordered
+    tuple_intersection_float intersection;
+    intersection.update(sketch1);
+    intersection.update(sketch2);
+    auto result = intersection.get_result();
+    REQUIRE_FALSE(result.is_empty());
+    REQUIRE(result.is_estimation_mode());
+    REQUIRE(result.get_estimate() == 0.0);
+  }
+  { // ordered
+    tuple_intersection_float intersection;
+    intersection.update(sketch1.compact());
+    intersection.update(sketch2.compact());
+    auto result = intersection.get_result();
+    REQUIRE_FALSE(result.is_empty());
+    REQUIRE(result.is_estimation_mode());
+    REQUIRE(result.get_estimate() == 0.0);
+  }
+}
+
+TEST_CASE("tuple intersection: seed mismatch", "[tuple_intersection]") {
+  auto sketch = update_tuple_sketch<float>::builder().build();
+  sketch.update(1, 1); // non-empty should not be ignored
+  tuple_intersection_float intersection(123);
+  REQUIRE_THROWS_AS(intersection.update(sketch), std::invalid_argument);
 }
 
 } /* namespace datasketches */


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org