You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2020/07/14 21:04:17 UTC
[incubator-datasketches-cpp] branch tuple_sketch updated:
intersection has_result and tests
This is an automated email from the ASF dual-hosted git repository.
alsay pushed a commit to branch tuple_sketch
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git
The following commit(s) were added to refs/heads/tuple_sketch by this push:
new a3de326 intersection has_result and tests
a3de326 is described below
commit a3de3263af0874d3672dc801b5dd30eaea60ffb2
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Tue Jul 14 14:04:04 2020 -0700
intersection has_result and tests
---
tuple/include/theta_intersection_base.hpp | 2 +
tuple/include/theta_intersection_base_impl.hpp | 6 +
tuple/include/tuple_intersection.hpp | 6 +
tuple/include/tuple_intersection_impl.hpp | 5 +
tuple/test/theta_union_experimental_test.cpp | 3 -
tuple/test/tuple_intersection_test.cpp | 180 +++++++++++++++++++++++--
6 files changed, 185 insertions(+), 17 deletions(-)
diff --git a/tuple/include/theta_intersection_base.hpp b/tuple/include/theta_intersection_base.hpp
index 2057646..e9a69ee 100644
--- a/tuple/include/theta_intersection_base.hpp
+++ b/tuple/include/theta_intersection_base.hpp
@@ -44,6 +44,8 @@ public:
CompactSketch get_result(bool ordered = true) const;
+ bool has_result() const;
+
private:
Policy policy_;
bool is_valid_;
diff --git a/tuple/include/theta_intersection_base_impl.hpp b/tuple/include/theta_intersection_base_impl.hpp
index 084392b..790873e 100644
--- a/tuple/include/theta_intersection_base_impl.hpp
+++ b/tuple/include/theta_intersection_base_impl.hpp
@@ -141,6 +141,7 @@ void theta_intersection_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
CS theta_intersection_base<EN, EK, P, S, CS, A>::get_result(bool ordered) const {
+ if (!is_valid_) throw std::invalid_argument("calling get_result() before calling update() is undefined");
std::vector<EN, A> entries_copy;
if (num_entries_ > 0) {
entries_copy.reserve(num_entries_);
@@ -150,4 +151,9 @@ CS theta_intersection_base<EN, EK, P, S, CS, A>::get_result(bool ordered) const
return CS(is_empty_, ordered, seed_hash_, theta_, std::move(entries_copy));
}
+template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
+bool theta_intersection_base<EN, EK, P, S, CS, A>::has_result() const {
+ return is_valid_;
+}
+
} /* namespace datasketches */
diff --git a/tuple/include/tuple_intersection.hpp b/tuple/include/tuple_intersection.hpp
index bfab530..3d9bb0d 100644
--- a/tuple/include/tuple_intersection.hpp
+++ b/tuple/include/tuple_intersection.hpp
@@ -86,6 +86,12 @@ public:
*/
CompactSketch get_result(bool ordered = true) const;
+ /**
+ * Returns true if the state of the intersection is defined (not infinite "universe").
+ * @return true if the state is valid
+ */
+ bool has_result() const;
+
private:
State state_;
};
diff --git a/tuple/include/tuple_intersection_impl.hpp b/tuple/include/tuple_intersection_impl.hpp
index ddbde8e..b5a6050 100644
--- a/tuple/include/tuple_intersection_impl.hpp
+++ b/tuple/include/tuple_intersection_impl.hpp
@@ -35,4 +35,9 @@ auto tuple_intersection<S, P, A>::get_result(bool ordered) const -> CompactSketc
return state_.get_result(ordered);
}
+template<typename S, typename P, typename A>
+bool tuple_intersection<S, P, A>::has_result() const {
+ return state_.has_result();
+}
+
} /* namespace datasketches */
diff --git a/tuple/test/theta_union_experimental_test.cpp b/tuple/test/theta_union_experimental_test.cpp
index cbe1412..d08a070 100644
--- a/tuple/test/theta_union_experimental_test.cpp
+++ b/tuple/test/theta_union_experimental_test.cpp
@@ -27,7 +27,6 @@
namespace datasketches {
TEST_CASE("theta_union_exeperimental") {
- std::cout << "theta union test begin" << std::endl;
auto update_sketch1 = theta_sketch_experimental<>::builder().build();
update_sketch1.update(1);
update_sketch1.update(2);
@@ -40,8 +39,6 @@ TEST_CASE("theta_union_exeperimental") {
u.update(update_sketch1);
u.update(update_sketch2);
auto r = u.get_result();
- std::cout << r.to_string(true);
- std::cout << "theta union test end" << std::endl;
}
} /* namespace datasketches */
diff --git a/tuple/test/tuple_intersection_test.cpp b/tuple/test/tuple_intersection_test.cpp
index a629d61..00591e3 100644
--- a/tuple/test/tuple_intersection_test.cpp
+++ b/tuple/test/tuple_intersection_test.cpp
@@ -31,20 +31,172 @@ struct subtracting_intersection_policy {
}
};
-TEST_CASE("tuple_intersection float", "[tuple_intersection]") {
- auto update_sketch1 = update_tuple_sketch<float>::builder().build();
- update_sketch1.update(1, 1);
- update_sketch1.update(2, 1);
-
- auto update_sketch2 = update_tuple_sketch<float>::builder().build();
- update_sketch2.update(1, 1);
- update_sketch2.update(3, 1);
-
- tuple_intersection<float, subtracting_intersection_policy<float>> is;
- is.update(update_sketch1);
- is.update(update_sketch2);
- auto r = is.get_result();
- REQUIRE(r.get_num_retained() == 1);
+using tuple_intersection_float = tuple_intersection<float, subtracting_intersection_policy<float>>;
+
+TEST_CASE("tuple intersection: invalid", "[tuple_intersection]") {
+ tuple_intersection_float intersection;
+ REQUIRE_FALSE(intersection.has_result());
+ REQUIRE_THROWS_AS(intersection.get_result(), std::invalid_argument);
+}
+
+TEST_CASE("tuple intersection: empty", "[tuple_intersection]") {
+ auto sketch = update_tuple_sketch<float>::builder().build();
+ tuple_intersection_float intersection;
+ intersection.update(sketch);
+ auto result = intersection.get_result();
+ REQUIRE(result.get_num_retained() == 0);
+ REQUIRE(result.is_empty());
+ REQUIRE_FALSE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == 0.0);
+
+ intersection.update(sketch);
+ result = intersection.get_result();
+ REQUIRE(result.get_num_retained() == 0);
+ REQUIRE(result.is_empty());
+ REQUIRE_FALSE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == 0.0);
+}
+
+TEST_CASE("tuple intersection: non empty no retained keys", "[tuple_intersection]") {
+ auto sketch = update_tuple_sketch<float>::builder().set_p(0.001).build();
+ sketch.update(1, 1);
+ tuple_intersection_float intersection;
+ intersection.update(sketch);
+ auto result = intersection.get_result();
+ REQUIRE(result.get_num_retained() == 0);
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE(result.is_estimation_mode());
+ REQUIRE(result.get_theta() == Approx(0.001).margin(1e-10));
+ REQUIRE(result.get_estimate() == 0.0);
+
+ intersection.update(sketch);
+ result = intersection.get_result();
+ REQUIRE(result.get_num_retained() == 0);
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE(result.is_estimation_mode());
+ REQUIRE(result.get_theta() == Approx(0.001).margin(1e-10));
+ REQUIRE(result.get_estimate() == 0.0);
+}
+
+TEST_CASE("tuple intersection: exact mode half overlap", "[tuple_intersection]") {
+ auto sketch1 = update_tuple_sketch<float>::builder().build();
+ int value = 0;
+ for (int i = 0; i < 1000; i++) sketch1.update(value++, 1);
+
+ auto sketch2 = update_tuple_sketch<float>::builder().build();
+ value = 500;
+ for (int i = 0; i < 1000; i++) sketch2.update(value++, 1);
+
+ { // unordered
+ tuple_intersection_float intersection;
+ intersection.update(sketch1);
+ intersection.update(sketch2);
+ auto result = intersection.get_result();
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE_FALSE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == 500.0);
+ }
+ { // ordered
+ tuple_intersection_float intersection;
+ intersection.update(sketch1.compact());
+ intersection.update(sketch2.compact());
+ auto result = intersection.get_result();
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE_FALSE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == 500.0);
+ }
+}
+
+TEST_CASE("tuple intersection: exact mode disjoint", "[tuple_intersection]") {
+ auto sketch1 = update_tuple_sketch<float>::builder().build();
+ int value = 0;
+ for (int i = 0; i < 1000; i++) sketch1.update(value++, 1);
+
+ auto sketch2 = update_tuple_sketch<float>::builder().build();
+ for (int i = 0; i < 1000; i++) sketch2.update(value++, 1);
+
+ { // unordered
+ tuple_intersection_float intersection;
+ intersection.update(sketch1);
+ intersection.update(sketch2);
+ auto result = intersection.get_result();
+ REQUIRE(result.is_empty());
+ REQUIRE_FALSE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == 0.0);
+ }
+ { // ordered
+ tuple_intersection_float intersection;
+ intersection.update(sketch1.compact());
+ intersection.update(sketch2.compact());
+ auto result = intersection.get_result();
+ REQUIRE(result.is_empty());
+ REQUIRE_FALSE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == 0.0);
+ }
+}
+
+TEST_CASE("tuple intersection: estimation mode half overlap", "[tuple_intersection]") {
+ auto sketch1 = update_tuple_sketch<float>::builder().build();
+ int value = 0;
+ for (int i = 0; i < 10000; i++) sketch1.update(value++, 1);
+
+ auto sketch2 = update_tuple_sketch<float>::builder().build();
+ value = 5000;
+ for (int i = 0; i < 10000; i++) sketch2.update(value++, 1);
+
+ { // unordered
+ tuple_intersection_float intersection;
+ intersection.update(sketch1);
+ intersection.update(sketch2);
+ auto result = intersection.get_result();
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02));
+ }
+ { // ordered
+ tuple_intersection_float intersection;
+ intersection.update(sketch1.compact());
+ intersection.update(sketch2.compact());
+ auto result = intersection.get_result();
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02));
+ }
+}
+
+TEST_CASE("tuple intersection: estimation mode disjoint", "[tuple_intersection]") {
+ auto sketch1 = update_tuple_sketch<float>::builder().build();
+ int value = 0;
+ for (int i = 0; i < 10000; i++) sketch1.update(value++, 1);
+
+ auto sketch2 = update_tuple_sketch<float>::builder().build();
+ for (int i = 0; i < 10000; i++) sketch2.update(value++, 1);
+
+ { // unordered
+ tuple_intersection_float intersection;
+ intersection.update(sketch1);
+ intersection.update(sketch2);
+ auto result = intersection.get_result();
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == 0.0);
+ }
+ { // ordered
+ tuple_intersection_float intersection;
+ intersection.update(sketch1.compact());
+ intersection.update(sketch2.compact());
+ auto result = intersection.get_result();
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == 0.0);
+ }
+}
+
+TEST_CASE("tuple intersection: seed mismatch", "[tuple_intersection]") {
+ auto sketch = update_tuple_sketch<float>::builder().build();
+ sketch.update(1, 1); // non-empty should not be ignored
+ tuple_intersection_float intersection(123);
+ REQUIRE_THROWS_AS(intersection.update(sketch), std::invalid_argument);
}
} /* namespace datasketches */
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org