You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2020/08/24 22:20:08 UTC
[incubator-datasketches-cpp] 01/02: theta intersection and a-not-b
This is an automated email from the ASF dual-hosted git repository.
alsay pushed a commit to branch tuple_sketch
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git
commit 73231d23402fb5bc2ba34fc8656b9232b0e61552
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Mon Aug 24 15:19:22 2020 -0700
theta intersection and a-not-b
---
tuple/CMakeLists.txt | 12 +-
..._a_not_b.hpp => theta_a_not_b_experimental.hpp} | 28 +--
...mpl.hpp => theta_a_not_b_experimental_impl.hpp} | 10 +-
tuple/include/theta_intersection_experimental.hpp | 78 +++++++
...pp => theta_intersection_experimental_impl.hpp} | 22 +-
tuple/include/theta_union_experimental.hpp | 13 +-
tuple/include/tuple_a_not_b.hpp | 2 +-
tuple/include/tuple_a_not_b_impl.hpp | 4 +-
tuple/test/CMakeLists.txt | 4 +-
tuple/test/theta_a_not_b_experimental_test.cpp | 250 +++++++++++++++++++++
.../test/theta_intersection_experimental_test.cpp | 224 ++++++++++++++++++
tuple/test/theta_sketch_experimental_test.cpp | 3 +
12 files changed, 609 insertions(+), 41 deletions(-)
diff --git a/tuple/CMakeLists.txt b/tuple/CMakeLists.txt
index fce73ab..411c712 100644
--- a/tuple/CMakeLists.txt
+++ b/tuple/CMakeLists.txt
@@ -37,13 +37,15 @@ list(APPEND tuple_HEADERS "include/tuple_sketch.hpp;include/tuple_sketch_impl.hp
list(APPEND tuple_HEADERS "include/tuple_union.hpp;include/tuple_union_impl.hpp")
list(APPEND tuple_HEADERS "include/tuple_intersection.hpp;include/tuple_intersection_impl.hpp")
list(APPEND tuple_HEADERS "include/tuple_a_not_b.hpp;include/tuple_a_not_b_impl.hpp")
+list(APPEND tuple_HEADERS "include/array_of_doubles_sketch.hpp;include/array_of_doubles_sketch_impl.hpp")
list(APPEND tuple_HEADERS "include/theta_update_sketch_base.hpp;include/theta_update_sketch_base_impl.hpp")
list(APPEND tuple_HEADERS "include/theta_union_base.hpp;include/theta_union_base_impl.hpp")
list(APPEND tuple_HEADERS "include/theta_intersection_base.hpp;include/theta_intersection_base_impl.hpp")
list(APPEND tuple_HEADERS "include/theta_set_difference_base.hpp;include/theta_set_difference_base_impl.hpp")
list(APPEND tuple_HEADERS "include/theta_sketch_experimental.hpp;include/theta_sketch_experimental_impl.hpp")
list(APPEND tuple_HEADERS "include/theta_union_experimental.hpp;include/theta_union_experimental_impl.hpp")
-list(APPEND tuple_HEADERS "include/array_of_doubles_sketch.hpp;include/array_of_doubles_sketch_impl.hpp")
+list(APPEND tuple_HEADERS "include/theta_intersection_experimental.hpp;include/theta_intersection_experimental_impl.hpp")
+list(APPEND tuple_HEADERS "include/theta_a_not_b_experimental.hpp;include/theta_a_not_b_experimental_impl.hpp")
install(TARGETS tuple
EXPORT ${PROJECT_NAME}
@@ -62,6 +64,8 @@ target_sources(tuple
${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_intersection_impl.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_a_not_b.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_a_not_b_impl.hpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_sketch.hpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_sketch_impl.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/theta_update_sketch_base.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/theta_update_sketch_base_impl.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/theta_union_base.hpp
@@ -74,6 +78,8 @@ target_sources(tuple
${CMAKE_CURRENT_SOURCE_DIR}/include/theta_sketch_experimental_impl.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/theta_union_experimental.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/theta_union_experimental_impl.hpp
- ${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_sketch.hpp
- ${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_sketch_impl.hpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/include/theta_intersection_experimental.hpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/include/theta_intersection_experimental_impl.hpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/include/theta_a_not_b_experimental.hpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/include/theta_a_not_b_experimental_impl.hpp
)
diff --git a/tuple/include/tuple_a_not_b.hpp b/tuple/include/theta_a_not_b_experimental.hpp
similarity index 66%
copy from tuple/include/tuple_a_not_b.hpp
copy to tuple/include/theta_a_not_b_experimental.hpp
index 923e2e1..ed29ce5 100644
--- a/tuple/include/tuple_a_not_b.hpp
+++ b/tuple/include/theta_a_not_b_experimental.hpp
@@ -17,28 +17,24 @@
* under the License.
*/
-#ifndef TUPLE_A_NOT_B_HPP_
-#define TUPLE_A_NOT_B_HPP_
+#ifndef THETA_A_NOT_B_EXPERIMENTAL_HPP_
+#define THETA_A_NOT_B_EXPERIMENTAL_HPP_
-#include "tuple_sketch.hpp"
+#include "theta_sketch_experimental.hpp"
#include "theta_set_difference_base.hpp"
namespace datasketches {
-template<
- typename Summary,
- typename Allocator = std::allocator<Summary>
->
-class tuple_a_not_b {
+template<typename Allocator = std::allocator<uint64_t>>
+class theta_a_not_b_experimental {
public:
- using Entry = std::pair<uint64_t, Summary>;
- using ExtractKey = pair_extract_key<uint64_t, Summary>;
- using Sketch = tuple_sketch<Summary, Allocator>;
- using CompactSketch = compact_tuple_sketch<Summary, Allocator>;
- using AllocEntry = typename std::allocator_traits<Allocator>::template rebind_alloc<Entry>;
- using State = theta_set_difference_base<Entry, ExtractKey, Sketch, CompactSketch, AllocEntry>;
+ using Entry = uint64_t;
+ using ExtractKey = trivial_extract_key;
+ using Sketch = theta_sketch_experimental<Allocator>;
+ using CompactSketch = compact_theta_sketch_experimental<Allocator>;
+ using State = theta_set_difference_base<Entry, ExtractKey, Sketch, CompactSketch, Allocator>;
- explicit tuple_a_not_b(uint64_t seed = DEFAULT_SEED);
+ explicit theta_a_not_b_experimental(uint64_t seed = DEFAULT_SEED, const Allocator& allocator = Allocator());
/**
* Computes the a-not-b set operation given two sketches.
@@ -53,6 +49,6 @@ private:
} /* namespace datasketches */
-#include "tuple_a_not_b_impl.hpp"
+#include "theta_a_not_b_experimental_impl.hpp"
#endif
diff --git a/tuple/include/tuple_a_not_b_impl.hpp b/tuple/include/theta_a_not_b_experimental_impl.hpp
similarity index 78%
copy from tuple/include/tuple_a_not_b_impl.hpp
copy to tuple/include/theta_a_not_b_experimental_impl.hpp
index 4ba2e41..57b3f70 100644
--- a/tuple/include/tuple_a_not_b_impl.hpp
+++ b/tuple/include/theta_a_not_b_experimental_impl.hpp
@@ -19,14 +19,14 @@
namespace datasketches {
-template<typename S, typename A>
-tuple_a_not_b<S, A>::tuple_a_not_b(uint64_t seed):
-state_(seed)
+template<typename A>
+theta_a_not_b_experimental<A>::theta_a_not_b_experimental(uint64_t seed, const A& allocator):
+state_(seed, allocator)
{}
-template<typename S, typename A>
+template<typename A>
template<typename SS>
-auto tuple_a_not_b<S, A>::compute(SS&& a, const Sketch& b, bool ordered) const -> CompactSketch {
+auto theta_a_not_b_experimental<A>::compute(SS&& a, const Sketch& b, bool ordered) const -> CompactSketch {
return state_.compute(std::forward<SS>(a), b, ordered);
}
diff --git a/tuple/include/theta_intersection_experimental.hpp b/tuple/include/theta_intersection_experimental.hpp
new file mode 100644
index 0000000..293b2e9
--- /dev/null
+++ b/tuple/include/theta_intersection_experimental.hpp
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THETA_INTERSECTION_EXPERIMENTAL_HPP_
+#define THETA_INTERSECTION_EXPERIMENTAL_HPP_
+
+#include "theta_sketch_experimental.hpp"
+#include "theta_intersection_base.hpp"
+
+namespace datasketches {
+
+template<typename Allocator = std::allocator<uint64_t>>
+class theta_intersection_experimental {
+public:
+ using Entry = uint64_t;
+ using ExtractKey = trivial_extract_key;
+ using Sketch = theta_sketch_experimental<Allocator>;
+ using CompactSketch = compact_theta_sketch_experimental<Allocator>;
+
+ struct pass_through_policy {
+ uint64_t operator()(uint64_t internal_entry, uint64_t incoming_entry) const {
+ unused(incoming_entry);
+ return internal_entry;
+ }
+ };
+ using State = theta_intersection_base<Entry, ExtractKey, pass_through_policy, Sketch, CompactSketch, Allocator>;
+
+ explicit theta_intersection_experimental(uint64_t seed = DEFAULT_SEED, const Allocator& allocator = Allocator());
+
+ /**
+ * Updates the intersection with a given sketch.
+ * The intersection can be viewed as starting from the "universe" set, and every update
+ * can reduce the current set to leave the overlapping subset only.
+ * @param sketch represents input set for the intersection
+ */
+ template<typename FwdSketch>
+ void update(FwdSketch&& sketch);
+
+ /**
+ * Produces a copy of the current state of the intersection.
+ * If update() was not called, the state is the infinite "universe",
+ * which is considered an undefined state, and throws an exception.
+ * @param ordered optional flag to specify if ordered sketch should be produced
+ * @return the result of the intersection
+ */
+ CompactSketch get_result(bool ordered = true) const;
+
+ /**
+ * Returns true if the state of the intersection is defined (not infinite "universe").
+ * @return true if the state is valid
+ */
+ bool has_result() const;
+
+private:
+ State state_;
+};
+
+} /* namespace datasketches */
+
+#include "theta_intersection_experimental_impl.hpp"
+
+#endif
diff --git a/tuple/include/tuple_a_not_b_impl.hpp b/tuple/include/theta_intersection_experimental_impl.hpp
similarity index 61%
copy from tuple/include/tuple_a_not_b_impl.hpp
copy to tuple/include/theta_intersection_experimental_impl.hpp
index 4ba2e41..e8bcfbb 100644
--- a/tuple/include/tuple_a_not_b_impl.hpp
+++ b/tuple/include/theta_intersection_experimental_impl.hpp
@@ -19,15 +19,25 @@
namespace datasketches {
-template<typename S, typename A>
-tuple_a_not_b<S, A>::tuple_a_not_b(uint64_t seed):
-state_(seed)
+template<typename A>
+theta_intersection_experimental<A>::theta_intersection_experimental(uint64_t seed, const A& allocator):
+state_(seed, pass_through_policy(), allocator)
{}
-template<typename S, typename A>
+template<typename A>
template<typename SS>
-auto tuple_a_not_b<S, A>::compute(SS&& a, const Sketch& b, bool ordered) const -> CompactSketch {
- return state_.compute(std::forward<SS>(a), b, ordered);
+void theta_intersection_experimental<A>::update(SS&& sketch) {
+ state_.update(std::forward<SS>(sketch));
+}
+
+template<typename A>
+auto theta_intersection_experimental<A>::get_result(bool ordered) const -> CompactSketch {
+ return state_.get_result(ordered);
+}
+
+template<typename A>
+bool theta_intersection_experimental<A>::has_result() const {
+ return state_.has_result();
}
} /* namespace datasketches */
diff --git a/tuple/include/theta_union_experimental.hpp b/tuple/include/theta_union_experimental.hpp
index 8f93248..838d8bc 100644
--- a/tuple/include/theta_union_experimental.hpp
+++ b/tuple/include/theta_union_experimental.hpp
@@ -29,13 +29,6 @@ namespace datasketches {
// experimental theta union derived from the same base as tuple union
-struct pass_through_policy {
- uint64_t operator()(uint64_t internal_entry, uint64_t incoming_entry) const {
- unused(incoming_entry);
- return internal_entry;
- }
-};
-
template<typename Allocator = std::allocator<uint64_t>>
class theta_union_experimental {
public:
@@ -45,6 +38,12 @@ public:
using CompactSketch = compact_theta_sketch_experimental<Allocator>;
using resize_factor = theta_constants::resize_factor;
+ struct pass_through_policy {
+ uint64_t operator()(uint64_t internal_entry, uint64_t incoming_entry) const {
+ unused(incoming_entry);
+ return internal_entry;
+ }
+ };
using State = theta_union_base<Entry, ExtractKey, pass_through_policy, Sketch, CompactSketch, Allocator>;
// No constructor here. Use builder instead.
diff --git a/tuple/include/tuple_a_not_b.hpp b/tuple/include/tuple_a_not_b.hpp
index 923e2e1..024406b 100644
--- a/tuple/include/tuple_a_not_b.hpp
+++ b/tuple/include/tuple_a_not_b.hpp
@@ -38,7 +38,7 @@ public:
using AllocEntry = typename std::allocator_traits<Allocator>::template rebind_alloc<Entry>;
using State = theta_set_difference_base<Entry, ExtractKey, Sketch, CompactSketch, AllocEntry>;
- explicit tuple_a_not_b(uint64_t seed = DEFAULT_SEED);
+ explicit tuple_a_not_b(uint64_t seed = DEFAULT_SEED, const Allocator& allocator = Allocator());
/**
* Computes the a-not-b set operation given two sketches.
diff --git a/tuple/include/tuple_a_not_b_impl.hpp b/tuple/include/tuple_a_not_b_impl.hpp
index 4ba2e41..b7873c0 100644
--- a/tuple/include/tuple_a_not_b_impl.hpp
+++ b/tuple/include/tuple_a_not_b_impl.hpp
@@ -20,8 +20,8 @@
namespace datasketches {
template<typename S, typename A>
-tuple_a_not_b<S, A>::tuple_a_not_b(uint64_t seed):
-state_(seed)
+tuple_a_not_b<S, A>::tuple_a_not_b(uint64_t seed, const A& allocator):
+state_(seed, allocator)
{}
template<typename S, typename A>
diff --git a/tuple/test/CMakeLists.txt b/tuple/test/CMakeLists.txt
index 53a3a7e..90b6713 100644
--- a/tuple/test/CMakeLists.txt
+++ b/tuple/test/CMakeLists.txt
@@ -43,7 +43,9 @@ target_sources(tuple_test
tuple_union_test.cpp
tuple_intersection_test.cpp
tuple_a_not_b_test.cpp
+ array_of_doubles_sketch_test.cpp
theta_sketch_experimental_test.cpp
theta_union_experimental_test.cpp
- array_of_doubles_sketch_test.cpp
+ theta_intersection_experimental_test.cpp
+ theta_a_not_b_experimental_test.cpp
)
diff --git a/tuple/test/theta_a_not_b_experimental_test.cpp b/tuple/test/theta_a_not_b_experimental_test.cpp
new file mode 100644
index 0000000..6b44f8b
--- /dev/null
+++ b/tuple/test/theta_a_not_b_experimental_test.cpp
@@ -0,0 +1,250 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <catch.hpp>
+
+#include <theta_a_not_b_experimental.hpp>
+
+namespace datasketches {
+
+// These tests have been copied from the existing theta sketch implementation.
+
+using update_theta_sketch = update_theta_sketch_experimental<>;
+using compact_theta_sketch = compact_theta_sketch_experimental<>;
+using theta_a_not_b = theta_a_not_b_experimental<>;
+
+TEST_CASE("theta a-not-b: empty", "[theta_a_not_b]") {
+ theta_a_not_b a_not_b;
+ auto a = update_theta_sketch::builder().build();
+ auto b = update_theta_sketch::builder().build();
+ compact_theta_sketch result = a_not_b.compute(a, b);
+ REQUIRE(result.get_num_retained() == 0);
+ REQUIRE(result.is_empty());
+ REQUIRE_FALSE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == 0.0);
+}
+
+TEST_CASE("theta a-not-b: non empty no retained keys", "[theta_a_not_b]") {
+ update_theta_sketch a = update_theta_sketch::builder().build();
+ a.update(1);
+ update_theta_sketch b = update_theta_sketch::builder().set_p(0.001).build();
+ theta_a_not_b a_not_b;
+
+ // B is still empty
+ compact_theta_sketch result = a_not_b.compute(a, b);
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE_FALSE(result.is_estimation_mode());
+ REQUIRE(result.get_num_retained() == 1);
+ REQUIRE(result.get_theta() == Approx(1).margin(1e-10));
+ REQUIRE(result.get_estimate() == 1.0);
+
+ // B is not empty in estimation mode and no entries
+ b.update(1);
+ REQUIRE(b.get_num_retained() == 0U);
+
+ result = a_not_b.compute(a, b);
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE(result.is_estimation_mode());
+ REQUIRE(result.get_num_retained() == 0);
+ REQUIRE(result.get_theta() == Approx(0.001).margin(1e-10));
+ REQUIRE(result.get_estimate() == 0.0);
+}
+
+TEST_CASE("theta a-not-b: exact mode half overlap", "[theta_a_not_b]") {
+ update_theta_sketch a = update_theta_sketch::builder().build();
+ int value = 0;
+ for (int i = 0; i < 1000; i++) a.update(value++);
+
+ update_theta_sketch b = update_theta_sketch::builder().build();
+ value = 500;
+ for (int i = 0; i < 1000; i++) b.update(value++);
+
+ theta_a_not_b a_not_b;
+
+ // unordered inputs, ordered result
+ compact_theta_sketch result = a_not_b.compute(a, b);
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE_FALSE(result.is_estimation_mode());
+ REQUIRE(result.is_ordered());
+ REQUIRE(result.get_estimate() == 500.0);
+
+ // unordered inputs, unordered result
+ result = a_not_b.compute(a, b, false);
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE_FALSE(result.is_estimation_mode());
+ REQUIRE_FALSE(result.is_ordered());
+ REQUIRE(result.get_estimate() == 500.0);
+
+ // ordered inputs
+ result = a_not_b.compute(a.compact(), b.compact());
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE_FALSE(result.is_estimation_mode());
+ REQUIRE(result.is_ordered());
+ REQUIRE(result.get_estimate() == 500.0);
+
+ // A is ordered, so the result is ordered regardless
+ result = a_not_b.compute(a.compact(), b, false);
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE_FALSE(result.is_estimation_mode());
+ REQUIRE(result.is_ordered());
+ REQUIRE(result.get_estimate() == 500.0);
+}
+
+TEST_CASE("theta a-not-b: exact mode disjoint", "[theta_a_not_b]") {
+ update_theta_sketch a = update_theta_sketch::builder().build();
+ int value = 0;
+ for (int i = 0; i < 1000; i++) a.update(value++);
+
+ update_theta_sketch b = update_theta_sketch::builder().build();
+ for (int i = 0; i < 1000; i++) b.update(value++);
+
+ theta_a_not_b a_not_b;
+
+ // unordered inputs
+ compact_theta_sketch result = a_not_b.compute(a, b);
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE_FALSE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == 1000.0);
+
+ // ordered inputs
+ result = a_not_b.compute(a.compact(), b.compact());
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE_FALSE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == 1000.0);
+}
+
+TEST_CASE("theta a-not-b: exact mode full overlap", "[theta_a_not_b]") {
+ update_theta_sketch sketch = update_theta_sketch::builder().build();
+ int value = 0;
+ for (int i = 0; i < 1000; i++) sketch.update(value++);
+
+ theta_a_not_b a_not_b;
+
+ // unordered inputs
+ compact_theta_sketch result = a_not_b.compute(sketch, sketch);
+ REQUIRE(result.is_empty());
+ REQUIRE_FALSE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == 0.0);
+
+ // ordered inputs
+ result = a_not_b.compute(sketch.compact(), sketch.compact());
+ REQUIRE(result.is_empty());
+ REQUIRE_FALSE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == 0.0);
+}
+
+TEST_CASE("theta a-not-b: estimation mode half overlap", "[theta_a_not_b]") {
+ update_theta_sketch a = update_theta_sketch::builder().build();
+ int value = 0;
+ for (int i = 0; i < 10000; i++) a.update(value++);
+
+ update_theta_sketch b = update_theta_sketch::builder().build();
+ value = 5000;
+ for (int i = 0; i < 10000; i++) b.update(value++);
+
+ theta_a_not_b a_not_b;
+
+ // unordered inputs
+ compact_theta_sketch result = a_not_b.compute(a, b);
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02));
+
+ // ordered inputs
+ result = a_not_b.compute(a.compact(), b.compact());
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02));
+}
+
+TEST_CASE("theta a-not-b: estimation mode disjoint", "[theta_a_not_b]") {
+ update_theta_sketch a = update_theta_sketch::builder().build();
+ int value = 0;
+ for (int i = 0; i < 10000; i++) a.update(value++);
+
+ update_theta_sketch b = update_theta_sketch::builder().build();
+ for (int i = 0; i < 10000; i++) b.update(value++);
+
+ theta_a_not_b a_not_b;
+
+ // unordered inputs
+ compact_theta_sketch result = a_not_b.compute(a, b);
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == Approx(10000).margin(10000 * 0.02));
+
+ // ordered inputs
+ result = a_not_b.compute(a.compact(), b.compact());
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == Approx(10000).margin(10000 * 0.02));
+}
+
+TEST_CASE("theta a-not-b: estimation mode full overlap", "[theta_a_not_b]") {
+ update_theta_sketch sketch = update_theta_sketch::builder().build();
+ int value = 0;
+ for (int i = 0; i < 10000; i++) sketch.update(value++);
+
+ theta_a_not_b a_not_b;
+
+ // unordered inputs
+ compact_theta_sketch result = a_not_b.compute(sketch, sketch);
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == 0.0);
+
+ // ordered inputs
+ result = a_not_b.compute(sketch.compact(), sketch.compact());
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == 0.0);
+}
+
+TEST_CASE("theta a-not-b: seed mismatch", "[theta_a_not_b]") {
+ update_theta_sketch sketch = update_theta_sketch::builder().build();
+ sketch.update(1); // non-empty should not be ignored
+ theta_a_not_b a_not_b(123);
+ REQUIRE_THROWS_AS(a_not_b.compute(sketch, sketch), std::invalid_argument);
+}
+
+TEST_CASE("theta a-not-b: issue #152", "[theta_a_not_b]") {
+ update_theta_sketch a = update_theta_sketch::builder().build();
+ int value = 0;
+ for (int i = 0; i < 10000; i++) a.update(value++);
+
+ update_theta_sketch b = update_theta_sketch::builder().build();
+ value = 5000;
+ for (int i = 0; i < 25000; i++) b.update(value++);
+
+ theta_a_not_b a_not_b;
+
+ // unordered inputs
+ compact_theta_sketch result = a_not_b.compute(a, b);
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.03));
+
+ // ordered inputs
+ result = a_not_b.compute(a.compact(), b.compact());
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.03));
+}
+
+} /* namespace datasketches */
diff --git a/tuple/test/theta_intersection_experimental_test.cpp b/tuple/test/theta_intersection_experimental_test.cpp
new file mode 100644
index 0000000..3337636
--- /dev/null
+++ b/tuple/test/theta_intersection_experimental_test.cpp
@@ -0,0 +1,224 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <catch.hpp>
+
+#include <theta_intersection_experimental.hpp>
+
+namespace datasketches {
+
+// These tests have been copied from the existing theta sketch implementation.
+
+using update_theta_sketch = update_theta_sketch_experimental<>;
+using compact_theta_sketch = compact_theta_sketch_experimental<>;
+using theta_intersection = theta_intersection_experimental<>;
+
+TEST_CASE("theta intersection: invalid", "[theta_intersection]") {
+ theta_intersection intersection;
+ REQUIRE_FALSE(intersection.has_result());
+ REQUIRE_THROWS_AS(intersection.get_result(), std::invalid_argument);
+}
+
+TEST_CASE("theta intersection: empty", "[theta_intersection]") {
+ theta_intersection intersection;
+ update_theta_sketch sketch = update_theta_sketch::builder().build();
+ intersection.update(sketch);
+ compact_theta_sketch result = intersection.get_result();
+ REQUIRE(result.get_num_retained() == 0);
+ REQUIRE(result.is_empty());
+ REQUIRE_FALSE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == 0.0);
+
+ intersection.update(sketch);
+ result = intersection.get_result();
+ REQUIRE(result.get_num_retained() == 0);
+ REQUIRE(result.is_empty());
+ REQUIRE_FALSE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == 0.0);
+}
+
+TEST_CASE("theta intersection: non empty no retained keys", "[theta_intersection]") {
+ update_theta_sketch sketch = update_theta_sketch::builder().set_p(0.001).build();
+ sketch.update(1);
+ theta_intersection intersection;
+ intersection.update(sketch);
+ compact_theta_sketch result = intersection.get_result();
+ REQUIRE(result.get_num_retained() == 0);
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE(result.is_estimation_mode());
+ REQUIRE(result.get_theta() == Approx(0.001).margin(1e-10));
+ REQUIRE(result.get_estimate() == 0.0);
+
+ intersection.update(sketch);
+ result = intersection.get_result();
+ REQUIRE(result.get_num_retained() == 0);
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE(result.is_estimation_mode());
+ REQUIRE(result.get_theta() == Approx(0.001).margin(1e-10));
+ REQUIRE(result.get_estimate() == 0.0);
+}
+
+TEST_CASE("theta intersection: exact mode half overlap unordered", "[theta_intersection]") {
+ update_theta_sketch sketch1 = update_theta_sketch::builder().build();
+ int value = 0;
+ for (int i = 0; i < 1000; i++) sketch1.update(value++);
+
+ update_theta_sketch sketch2 = update_theta_sketch::builder().build();
+ value = 500;
+ for (int i = 0; i < 1000; i++) sketch2.update(value++);
+
+ theta_intersection intersection;
+ intersection.update(sketch1);
+ intersection.update(sketch2);
+ compact_theta_sketch result = intersection.get_result();
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE_FALSE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == 500.0);
+}
+
+TEST_CASE("theta intersection: exact mode half overlap ordered", "[theta_intersection]") {
+ update_theta_sketch sketch1 = update_theta_sketch::builder().build();
+ int value = 0;
+ for (int i = 0; i < 1000; i++) sketch1.update(value++);
+
+ update_theta_sketch sketch2 = update_theta_sketch::builder().build();
+ value = 500;
+ for (int i = 0; i < 1000; i++) sketch2.update(value++);
+
+ theta_intersection intersection;
+ intersection.update(sketch1.compact());
+ intersection.update(sketch2.compact());
+ compact_theta_sketch result = intersection.get_result();
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE_FALSE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == 500.0);
+}
+
+TEST_CASE("theta intersection: exact mode disjoint unordered", "[theta_intersection]") {
+ update_theta_sketch sketch1 = update_theta_sketch::builder().build();
+ int value = 0;
+ for (int i = 0; i < 1000; i++) sketch1.update(value++);
+
+ update_theta_sketch sketch2 = update_theta_sketch::builder().build();
+ for (int i = 0; i < 1000; i++) sketch2.update(value++);
+
+ theta_intersection intersection;
+ intersection.update(sketch1);
+ intersection.update(sketch2);
+ compact_theta_sketch result = intersection.get_result();
+ REQUIRE(result.is_empty());
+ REQUIRE_FALSE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == 0.0);
+}
+
+TEST_CASE("theta intersection: exact mode disjoint ordered", "[theta_intersection]") {
+ update_theta_sketch sketch1 = update_theta_sketch::builder().build();
+ int value = 0;
+ for (int i = 0; i < 1000; i++) sketch1.update(value++);
+
+ update_theta_sketch sketch2 = update_theta_sketch::builder().build();
+ for (int i = 0; i < 1000; i++) sketch2.update(value++);
+
+ theta_intersection intersection;
+ intersection.update(sketch1.compact());
+ intersection.update(sketch2.compact());
+ compact_theta_sketch result = intersection.get_result();
+ REQUIRE(result.is_empty());
+ REQUIRE_FALSE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == 0.0);
+}
+
+TEST_CASE("theta intersection: estimation mode half overlap unordered", "[theta_intersection]") {
+ update_theta_sketch sketch1 = update_theta_sketch::builder().build();
+ int value = 0;
+ for (int i = 0; i < 10000; i++) sketch1.update(value++);
+
+ update_theta_sketch sketch2 = update_theta_sketch::builder().build();
+ value = 5000;
+ for (int i = 0; i < 10000; i++) sketch2.update(value++);
+
+ theta_intersection intersection;
+ intersection.update(sketch1);
+ intersection.update(sketch2);
+ compact_theta_sketch result = intersection.get_result();
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02));
+}
+
+TEST_CASE("theta intersection: estimation mode half overlap ordered", "[theta_intersection]") {
+ update_theta_sketch sketch1 = update_theta_sketch::builder().build();
+ int value = 0;
+ for (int i = 0; i < 10000; i++) sketch1.update(value++);
+
+ update_theta_sketch sketch2 = update_theta_sketch::builder().build();
+ value = 5000;
+ for (int i = 0; i < 10000; i++) sketch2.update(value++);
+
+ theta_intersection intersection;
+ intersection.update(sketch1.compact());
+ intersection.update(sketch2.compact());
+ compact_theta_sketch result = intersection.get_result();
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02));
+}
+
+TEST_CASE("theta intersection: estimation mode disjoint unordered", "[theta_intersection]") {
+ update_theta_sketch sketch1 = update_theta_sketch::builder().build();
+ int value = 0;
+ for (int i = 0; i < 10000; i++) sketch1.update(value++);
+
+ update_theta_sketch sketch2 = update_theta_sketch::builder().build();
+ for (int i = 0; i < 10000; i++) sketch2.update(value++);
+
+ theta_intersection intersection;
+ intersection.update(sketch1);
+ intersection.update(sketch2);
+ compact_theta_sketch result = intersection.get_result();
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == 0.0);
+}
+
+TEST_CASE("theta intersection: estimation mode disjoint ordered", "[theta_intersection]") {
+ update_theta_sketch sketch1 = update_theta_sketch::builder().build();
+ int value = 0;
+ for (int i = 0; i < 10000; i++) sketch1.update(value++);
+
+ update_theta_sketch sketch2 = update_theta_sketch::builder().build();
+ for (int i = 0; i < 10000; i++) sketch2.update(value++);
+
+ theta_intersection intersection;
+ intersection.update(sketch1.compact());
+ intersection.update(sketch2.compact());
+ compact_theta_sketch result = intersection.get_result();
+ REQUIRE_FALSE(result.is_empty());
+ REQUIRE(result.is_estimation_mode());
+ REQUIRE(result.get_estimate() == 0.0);
+}
+
+TEST_CASE("theta intersection: seed mismatch", "[theta_intersection]") {
+ update_theta_sketch sketch = update_theta_sketch::builder().build();
+ sketch.update(1); // non-empty should not be ignored
+ theta_intersection intersection(123);
+ REQUIRE_THROWS_AS(intersection.update(sketch), std::invalid_argument);
+}
+
+} /* namespace datasketches */
diff --git a/tuple/test/theta_sketch_experimental_test.cpp b/tuple/test/theta_sketch_experimental_test.cpp
index e0ccdf3..61435a4 100644
--- a/tuple/test/theta_sketch_experimental_test.cpp
+++ b/tuple/test/theta_sketch_experimental_test.cpp
@@ -31,6 +31,9 @@ const std::string inputPath = TEST_BINARY_INPUT_PATH;
const std::string inputPath = "test/";
#endif
+// These tests have been copied from the existing theta sketch implementation.
+// Serialization as base class and serialization of update sketch have been removed.
+
using update_theta_sketch = update_theta_sketch_experimental<>;
using compact_theta_sketch = compact_theta_sketch_experimental<>;
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org