You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2020/08/24 22:20:08 UTC

[incubator-datasketches-cpp] 01/02: theta intersection and a-not-b

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch tuple_sketch
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git

commit 73231d23402fb5bc2ba34fc8656b9232b0e61552
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Mon Aug 24 15:19:22 2020 -0700

    theta intersection and a-not-b
---
 tuple/CMakeLists.txt                               |  12 +-
 ..._a_not_b.hpp => theta_a_not_b_experimental.hpp} |  28 +--
 ...mpl.hpp => theta_a_not_b_experimental_impl.hpp} |  10 +-
 tuple/include/theta_intersection_experimental.hpp  |  78 +++++++
 ...pp => theta_intersection_experimental_impl.hpp} |  22 +-
 tuple/include/theta_union_experimental.hpp         |  13 +-
 tuple/include/tuple_a_not_b.hpp                    |   2 +-
 tuple/include/tuple_a_not_b_impl.hpp               |   4 +-
 tuple/test/CMakeLists.txt                          |   4 +-
 tuple/test/theta_a_not_b_experimental_test.cpp     | 250 +++++++++++++++++++++
 .../test/theta_intersection_experimental_test.cpp  | 224 ++++++++++++++++++
 tuple/test/theta_sketch_experimental_test.cpp      |   3 +
 12 files changed, 609 insertions(+), 41 deletions(-)

diff --git a/tuple/CMakeLists.txt b/tuple/CMakeLists.txt
index fce73ab..411c712 100644
--- a/tuple/CMakeLists.txt
+++ b/tuple/CMakeLists.txt
@@ -37,13 +37,15 @@ list(APPEND tuple_HEADERS "include/tuple_sketch.hpp;include/tuple_sketch_impl.hp
 list(APPEND tuple_HEADERS "include/tuple_union.hpp;include/tuple_union_impl.hpp")
 list(APPEND tuple_HEADERS "include/tuple_intersection.hpp;include/tuple_intersection_impl.hpp")
 list(APPEND tuple_HEADERS "include/tuple_a_not_b.hpp;include/tuple_a_not_b_impl.hpp")
+list(APPEND tuple_HEADERS "include/array_of_doubles_sketch.hpp;include/array_of_doubles_sketch_impl.hpp")
 list(APPEND tuple_HEADERS "include/theta_update_sketch_base.hpp;include/theta_update_sketch_base_impl.hpp")
 list(APPEND tuple_HEADERS "include/theta_union_base.hpp;include/theta_union_base_impl.hpp")
 list(APPEND tuple_HEADERS "include/theta_intersection_base.hpp;include/theta_intersection_base_impl.hpp")
 list(APPEND tuple_HEADERS "include/theta_set_difference_base.hpp;include/theta_set_difference_base_impl.hpp")
 list(APPEND tuple_HEADERS "include/theta_sketch_experimental.hpp;include/theta_sketch_experimental_impl.hpp")
 list(APPEND tuple_HEADERS "include/theta_union_experimental.hpp;include/theta_union_experimental_impl.hpp")
-list(APPEND tuple_HEADERS "include/array_of_doubles_sketch.hpp;include/array_of_doubles_sketch_impl.hpp")
+list(APPEND tuple_HEADERS "include/theta_intersection_experimental.hpp;include/theta_intersection_experimental_impl.hpp")
+list(APPEND tuple_HEADERS "include/theta_a_not_b_experimental.hpp;include/theta_a_not_b_experimental_impl.hpp")
 
 install(TARGETS tuple
   EXPORT ${PROJECT_NAME}
@@ -62,6 +64,8 @@ target_sources(tuple
     ${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_intersection_impl.hpp
     ${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_a_not_b.hpp
     ${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_a_not_b_impl.hpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_sketch.hpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_sketch_impl.hpp
     ${CMAKE_CURRENT_SOURCE_DIR}/include/theta_update_sketch_base.hpp
     ${CMAKE_CURRENT_SOURCE_DIR}/include/theta_update_sketch_base_impl.hpp
     ${CMAKE_CURRENT_SOURCE_DIR}/include/theta_union_base.hpp
@@ -74,6 +78,8 @@ target_sources(tuple
     ${CMAKE_CURRENT_SOURCE_DIR}/include/theta_sketch_experimental_impl.hpp
     ${CMAKE_CURRENT_SOURCE_DIR}/include/theta_union_experimental.hpp
     ${CMAKE_CURRENT_SOURCE_DIR}/include/theta_union_experimental_impl.hpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_sketch.hpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_sketch_impl.hpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/include/theta_intersection_experimental.hpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/include/theta_intersection_experimental_impl.hpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/include/theta_a_not_b_experimental.hpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/include/theta_a_not_b_experimental_impl.hpp
 )
diff --git a/tuple/include/tuple_a_not_b.hpp b/tuple/include/theta_a_not_b_experimental.hpp
similarity index 66%
copy from tuple/include/tuple_a_not_b.hpp
copy to tuple/include/theta_a_not_b_experimental.hpp
index 923e2e1..ed29ce5 100644
--- a/tuple/include/tuple_a_not_b.hpp
+++ b/tuple/include/theta_a_not_b_experimental.hpp
@@ -17,28 +17,24 @@
  * under the License.
  */
 
-#ifndef TUPLE_A_NOT_B_HPP_
-#define TUPLE_A_NOT_B_HPP_
+#ifndef THETA_A_NOT_B_EXPERIMENTAL_HPP_
+#define THETA_A_NOT_B_EXPERIMENTAL_HPP_
 
-#include "tuple_sketch.hpp"
+#include "theta_sketch_experimental.hpp"
 #include "theta_set_difference_base.hpp"
 
 namespace datasketches {
 
-template<
-  typename Summary,
-  typename Allocator = std::allocator<Summary>
->
-class tuple_a_not_b {
+template<typename Allocator = std::allocator<uint64_t>>
+class theta_a_not_b_experimental {
 public:
-  using Entry = std::pair<uint64_t, Summary>;
-  using ExtractKey = pair_extract_key<uint64_t, Summary>;
-  using Sketch = tuple_sketch<Summary, Allocator>;
-  using CompactSketch = compact_tuple_sketch<Summary, Allocator>;
-  using AllocEntry = typename std::allocator_traits<Allocator>::template rebind_alloc<Entry>;
-  using State = theta_set_difference_base<Entry, ExtractKey, Sketch, CompactSketch, AllocEntry>;
+  using Entry = uint64_t;
+  using ExtractKey = trivial_extract_key;
+  using Sketch = theta_sketch_experimental<Allocator>;
+  using CompactSketch = compact_theta_sketch_experimental<Allocator>;
+  using State = theta_set_difference_base<Entry, ExtractKey, Sketch, CompactSketch, Allocator>;
 
-  explicit tuple_a_not_b(uint64_t seed = DEFAULT_SEED);
+  explicit theta_a_not_b_experimental(uint64_t seed = DEFAULT_SEED, const Allocator& allocator = Allocator());
 
   /**
    * Computes the a-not-b set operation given two sketches.
@@ -53,6 +49,6 @@ private:
 
 } /* namespace datasketches */
 
-#include "tuple_a_not_b_impl.hpp"
+#include "theta_a_not_b_experimental_impl.hpp"
 
 #endif
diff --git a/tuple/include/tuple_a_not_b_impl.hpp b/tuple/include/theta_a_not_b_experimental_impl.hpp
similarity index 78%
copy from tuple/include/tuple_a_not_b_impl.hpp
copy to tuple/include/theta_a_not_b_experimental_impl.hpp
index 4ba2e41..57b3f70 100644
--- a/tuple/include/tuple_a_not_b_impl.hpp
+++ b/tuple/include/theta_a_not_b_experimental_impl.hpp
@@ -19,14 +19,14 @@
 
 namespace datasketches {
 
-template<typename S, typename A>
-tuple_a_not_b<S, A>::tuple_a_not_b(uint64_t seed):
-state_(seed)
+template<typename A>
+theta_a_not_b_experimental<A>::theta_a_not_b_experimental(uint64_t seed, const A& allocator):
+state_(seed, allocator)
 {}
 
-template<typename S, typename A>
+template<typename A>
 template<typename SS>
-auto tuple_a_not_b<S, A>::compute(SS&& a, const Sketch& b, bool ordered) const -> CompactSketch {
+auto theta_a_not_b_experimental<A>::compute(SS&& a, const Sketch& b, bool ordered) const -> CompactSketch {
   return state_.compute(std::forward<SS>(a), b, ordered);
 }
 
diff --git a/tuple/include/theta_intersection_experimental.hpp b/tuple/include/theta_intersection_experimental.hpp
new file mode 100644
index 0000000..293b2e9
--- /dev/null
+++ b/tuple/include/theta_intersection_experimental.hpp
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THETA_INTERSECTION_EXPERIMENTAL_HPP_
+#define THETA_INTERSECTION_EXPERIMENTAL_HPP_
+
+#include "theta_sketch_experimental.hpp"
+#include "theta_intersection_base.hpp"
+
+namespace datasketches {
+
+template<typename Allocator = std::allocator<uint64_t>>
+class theta_intersection_experimental {
+public:
+  using Entry = uint64_t;
+  using ExtractKey = trivial_extract_key;
+  using Sketch = theta_sketch_experimental<Allocator>;
+  using CompactSketch = compact_theta_sketch_experimental<Allocator>;
+
+  struct pass_through_policy {
+    uint64_t operator()(uint64_t internal_entry, uint64_t incoming_entry) const {
+      unused(incoming_entry);
+      return internal_entry;
+    }
+  };
+  using State = theta_intersection_base<Entry, ExtractKey, pass_through_policy, Sketch, CompactSketch, Allocator>;
+
+  explicit theta_intersection_experimental(uint64_t seed = DEFAULT_SEED, const Allocator& allocator = Allocator());
+
+  /**
+   * Updates the intersection with a given sketch.
+   * The intersection can be viewed as starting from the "universe" set, and every update
+   * can reduce the current set to leave the overlapping subset only.
+   * @param sketch represents input set for the intersection
+   */
+  template<typename FwdSketch>
+  void update(FwdSketch&& sketch);
+
+  /**
+   * Produces a copy of the current state of the intersection.
+   * If update() was not called, the state is the infinite "universe",
+   * which is considered an undefined state, and throws an exception.
+   * @param ordered optional flag to specify if ordered sketch should be produced
+   * @return the result of the intersection
+   */
+  CompactSketch get_result(bool ordered = true) const;
+
+  /**
+   * Returns true if the state of the intersection is defined (not infinite "universe").
+   * @return true if the state is valid
+   */
+  bool has_result() const;
+
+private:
+  State state_;
+};
+
+} /* namespace datasketches */
+
+#include "theta_intersection_experimental_impl.hpp"
+
+#endif
diff --git a/tuple/include/tuple_a_not_b_impl.hpp b/tuple/include/theta_intersection_experimental_impl.hpp
similarity index 61%
copy from tuple/include/tuple_a_not_b_impl.hpp
copy to tuple/include/theta_intersection_experimental_impl.hpp
index 4ba2e41..e8bcfbb 100644
--- a/tuple/include/tuple_a_not_b_impl.hpp
+++ b/tuple/include/theta_intersection_experimental_impl.hpp
@@ -19,15 +19,25 @@
 
 namespace datasketches {
 
-template<typename S, typename A>
-tuple_a_not_b<S, A>::tuple_a_not_b(uint64_t seed):
-state_(seed)
+template<typename A>
+theta_intersection_experimental<A>::theta_intersection_experimental(uint64_t seed, const A& allocator):
+state_(seed, pass_through_policy(), allocator)
 {}
 
-template<typename S, typename A>
+template<typename A>
 template<typename SS>
-auto tuple_a_not_b<S, A>::compute(SS&& a, const Sketch& b, bool ordered) const -> CompactSketch {
-  return state_.compute(std::forward<SS>(a), b, ordered);
+void theta_intersection_experimental<A>::update(SS&& sketch) {
+  state_.update(std::forward<SS>(sketch));
+}
+
+template<typename A>
+auto theta_intersection_experimental<A>::get_result(bool ordered) const -> CompactSketch {
+  return state_.get_result(ordered);
+}
+
+template<typename A>
+bool theta_intersection_experimental<A>::has_result() const {
+  return state_.has_result();
 }
 
 } /* namespace datasketches */
diff --git a/tuple/include/theta_union_experimental.hpp b/tuple/include/theta_union_experimental.hpp
index 8f93248..838d8bc 100644
--- a/tuple/include/theta_union_experimental.hpp
+++ b/tuple/include/theta_union_experimental.hpp
@@ -29,13 +29,6 @@ namespace datasketches {
 
 // experimental theta union derived from the same base as tuple union
 
-struct pass_through_policy {
-  uint64_t operator()(uint64_t internal_entry, uint64_t incoming_entry) const {
-    unused(incoming_entry);
-    return internal_entry;
-  }
-};
-
 template<typename Allocator = std::allocator<uint64_t>>
 class theta_union_experimental {
 public:
@@ -45,6 +38,12 @@ public:
   using CompactSketch = compact_theta_sketch_experimental<Allocator>;
   using resize_factor = theta_constants::resize_factor;
 
+  struct pass_through_policy {
+    uint64_t operator()(uint64_t internal_entry, uint64_t incoming_entry) const {
+      unused(incoming_entry);
+      return internal_entry;
+    }
+  };
   using State = theta_union_base<Entry, ExtractKey, pass_through_policy, Sketch, CompactSketch, Allocator>;
 
   // No constructor here. Use builder instead.
diff --git a/tuple/include/tuple_a_not_b.hpp b/tuple/include/tuple_a_not_b.hpp
index 923e2e1..024406b 100644
--- a/tuple/include/tuple_a_not_b.hpp
+++ b/tuple/include/tuple_a_not_b.hpp
@@ -38,7 +38,7 @@ public:
   using AllocEntry = typename std::allocator_traits<Allocator>::template rebind_alloc<Entry>;
   using State = theta_set_difference_base<Entry, ExtractKey, Sketch, CompactSketch, AllocEntry>;
 
-  explicit tuple_a_not_b(uint64_t seed = DEFAULT_SEED);
+  explicit tuple_a_not_b(uint64_t seed = DEFAULT_SEED, const Allocator& allocator = Allocator());
 
   /**
    * Computes the a-not-b set operation given two sketches.
diff --git a/tuple/include/tuple_a_not_b_impl.hpp b/tuple/include/tuple_a_not_b_impl.hpp
index 4ba2e41..b7873c0 100644
--- a/tuple/include/tuple_a_not_b_impl.hpp
+++ b/tuple/include/tuple_a_not_b_impl.hpp
@@ -20,8 +20,8 @@
 namespace datasketches {
 
 template<typename S, typename A>
-tuple_a_not_b<S, A>::tuple_a_not_b(uint64_t seed):
-state_(seed)
+tuple_a_not_b<S, A>::tuple_a_not_b(uint64_t seed, const A& allocator):
+state_(seed, allocator)
 {}
 
 template<typename S, typename A>
diff --git a/tuple/test/CMakeLists.txt b/tuple/test/CMakeLists.txt
index 53a3a7e..90b6713 100644
--- a/tuple/test/CMakeLists.txt
+++ b/tuple/test/CMakeLists.txt
@@ -43,7 +43,9 @@ target_sources(tuple_test
     tuple_union_test.cpp
     tuple_intersection_test.cpp
     tuple_a_not_b_test.cpp
+    array_of_doubles_sketch_test.cpp
     theta_sketch_experimental_test.cpp
     theta_union_experimental_test.cpp
-    array_of_doubles_sketch_test.cpp
+    theta_intersection_experimental_test.cpp
+    theta_a_not_b_experimental_test.cpp
 )
diff --git a/tuple/test/theta_a_not_b_experimental_test.cpp b/tuple/test/theta_a_not_b_experimental_test.cpp
new file mode 100644
index 0000000..6b44f8b
--- /dev/null
+++ b/tuple/test/theta_a_not_b_experimental_test.cpp
@@ -0,0 +1,250 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <catch.hpp>
+
+#include <theta_a_not_b_experimental.hpp>
+
+namespace datasketches {
+
+// These tests have been copied from the existing theta sketch implementation.
+
+using update_theta_sketch = update_theta_sketch_experimental<>;
+using compact_theta_sketch = compact_theta_sketch_experimental<>;
+using theta_a_not_b = theta_a_not_b_experimental<>;
+
+TEST_CASE("theta a-not-b: empty", "[theta_a_not_b]") {
+  theta_a_not_b a_not_b;
+  auto a = update_theta_sketch::builder().build();
+  auto b = update_theta_sketch::builder().build();
+  compact_theta_sketch result = a_not_b.compute(a, b);
+  REQUIRE(result.get_num_retained() == 0);
+  REQUIRE(result.is_empty());
+  REQUIRE_FALSE(result.is_estimation_mode());
+  REQUIRE(result.get_estimate() == 0.0);
+}
+
+TEST_CASE("theta a-not-b: non empty no retained keys", "[theta_a_not_b]") {
+  update_theta_sketch a = update_theta_sketch::builder().build();
+  a.update(1);
+  update_theta_sketch b = update_theta_sketch::builder().set_p(0.001).build();
+  theta_a_not_b a_not_b;
+
+  // B is still empty
+  compact_theta_sketch result = a_not_b.compute(a, b);
+  REQUIRE_FALSE(result.is_empty());
+  REQUIRE_FALSE(result.is_estimation_mode());
+  REQUIRE(result.get_num_retained() == 1);
+  REQUIRE(result.get_theta() == Approx(1).margin(1e-10));
+  REQUIRE(result.get_estimate() == 1.0);
+
+  // B is not empty in estimation mode and no entries
+  b.update(1);
+  REQUIRE(b.get_num_retained() == 0U);
+
+  result = a_not_b.compute(a, b);
+  REQUIRE_FALSE(result.is_empty());
+  REQUIRE(result.is_estimation_mode());
+  REQUIRE(result.get_num_retained() == 0);
+  REQUIRE(result.get_theta() == Approx(0.001).margin(1e-10));
+  REQUIRE(result.get_estimate() == 0.0);
+}
+
+TEST_CASE("theta a-not-b: exact mode half overlap", "[theta_a_not_b]") {
+  update_theta_sketch a = update_theta_sketch::builder().build();
+  int value = 0;
+  for (int i = 0; i < 1000; i++) a.update(value++);
+
+  update_theta_sketch b = update_theta_sketch::builder().build();
+  value = 500;
+  for (int i = 0; i < 1000; i++) b.update(value++);
+
+  theta_a_not_b a_not_b;
+
+  // unordered inputs, ordered result
+  compact_theta_sketch result = a_not_b.compute(a, b);
+  REQUIRE_FALSE(result.is_empty());
+  REQUIRE_FALSE(result.is_estimation_mode());
+  REQUIRE(result.is_ordered());
+  REQUIRE(result.get_estimate() == 500.0);
+
+  // unordered inputs, unordered result
+  result = a_not_b.compute(a, b, false);
+  REQUIRE_FALSE(result.is_empty());
+  REQUIRE_FALSE(result.is_estimation_mode());
+  REQUIRE_FALSE(result.is_ordered());
+  REQUIRE(result.get_estimate() == 500.0);
+
+  // ordered inputs
+  result = a_not_b.compute(a.compact(), b.compact());
+  REQUIRE_FALSE(result.is_empty());
+  REQUIRE_FALSE(result.is_estimation_mode());
+  REQUIRE(result.is_ordered());
+  REQUIRE(result.get_estimate() == 500.0);
+
+  // A is ordered, so the result is ordered regardless
+  result = a_not_b.compute(a.compact(), b, false);
+  REQUIRE_FALSE(result.is_empty());
+  REQUIRE_FALSE(result.is_estimation_mode());
+  REQUIRE(result.is_ordered());
+  REQUIRE(result.get_estimate() == 500.0);
+}
+
+TEST_CASE("theta a-not-b: exact mode disjoint", "[theta_a_not_b]") {
+  update_theta_sketch a = update_theta_sketch::builder().build();
+  int value = 0;
+  for (int i = 0; i < 1000; i++) a.update(value++);
+
+  update_theta_sketch b = update_theta_sketch::builder().build();
+  for (int i = 0; i < 1000; i++) b.update(value++);
+
+  theta_a_not_b a_not_b;
+
+  // unordered inputs
+  compact_theta_sketch result = a_not_b.compute(a, b);
+  REQUIRE_FALSE(result.is_empty());
+  REQUIRE_FALSE(result.is_estimation_mode());
+  REQUIRE(result.get_estimate() == 1000.0);
+
+  // ordered inputs
+  result = a_not_b.compute(a.compact(), b.compact());
+  REQUIRE_FALSE(result.is_empty());
+  REQUIRE_FALSE(result.is_estimation_mode());
+  REQUIRE(result.get_estimate() == 1000.0);
+}
+
+TEST_CASE("theta a-not-b: exact mode full overlap", "[theta_a_not_b]") {
+  update_theta_sketch sketch = update_theta_sketch::builder().build();
+  int value = 0;
+  for (int i = 0; i < 1000; i++) sketch.update(value++);
+
+  theta_a_not_b a_not_b;
+
+  // unordered inputs
+  compact_theta_sketch result = a_not_b.compute(sketch, sketch);
+  REQUIRE(result.is_empty());
+  REQUIRE_FALSE(result.is_estimation_mode());
+  REQUIRE(result.get_estimate() == 0.0);
+
+  // ordered inputs
+  result = a_not_b.compute(sketch.compact(), sketch.compact());
+  REQUIRE(result.is_empty());
+  REQUIRE_FALSE(result.is_estimation_mode());
+  REQUIRE(result.get_estimate() == 0.0);
+}
+
+TEST_CASE("theta a-not-b: estimation mode half overlap", "[theta_a_not_b]") {
+  update_theta_sketch a = update_theta_sketch::builder().build();
+  int value = 0;
+  for (int i = 0; i < 10000; i++) a.update(value++);
+
+  update_theta_sketch b = update_theta_sketch::builder().build();
+  value = 5000;
+  for (int i = 0; i < 10000; i++) b.update(value++);
+
+  theta_a_not_b a_not_b;
+
+  // unordered inputs
+  compact_theta_sketch result = a_not_b.compute(a, b);
+  REQUIRE_FALSE(result.is_empty());
+  REQUIRE(result.is_estimation_mode());
+  REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02));
+
+  // ordered inputs
+  result = a_not_b.compute(a.compact(), b.compact());
+  REQUIRE_FALSE(result.is_empty());
+  REQUIRE(result.is_estimation_mode());
+  REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02));
+}
+
+TEST_CASE("theta a-not-b: estimation mode disjoint", "[theta_a_not_b]") {
+  update_theta_sketch a = update_theta_sketch::builder().build();
+  int value = 0;
+  for (int i = 0; i < 10000; i++) a.update(value++);
+
+  update_theta_sketch b = update_theta_sketch::builder().build();
+  for (int i = 0; i < 10000; i++) b.update(value++);
+
+  theta_a_not_b a_not_b;
+
+  // unordered inputs
+  compact_theta_sketch result = a_not_b.compute(a, b);
+  REQUIRE_FALSE(result.is_empty());
+  REQUIRE(result.is_estimation_mode());
+  REQUIRE(result.get_estimate() == Approx(10000).margin(10000 * 0.02));
+
+  // ordered inputs
+  result = a_not_b.compute(a.compact(), b.compact());
+  REQUIRE_FALSE(result.is_empty());
+  REQUIRE(result.is_estimation_mode());
+  REQUIRE(result.get_estimate() == Approx(10000).margin(10000 * 0.02));
+}
+
+TEST_CASE("theta a-not-b: estimation mode full overlap", "[theta_a_not_b]") {
+  update_theta_sketch sketch = update_theta_sketch::builder().build();
+  int value = 0;
+  for (int i = 0; i < 10000; i++) sketch.update(value++);
+
+  theta_a_not_b a_not_b;
+
+  // unordered inputs
+  compact_theta_sketch result = a_not_b.compute(sketch, sketch);
+  REQUIRE_FALSE(result.is_empty());
+  REQUIRE(result.is_estimation_mode());
+  REQUIRE(result.get_estimate() == 0.0);
+
+  // ordered inputs
+  result = a_not_b.compute(sketch.compact(), sketch.compact());
+  REQUIRE_FALSE(result.is_empty());
+  REQUIRE(result.is_estimation_mode());
+  REQUIRE(result.get_estimate() == 0.0);
+}
+
+TEST_CASE("theta a-not-b: seed mismatch", "[theta_a_not_b]") {
+  update_theta_sketch sketch = update_theta_sketch::builder().build();
+  sketch.update(1); // non-empty should not be ignored
+  theta_a_not_b a_not_b(123);
+  REQUIRE_THROWS_AS(a_not_b.compute(sketch, sketch), std::invalid_argument);
+}
+
+TEST_CASE("theta a-not-b: issue #152", "[theta_a_not_b]") {
+  update_theta_sketch a = update_theta_sketch::builder().build();
+  int value = 0;
+  for (int i = 0; i < 10000; i++) a.update(value++);
+
+  update_theta_sketch b = update_theta_sketch::builder().build();
+  value = 5000;
+  for (int i = 0; i < 25000; i++) b.update(value++);
+
+  theta_a_not_b a_not_b;
+
+  // unordered inputs
+  compact_theta_sketch result = a_not_b.compute(a, b);
+  REQUIRE_FALSE(result.is_empty());
+  REQUIRE(result.is_estimation_mode());
+  REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.03));
+
+  // ordered inputs
+  result = a_not_b.compute(a.compact(), b.compact());
+  REQUIRE_FALSE(result.is_empty());
+  REQUIRE(result.is_estimation_mode());
+  REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.03));
+}
+
+} /* namespace datasketches */
diff --git a/tuple/test/theta_intersection_experimental_test.cpp b/tuple/test/theta_intersection_experimental_test.cpp
new file mode 100644
index 0000000..3337636
--- /dev/null
+++ b/tuple/test/theta_intersection_experimental_test.cpp
@@ -0,0 +1,224 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <catch.hpp>
+
+#include <theta_intersection_experimental.hpp>
+
+namespace datasketches {
+
+// These tests have been copied from the existing theta sketch implementation.
+
+using update_theta_sketch = update_theta_sketch_experimental<>;
+using compact_theta_sketch = compact_theta_sketch_experimental<>;
+using theta_intersection = theta_intersection_experimental<>;
+
+TEST_CASE("theta intersection: invalid", "[theta_intersection]") {
+  theta_intersection intersection;
+  REQUIRE_FALSE(intersection.has_result());
+  REQUIRE_THROWS_AS(intersection.get_result(), std::invalid_argument);
+}
+
+TEST_CASE("theta intersection: empty", "[theta_intersection]") {
+  theta_intersection intersection;
+  update_theta_sketch sketch = update_theta_sketch::builder().build();
+  intersection.update(sketch);
+  compact_theta_sketch result = intersection.get_result();
+  REQUIRE(result.get_num_retained() == 0);
+  REQUIRE(result.is_empty());
+  REQUIRE_FALSE(result.is_estimation_mode());
+  REQUIRE(result.get_estimate() == 0.0);
+
+  intersection.update(sketch);
+  result = intersection.get_result();
+  REQUIRE(result.get_num_retained() == 0);
+  REQUIRE(result.is_empty());
+  REQUIRE_FALSE(result.is_estimation_mode());
+  REQUIRE(result.get_estimate() == 0.0);
+}
+
+TEST_CASE("theta intersection: non empty no retained keys", "[theta_intersection]") {
+  update_theta_sketch sketch = update_theta_sketch::builder().set_p(0.001).build();
+  sketch.update(1);
+  theta_intersection intersection;
+  intersection.update(sketch);
+  compact_theta_sketch result = intersection.get_result();
+  REQUIRE(result.get_num_retained() == 0);
+  REQUIRE_FALSE(result.is_empty());
+  REQUIRE(result.is_estimation_mode());
+  REQUIRE(result.get_theta() == Approx(0.001).margin(1e-10));
+  REQUIRE(result.get_estimate() == 0.0);
+
+  intersection.update(sketch);
+  result = intersection.get_result();
+  REQUIRE(result.get_num_retained() == 0);
+  REQUIRE_FALSE(result.is_empty());
+  REQUIRE(result.is_estimation_mode());
+  REQUIRE(result.get_theta() == Approx(0.001).margin(1e-10));
+  REQUIRE(result.get_estimate() == 0.0);
+}
+
+TEST_CASE("theta intersection: exact mode half overlap unordered", "[theta_intersection]") {
+  update_theta_sketch sketch1 = update_theta_sketch::builder().build();
+  int value = 0;
+  for (int i = 0; i < 1000; i++) sketch1.update(value++);
+
+  update_theta_sketch sketch2 = update_theta_sketch::builder().build();
+  value = 500;
+  for (int i = 0; i < 1000; i++) sketch2.update(value++);
+
+  theta_intersection intersection;
+  intersection.update(sketch1);
+  intersection.update(sketch2);
+  compact_theta_sketch result = intersection.get_result();
+  REQUIRE_FALSE(result.is_empty());
+  REQUIRE_FALSE(result.is_estimation_mode());
+  REQUIRE(result.get_estimate() == 500.0);
+}
+
+TEST_CASE("theta intersection: exact mode half overlap ordered", "[theta_intersection]") {
+  update_theta_sketch sketch1 = update_theta_sketch::builder().build();
+  int value = 0;
+  for (int i = 0; i < 1000; i++) sketch1.update(value++);
+
+  update_theta_sketch sketch2 = update_theta_sketch::builder().build();
+  value = 500;
+  for (int i = 0; i < 1000; i++) sketch2.update(value++);
+
+  theta_intersection intersection;
+  intersection.update(sketch1.compact());
+  intersection.update(sketch2.compact());
+  compact_theta_sketch result = intersection.get_result();
+  REQUIRE_FALSE(result.is_empty());
+  REQUIRE_FALSE(result.is_estimation_mode());
+  REQUIRE(result.get_estimate() == 500.0);
+}
+
+TEST_CASE("theta intersection: exact mode disjoint unordered", "[theta_intersection]") {
+  update_theta_sketch sketch1 = update_theta_sketch::builder().build();
+  int value = 0;
+  for (int i = 0; i < 1000; i++) sketch1.update(value++);
+
+  update_theta_sketch sketch2 = update_theta_sketch::builder().build();
+  for (int i = 0; i < 1000; i++) sketch2.update(value++);
+
+  theta_intersection intersection;
+  intersection.update(sketch1);
+  intersection.update(sketch2);
+  compact_theta_sketch result = intersection.get_result();
+  REQUIRE(result.is_empty());
+  REQUIRE_FALSE(result.is_estimation_mode());
+  REQUIRE(result.get_estimate() == 0.0);
+}
+
+TEST_CASE("theta intersection: exact mode disjoint ordered", "[theta_intersection]") {
+  update_theta_sketch sketch1 = update_theta_sketch::builder().build();
+  int value = 0;
+  for (int i = 0; i < 1000; i++) sketch1.update(value++);
+
+  update_theta_sketch sketch2 = update_theta_sketch::builder().build();
+  for (int i = 0; i < 1000; i++) sketch2.update(value++);
+
+  theta_intersection intersection;
+  intersection.update(sketch1.compact());
+  intersection.update(sketch2.compact());
+  compact_theta_sketch result = intersection.get_result();
+  REQUIRE(result.is_empty());
+  REQUIRE_FALSE(result.is_estimation_mode());
+  REQUIRE(result.get_estimate() == 0.0);
+}
+
+TEST_CASE("theta intersection: estimation mode half overlap unordered", "[theta_intersection]") {
+  update_theta_sketch sketch1 = update_theta_sketch::builder().build();
+  int value = 0;
+  for (int i = 0; i < 10000; i++) sketch1.update(value++);
+
+  update_theta_sketch sketch2 = update_theta_sketch::builder().build();
+  value = 5000;
+  for (int i = 0; i < 10000; i++) sketch2.update(value++);
+
+  theta_intersection intersection;
+  intersection.update(sketch1);
+  intersection.update(sketch2);
+  compact_theta_sketch result = intersection.get_result();
+  REQUIRE_FALSE(result.is_empty());
+  REQUIRE(result.is_estimation_mode());
+  REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02));
+}
+
+TEST_CASE("theta intersection: estimation mode half overlap ordered", "[theta_intersection]") {
+  update_theta_sketch sketch1 = update_theta_sketch::builder().build();
+  int value = 0;
+  for (int i = 0; i < 10000; i++) sketch1.update(value++);
+
+  update_theta_sketch sketch2 = update_theta_sketch::builder().build();
+  value = 5000;
+  for (int i = 0; i < 10000; i++) sketch2.update(value++);
+
+  theta_intersection intersection;
+  intersection.update(sketch1.compact());
+  intersection.update(sketch2.compact());
+  compact_theta_sketch result = intersection.get_result();
+  REQUIRE_FALSE(result.is_empty());
+  REQUIRE(result.is_estimation_mode());
+  REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02));
+}
+
+TEST_CASE("theta intersection: estimation mode disjoint unordered", "[theta_intersection]") {
+  update_theta_sketch sketch1 = update_theta_sketch::builder().build();
+  int value = 0;
+  for (int i = 0; i < 10000; i++) sketch1.update(value++);
+
+  update_theta_sketch sketch2 = update_theta_sketch::builder().build();
+  for (int i = 0; i < 10000; i++) sketch2.update(value++);
+
+  theta_intersection intersection;
+  intersection.update(sketch1);
+  intersection.update(sketch2);
+  compact_theta_sketch result = intersection.get_result();
+  REQUIRE_FALSE(result.is_empty());
+  REQUIRE(result.is_estimation_mode());
+  REQUIRE(result.get_estimate() == 0.0);
+}
+
+TEST_CASE("theta intersection: estimation mode disjoint ordered", "[theta_intersection]") {
+  update_theta_sketch sketch1 = update_theta_sketch::builder().build();
+  int value = 0;
+  for (int i = 0; i < 10000; i++) sketch1.update(value++);
+
+  update_theta_sketch sketch2 = update_theta_sketch::builder().build();
+  for (int i = 0; i < 10000; i++) sketch2.update(value++);
+
+  theta_intersection intersection;
+  intersection.update(sketch1.compact());
+  intersection.update(sketch2.compact());
+  compact_theta_sketch result = intersection.get_result();
+  REQUIRE_FALSE(result.is_empty());
+  REQUIRE(result.is_estimation_mode());
+  REQUIRE(result.get_estimate() == 0.0);
+}
+
+TEST_CASE("theta intersection: seed mismatch", "[theta_intersection]") {
+  update_theta_sketch sketch = update_theta_sketch::builder().build();
+  sketch.update(1); // non-empty should not be ignored
+  theta_intersection intersection(123);
+  REQUIRE_THROWS_AS(intersection.update(sketch), std::invalid_argument);
+}
+
+} /* namespace datasketches */
diff --git a/tuple/test/theta_sketch_experimental_test.cpp b/tuple/test/theta_sketch_experimental_test.cpp
index e0ccdf3..61435a4 100644
--- a/tuple/test/theta_sketch_experimental_test.cpp
+++ b/tuple/test/theta_sketch_experimental_test.cpp
@@ -31,6 +31,9 @@ const std::string inputPath = TEST_BINARY_INPUT_PATH;
 const std::string inputPath = "test/";
 #endif
 
+// These tests have been copied from the existing theta sketch implementation.
+// Serialization as base class and serialization of update sketch have been removed.
+
 using update_theta_sketch = update_theta_sketch_experimental<>;
 using compact_theta_sketch = compact_theta_sketch_experimental<>;
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org