You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2020/06/19 20:51:39 UTC

[incubator-datasketches-cpp] branch tuple_sketch created (now 2ed9399)

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a change to branch tuple_sketch
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git.


      at 2ed9399  partially implemented working prototype

This branch includes the following new commits:

     new 2ed9399  partially implemented working prototype

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org


[incubator-datasketches-cpp] 01/01: partially implemented working prototype

Posted by al...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch tuple_sketch
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git

commit 2ed9399d83162e9a11f1b44389a3b37ac3984160
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Fri Jun 19 13:51:23 2020 -0700

    partially implemented working prototype
---
 CMakeLists.txt                                   |   1 +
 {theta => common}/include/binomial_bounds.hpp    |   0
 common/test/test_type.hpp                        |  20 +-
 tuple/CMakeLists.txt                             |  64 +++
 tuple/include/theta_sketch_experimental.hpp      |  95 +++++
 tuple/include/theta_sketch_experimental_impl.hpp | 152 +++++++
 tuple/include/theta_union_base.hpp               |  56 +++
 tuple/include/theta_union_base_impl.hpp          |  61 +++
 tuple/include/theta_union_experimental.hpp       |  87 ++++
 tuple/include/theta_union_experimental_impl.hpp  |  42 ++
 tuple/include/theta_update_sketch_base.hpp       | 227 ++++++++++
 tuple/include/theta_update_sketch_base_impl.hpp  | 255 ++++++++++++
 tuple/include/tuple_sketch.hpp                   | 502 +++++++++++++++++++++++
 tuple/include/tuple_sketch_impl.hpp              | 313 ++++++++++++++
 tuple/include/tuple_union.hpp                    | 109 +++++
 tuple/include/tuple_union_impl.hpp               |  46 +++
 tuple/test/CMakeLists.txt                        |  45 ++
 tuple/test/theta_sketch_experimental_test.cpp    |  56 +++
 tuple/test/theta_union_experimental_test.cpp     |  47 +++
 tuple/test/tuple_sketch_test.cpp                 | 189 +++++++++
 tuple/test/tuple_union_test.cpp                  |  45 ++
 21 files changed, 2411 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c98c4a2..ff1d4a5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -95,6 +95,7 @@ add_subdirectory(kll)
 add_subdirectory(fi)
 add_subdirectory(theta)
 add_subdirectory(sampling)
+add_subdirectory(tuple)
 
 if (WITH_PYTHON)
   add_subdirectory(python)
diff --git a/theta/include/binomial_bounds.hpp b/common/include/binomial_bounds.hpp
similarity index 100%
rename from theta/include/binomial_bounds.hpp
rename to common/include/binomial_bounds.hpp
diff --git a/common/test/test_type.hpp b/common/test/test_type.hpp
index abbd8f3..1651c9a 100644
--- a/common/test/test_type.hpp
+++ b/common/test/test_type.hpp
@@ -25,7 +25,7 @@
 namespace datasketches {
 
 class test_type {
-  static const bool DEBUG = false;
+  static const bool DEBUG = true;
 public:
   // no default constructor should be required
   test_type(int value): value(value) {
@@ -91,6 +91,24 @@ struct test_type_serde {
   size_t size_of_item(const test_type&) {
     return sizeof(int);
   }
+  size_t serialize(void* ptr, size_t capacity, const test_type* items, unsigned num) {
+    const size_t bytes_written = sizeof(int) * num;
+    check_memory_size(bytes_written, capacity);
+    for (unsigned i = 0; i < num; ++i) {
+      memcpy(ptr, &items[i], sizeof(int));
+      ptr = static_cast<char*>(ptr) + sizeof(int);
+    }
+    return bytes_written;
+  }
+  size_t deserialize(const void* ptr, size_t capacity, test_type* items, unsigned num) {
+    const size_t bytes_read = sizeof(int) * num;
+    check_memory_size(bytes_read, capacity);
+    for (unsigned i = 0; i < num; ++i) {
+      memcpy(&items[i], ptr, sizeof(int));
+      ptr = static_cast<const char*>(ptr) + sizeof(int);
+    }
+    return bytes_read;
+  }
 };
 
 std::ostream& operator<<(std::ostream& os, const test_type& a) {
diff --git a/tuple/CMakeLists.txt b/tuple/CMakeLists.txt
new file mode 100644
index 0000000..17fc2b1
--- /dev/null
+++ b/tuple/CMakeLists.txt
@@ -0,0 +1,64 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+add_library(tuple INTERFACE)
+
+add_library(${PROJECT_NAME}::TUPLE ALIAS tuple)
+
+if (BUILD_TESTS)
+  add_subdirectory(test)
+endif()
+
+target_include_directories(tuple
+  INTERFACE
+    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+    $<INSTALL_INTERFACE:$<INSTALL_PREFIX>/include>
+)
+
+target_link_libraries(tuple INTERFACE common)
+target_compile_features(tuple INTERFACE cxx_std_11)
+
+set(tuple_HEADERS "")
+list(APPEND tuple_HEADERS "include/tuple_sketch.hpp;include/tuple_sketch_impl.hpp")
+list(APPEND tuple_HEADERS "include/tuple_union.hpp;include/tuple_union_impl.hpp")
+list(APPEND tuple_HEADERS "include/theta_update_sketch_base.hpp;include/theta_update_sketch_base_impl.hpp")
+list(APPEND tuple_HEADERS "include/theta_union_base.hpp;include/theta_union_base_impl.hpp")
+list(APPEND tuple_HEADERS "include/theta_sketch_experimental.hpp;include/theta_sketch_experimental_impl.hpp")
+list(APPEND tuple_HEADERS "include/theta_union_experimental.hpp;include/theta_union_experimental_impl.hpp")
+
+install(TARGETS tuple
+  EXPORT ${PROJECT_NAME}
+)
+
+install(FILES ${tuple_HEADERS}
+  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
+
+target_sources(tuple
+  INTERFACE
+    ${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_sketch.hpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_sketch_impl.hpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_union.hpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_union_impl.hpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/include/theta_update_sketch_base.hpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/include/theta_update_sketch_base_impl.hpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/include/theta_union_base.hpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/include/theta_union_base_impl.hpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/include/theta_sketch_experimental.hpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/include/theta_sketch_experimental_impl.hpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/include/theta_union_experimental.hpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/include/theta_union_experimental_impl.hpp
+)
diff --git a/tuple/include/theta_sketch_experimental.hpp b/tuple/include/theta_sketch_experimental.hpp
new file mode 100644
index 0000000..9c8bd3d
--- /dev/null
+++ b/tuple/include/theta_sketch_experimental.hpp
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THETA_SKETCH_EXPERIMENTAL_HPP_
+#define THETA_SKETCH_EXPERIMENTAL_HPP_
+
+#include "serde.hpp"
+#include "theta_update_sketch_base.hpp"
+
+namespace datasketches {
+
+// experimental theta sketch derived from the same base as tuple sketch
+
+template<typename A> class compact_theta_sketch_experimental;
+
+template<typename A = std::allocator<uint64_t>>
+class theta_sketch_experimental {
+public:
+  using resize_factor = theta_constants::resize_factor;
+  using AllocBytes = typename std::allocator_traits<A>::template rebind_alloc<uint8_t>;
+  using vector_bytes = std::vector<uint8_t, AllocBytes>;
+
+  class builder: public theta_base_builder<true> {
+  public:
+      theta_sketch_experimental build() const;
+  };
+
+  bool is_empty() const { return table_.is_empty_; }
+  bool is_ordered() const { return false; }
+  uint16_t get_seed_hash() const { return compute_seed_hash(DEFAULT_SEED); }
+  uint64_t get_theta64() const { return table_.theta_; }
+  uint32_t get_num_retained() const { return table_.num_entries_; }
+
+  void update(uint64_t key);
+  void update(const void* key, size_t length);
+
+  string<A> to_string(bool detail = false) const;
+
+  vector_bytes serialize(unsigned header_size_bytes = 0) const;
+
+  using const_iterator = theta_const_iterator<uint64_t, trivial_extract_key<uint64_t>>;
+  const_iterator begin() const;
+  const_iterator end() const;
+
+  compact_theta_sketch_experimental<A> compact(bool ordered = true) const;
+
+private:
+  enum flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDERED };
+  typedef theta_update_sketch_base<uint64_t, trivial_extract_key<uint64_t>, A> theta_table;
+  theta_table table_;
+
+  theta_sketch_experimental(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed);
+};
+
+template<typename A = std::allocator<uint64_t>>
+class compact_theta_sketch_experimental {
+public:
+  compact_theta_sketch_experimental(const theta_sketch_experimental<A>& other, bool ordered);
+
+  template<typename InputIt>
+  compact_theta_sketch_experimental(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta, InputIt first, InputIt last);
+
+  uint32_t get_num_retained() const { return entries_.size(); }
+
+  string<A> to_string(bool detail = false) const;
+
+private:
+  bool is_empty_;
+  bool is_ordered_;
+  uint16_t seed_hash_;
+  uint64_t theta_;
+  std::vector<uint64_t, A> entries_;
+};
+
+} /* namespace datasketches */
+
+#include "theta_sketch_experimental_impl.hpp"
+
+#endif
diff --git a/tuple/include/theta_sketch_experimental_impl.hpp b/tuple/include/theta_sketch_experimental_impl.hpp
new file mode 100644
index 0000000..347022f
--- /dev/null
+++ b/tuple/include/theta_sketch_experimental_impl.hpp
@@ -0,0 +1,152 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <sstream>
+
+namespace datasketches {
+
+// experimental update theta sketch derived from the same base as tuple sketch
+
+template<typename A>
+theta_sketch_experimental<A>::theta_sketch_experimental(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed):
+table_(lg_cur_size, lg_nom_size, rf, p, seed)
+{}
+
+template<typename A>
+void theta_sketch_experimental<A>::update(uint64_t key) {
+  update(&key, sizeof(key));
+}
+
+template<typename A>
+void theta_sketch_experimental<A>::update(const void* key, size_t length) {
+  const uint64_t hash = compute_hash(key, length, DEFAULT_SEED);
+  if (hash >= table_.theta_ || hash == 0) return; // hash == 0 is reserved to mark empty slots in the table
+  auto result = table_.find(hash);
+  if (!result.second) {
+    table_.insert(result.first, hash);
+  }
+}
+
+template<typename A>
+string<A> theta_sketch_experimental<A>::to_string(bool detail) const {
+  std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
+  auto type = typeid(*this).name();
+  os << "sizeof(" << type << ")=" << sizeof(*this) << std::endl;
+  os << table_.to_string();
+  if (detail) {
+    for (const auto& it: table_) {
+      if (it != 0) {
+        os << it << std::endl;
+      }
+    }
+  }
+  return os.str();
+}
+
+template<typename A>
+auto theta_sketch_experimental<A>::serialize(unsigned header_size_bytes) const -> vector_bytes {
+  const uint8_t preamble_longs = 3;
+  const size_t size = header_size_bytes + sizeof(uint64_t) * preamble_longs + sizeof(uint64_t) * (1 << table_.lg_cur_size_);
+  vector_bytes bytes(size);
+  uint8_t* ptr = bytes.data() + header_size_bytes;
+
+  const uint8_t preamble_longs_and_rf = preamble_longs | (table_.rf_ << 6);
+  ptr += copy_to_mem(&preamble_longs_and_rf, ptr, sizeof(preamble_longs_and_rf));
+  const uint8_t serial_version = 0;
+  ptr += copy_to_mem(&serial_version, ptr, sizeof(serial_version));
+  const uint8_t type = 0;
+  ptr += copy_to_mem(&type, ptr, sizeof(type));
+  ptr += copy_to_mem(&table_.lg_nom_size_, ptr, sizeof(table_.lg_nom_size_));
+  ptr += copy_to_mem(&table_.lg_cur_size_, ptr, sizeof(table_.lg_cur_size_));
+  const uint8_t flags_byte(
+    (this->is_empty() ? 1 << flags::IS_EMPTY : 0)
+  );
+  ptr += copy_to_mem(&flags_byte, ptr, sizeof(flags_byte));
+  const uint16_t seed_hash = 0;
+  ptr += copy_to_mem(&seed_hash, ptr, sizeof(seed_hash));
+  ptr += copy_to_mem(&table_.num_entries_, ptr, sizeof(table_.num_entries_));
+  const float p = 1;
+  ptr += copy_to_mem(&p, ptr, sizeof(p));
+  ptr += copy_to_mem(&table_.theta_, ptr, sizeof(table_.theta_));
+  ptr += copy_to_mem(table_.entries_, ptr, sizeof(uint64_t) * (1 << table_.lg_cur_size_));
+
+  return bytes;
+}
+
+template<typename A>
+auto theta_sketch_experimental<A>::begin() const -> const_iterator {
+  return const_iterator(table_.entries_, 1 << table_.lg_cur_size_, 0);
+}
+
+template<typename A>
+auto theta_sketch_experimental<A>::end() const -> const_iterator {
+  return const_iterator(nullptr, 0, 1 << table_.lg_cur_size_);
+}
+
+template<typename A>
+theta_sketch_experimental<A> theta_sketch_experimental<A>::builder::build() const {
+  return theta_sketch_experimental(starting_sub_multiple(lg_k_ + 1, MIN_LG_K, static_cast<uint8_t>(rf_)), lg_k_, rf_, p_, seed_);
+}
+
+template<typename A>
+compact_theta_sketch_experimental<A> theta_sketch_experimental<A>::compact(bool ordered) const {
+  return compact_theta_sketch_experimental<A>(*this, ordered);
+}
+
+// experimental compact theta sketch
+
+template<typename A>
+compact_theta_sketch_experimental<A>::compact_theta_sketch_experimental(const theta_sketch_experimental<A>& other, bool ordered):
+is_empty_(other.is_empty()),
+is_ordered_(other.is_ordered()),
+seed_hash_(other.get_seed_hash()),
+theta_(other.get_theta64()),
+entries_()
+{
+  entries_.reserve(other.get_num_retained());
+  std::copy(other.begin(), other.end(), std::back_inserter(entries_));
+  if (ordered && !other.is_ordered()) std::sort(entries_.begin(), entries_.end());
+}
+
+template<typename A>
+template<typename InputIt>
+compact_theta_sketch_experimental<A>::compact_theta_sketch_experimental(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta, InputIt first, InputIt last):
+is_empty_(is_empty),
+is_ordered_(is_ordered),
+seed_hash_(seed_hash),
+theta_(theta),
+entries_()
+{
+  std::copy_if(first, last, std::back_inserter(entries_), [theta](uint64_t value) { return value != 0 && value < theta; });
+}
+
+template<typename A>
+string<A> compact_theta_sketch_experimental<A>::to_string(bool detail) const {
+  std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
+  auto type = typeid(*this).name();
+  os << "sizeof(" << type << ")=" << sizeof(*this) << std::endl;
+  if (detail) {
+    for (const auto& hash: entries_) {
+      os << hash << std::endl;
+    }
+  }
+  return os.str();
+}
+
+} /* namespace datasketches */
diff --git a/tuple/include/theta_union_base.hpp b/tuple/include/theta_union_base.hpp
new file mode 100644
index 0000000..e62dc79
--- /dev/null
+++ b/tuple/include/theta_union_base.hpp
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THETA_UNION_BASE_HPP_
+#define THETA_UNION_BASE_HPP_
+
+#include "theta_update_sketch_base.hpp"
+
+namespace datasketches {
+
+template<
+  typename Entry,
+  typename ExtractKey,
+  typename Policy,
+  typename Sketch,
+  typename CompactSketch,
+  typename Allocator = std::allocator<Entry>
+>
+class theta_union_base {
+public:
+  using hash_table = theta_update_sketch_base<Entry, ExtractKey, Allocator>;
+  using resize_factor = theta_constants::resize_factor;
+
+  theta_union_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, Policy policy);
+
+  void update(const Sketch& sketch);
+
+  CompactSketch get_result(bool ordered = true) const;
+
+private:
+  Policy policy_;
+  hash_table table_;
+  uint64_t union_theta_;
+};
+
+} /* namespace datasketches */
+
+#include "theta_union_base_impl.hpp"
+
+#endif
diff --git a/tuple/include/theta_union_base_impl.hpp b/tuple/include/theta_union_base_impl.hpp
new file mode 100644
index 0000000..2e6def6
--- /dev/null
+++ b/tuple/include/theta_union_base_impl.hpp
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <iostream>
+#include <sstream>
+#include <algorithm>
+
+namespace datasketches {
+
+template<typename E, typename EK, typename P, typename S, typename CS, typename A>
+theta_union_base<E, EK, P, S, CS, A>::theta_union_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, P policy):
+policy_(policy),
+table_(lg_cur_size, lg_nom_size, rf, p, seed),
+union_theta_(table_.theta_)
+{}
+
+template<typename E, typename EK, typename P, typename S, typename CS, typename A>
+void theta_union_base<E, EK, P, S, CS, A>::update(const S& sketch) {
+  if (sketch.is_empty()) return;
+  if (sketch.get_seed_hash() != compute_seed_hash(table_.seed_)) throw std::invalid_argument("seed hash mismatch");
+  table_.is_empty_ = false;
+  if (sketch.get_theta64() < union_theta_) union_theta_ = sketch.get_theta64();
+  for (const auto& entry: sketch) {
+    const uint64_t hash = EK()(entry);
+    if (hash < union_theta_) {
+      auto result = table_.find(hash);
+      if (!result.second) {
+        table_.insert(result.first, entry);
+      } else {
+        *result.first = policy_(*result.first, entry);
+      }
+    } else {
+      if (sketch.is_ordered()) break; // early stop
+    }
+  }
+  if (table_.theta_ < union_theta_) union_theta_ = table_.theta_;
+}
+
+template<typename E, typename EK, typename P, typename S, typename CS, typename A>
+CS theta_union_base<E, EK, P, S, CS, A>::get_result(bool ordered) const {
+  // relies on the constructor to skip holes and hashes < theta
+  return CS(table_.is_empty_, ordered, DEFAULT_SEED, theta_constants::MAX_THETA, table_.begin(), table_.end());
+}
+
+} /* namespace datasketches */
diff --git a/tuple/include/theta_union_experimental.hpp b/tuple/include/theta_union_experimental.hpp
new file mode 100644
index 0000000..585c5fe
--- /dev/null
+++ b/tuple/include/theta_union_experimental.hpp
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THETA_UNION_EXPERIMENTAL_HPP_
+#define THETA_UNION_EXPERIMENTAL_HPP_
+
+#include "serde.hpp"
+#include "tuple_sketch.hpp"
+#include "theta_union_base.hpp"
+#include "theta_sketch_experimental.hpp"
+
+namespace datasketches {
+
+// experimental theta union derived from the same base as tuple union
+
+template<typename Allocator = std::allocator<uint64_t>>
+class theta_union_experimental {
+public:
+  using Entry = uint64_t;
+  using ExtractKey = trivial_extract_key<uint64_t>;
+  using Sketch = theta_sketch_experimental<Allocator>;
+  using CompactSketch = compact_theta_sketch_experimental<Allocator>;
+  using resize_factor = theta_constants::resize_factor;
+
+  struct pass_through_policy {
+    uint64_t operator()(uint64_t internal_entry, uint64_t incoming_entry) const {
+      return internal_entry;
+    }
+  };
+
+  using State = theta_union_base<Entry, ExtractKey, pass_through_policy, Sketch, CompactSketch, Allocator>;
+
+  // No constructor here. Use builder instead.
+  class builder;
+
+  /**
+   * This method is to update the union with a given sketch
+   * @param sketch to update the union with
+   */
+  void update(const Sketch& sketch);
+
+  /**
+   * This method produces a copy of the current state of the union as a compact sketch.
+   * @param ordered optional flag to specify if ordered sketch should be produced
+   * @return the result of the union
+   */
+  CompactSketch get_result(bool ordered = true) const;
+
+private:
+  State state_;
+
+  // for builder
+  theta_union_experimental(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed);
+};
+
+template<typename A>
+class theta_union_experimental<A>::builder: public theta_base_builder<true> {
+public:
+
+  /**
+   * This is to create an instance of the union with predefined parameters.
+   * @return an instance of the union
+   */
+  theta_union_experimental<A> build() const;
+};
+
+} /* namespace datasketches */
+
+#include "theta_union_experimental_impl.hpp"
+
+#endif
diff --git a/tuple/include/theta_union_experimental_impl.hpp b/tuple/include/theta_union_experimental_impl.hpp
new file mode 100644
index 0000000..be75024
--- /dev/null
+++ b/tuple/include/theta_union_experimental_impl.hpp
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+namespace datasketches {
+
+template<typename A>
+theta_union_experimental<A>::theta_union_experimental(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed):
+state_(lg_cur_size, lg_nom_size, rf, p, seed, pass_through_policy())
+{}
+
+template<typename A>
+void theta_union_experimental<A>::update(const Sketch& sketch) {
+  state_.update(sketch);
+}
+
+template<typename A>
+auto theta_union_experimental<A>::get_result(bool ordered) const -> CompactSketch {
+  return state_.get_result(ordered);
+}
+
+template<typename A>
+auto theta_union_experimental<A>::builder::build() const -> theta_union_experimental {
+  return theta_union_experimental(starting_sub_multiple(lg_k_ + 1, MIN_LG_K, static_cast<uint8_t>(rf_)), lg_k_, rf_, p_, seed_);
+}
+
+} /* namespace datasketches */
diff --git a/tuple/include/theta_update_sketch_base.hpp b/tuple/include/theta_update_sketch_base.hpp
new file mode 100644
index 0000000..e90091f
--- /dev/null
+++ b/tuple/include/theta_update_sketch_base.hpp
@@ -0,0 +1,227 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef THETA_UPDATE_SKETCH_BASE_HPP_
+#define THETA_UPDATE_SKETCH_BASE_HPP_
+
+#include <vector>
+#include <climits>
+
+#include "common_defs.hpp"
+#include "MurmurHash3.h"
+
+namespace datasketches {
+
+namespace theta_constants {
+  enum resize_factor { X1, X2, X4, X8 };
+  static const uint64_t MAX_THETA = LLONG_MAX; // signed max for compatibility with Java
+}
+
+template<
+  typename Entry,
+  typename ExtractKey,
+  typename Allocator = std::allocator<Entry>
+>
+struct theta_update_sketch_base {
+  using resize_factor = theta_constants::resize_factor;
+
+  theta_update_sketch_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed);
+  // TODO: copy and move
+  ~theta_update_sketch_base();
+
+  typedef Entry* iterator;
+
+  std::pair<iterator, bool> find(uint64_t key) const;
+
+  template<typename FwdEntry>
+  void insert(iterator it, FwdEntry&& entry);
+
+  iterator begin() const;
+  iterator end() const;
+
+  string<Allocator> to_string() const;
+
+  // resize threshold = 0.5 tuned for speed
+  static constexpr double RESIZE_THRESHOLD = 0.5;
+  // hash table rebuild threshold = 15/16
+  static constexpr double REBUILD_THRESHOLD = 15.0 / 16.0;
+
+  static constexpr uint8_t STRIDE_HASH_BITS = 7;
+  static constexpr uint32_t STRIDE_MASK = (1 << STRIDE_HASH_BITS) - 1;
+
+  bool is_empty_;
+  uint8_t lg_cur_size_;
+  uint8_t lg_nom_size_;
+  resize_factor rf_;
+  float p_;
+  uint32_t num_entries_;
+  uint64_t theta_;
+  uint64_t seed_;
+  Entry* entries_;
+
+  void resize();
+  void rebuild();
+
+  static inline uint32_t get_capacity(uint8_t lg_cur_size, uint8_t lg_nom_size);
+  static inline uint32_t get_stride(uint64_t key, uint8_t lg_size);
+
+  struct comparator {
+    bool operator()(Entry& a, Entry& b) const {
+      return ExtractKey()(a) < ExtractKey()(b);
+    }
+  };
+};
+
+// builder
+
+template<bool dummy>
+class theta_base_builder {
+public:
+  using resize_factor = theta_constants::resize_factor;
+  static const uint8_t MIN_LG_K = 5;
+  static const uint8_t DEFAULT_LG_K = 12;
+  static const resize_factor DEFAULT_RESIZE_FACTOR = resize_factor::X8;
+
+  /**
+   * Creates and instance of the builder with default parameters.
+   */
+  theta_base_builder();
+
+  /**
+   * Set log2(k), where k is a nominal number of entries in the sketch
+   * @param lg_k base 2 logarithm of nominal number of entries
+   * @return this builder
+   */
+  theta_base_builder& set_lg_k(uint8_t lg_k);
+
+  /**
+   * Set resize factor for the internal hash table (defaults to 8)
+   * @param rf resize factor
+   * @return this builder
+   */
+  theta_base_builder& set_resize_factor(resize_factor rf);
+
+  /**
+   * Set sampling probability (initial theta). The default is 1, so the sketch retains
+   * all entries until it reaches the limit, at which point it goes into the estimation mode
+   * and reduces the effective sampling probability (theta) as necessary.
+   * @param p sampling probability
+   * @return this builder
+   */
+  theta_base_builder& set_p(float p);
+
+  /**
+   * Set the seed for the hash function. Should be used carefully if needed.
+   * Sketches produced with different seed are not compatible
+   * and cannot be mixed in set operations.
+   * @param seed hash seed
+   * @return this builder
+   */
+  theta_base_builder& set_seed(uint64_t seed);
+
+protected:
+  uint8_t lg_k_;
+  resize_factor rf_;
+  float p_;
+  uint64_t seed_;
+
+  static uint8_t starting_sub_multiple(uint8_t lg_tgt, uint8_t lg_min, uint8_t lg_rf);
+};
+
+// key extractors
+
+template<typename T>
+struct trivial_extract_key {
+  T& operator()(T& entry) const {
+    return entry;
+  }
+  const T& operator()(const T& entry) const {
+    return entry;
+  }
+};
+
+template<typename K, typename V>
+struct pair_extract_key {
+  K& operator()(std::pair<K, V>& entry) const {
+    return entry.first;
+  }
+  const K& operator()(const std::pair<K, V>& entry) const {
+    return entry.first;
+  }
+};
+
+// less than
+
+template<typename T>
+class less_than {
+public:
+  explicit less_than(const T& value): value(value) {}
+  bool operator()(const T& value) const { return value < this->value; }
+private:
+  T value;
+};
+
+template<typename Key, typename Entry, typename ExtractKey>
+class key_less_than {
+public:
+  explicit key_less_than(const Key& key): key(key) {}
+  bool operator()(const Entry& entry) const {
+    return ExtractKey()(entry) < this->key;
+  }
+private:
+  Key key;
+};
+
+// MurMur3 hash functions
+
+static inline uint64_t compute_hash(const void* data, size_t length, uint64_t seed) {
+  HashState hashes;
+  MurmurHash3_x64_128(data, length, seed, hashes);
+  return (hashes.h1 >> 1); // Java implementation does unsigned shift >>> to make values positive
+}
+
+static inline uint16_t compute_seed_hash(uint64_t seed) {
+  HashState hashes;
+  MurmurHash3_x64_128(&seed, sizeof(seed), 0, hashes);
+  return hashes.h1;
+}
+
+// iterator
+
+template<typename Entry, typename ExtractKey>
+class theta_const_iterator: public std::iterator<std::input_iterator_tag, Entry> {
+public:
+  theta_const_iterator(const Entry* entries, uint32_t size, uint32_t index);
+  theta_const_iterator& operator++();
+  theta_const_iterator operator++(int);
+  bool operator==(const theta_const_iterator& other) const;
+  bool operator!=(const theta_const_iterator& other) const;
+  const Entry& operator*() const;
+
+private:
+  const Entry* entries_;
+  uint32_t size_;
+  uint32_t index_;
+};
+
+} /* namespace datasketches */
+
+#include "theta_update_sketch_base_impl.hpp"
+
+#endif
diff --git a/tuple/include/theta_update_sketch_base_impl.hpp b/tuple/include/theta_update_sketch_base_impl.hpp
new file mode 100644
index 0000000..f65f3e9
--- /dev/null
+++ b/tuple/include/theta_update_sketch_base_impl.hpp
@@ -0,0 +1,255 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <iostream>
+#include <sstream>
+#include <algorithm>
+
+namespace datasketches {
+
+template<typename EN, typename EK, typename A>
+theta_update_sketch_base<EN, EK, A>::theta_update_sketch_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed):
+is_empty_(true),
+lg_cur_size_(lg_cur_size),
+lg_nom_size_(lg_nom_size),
+rf_(rf),
+p_(p),
+num_entries_(0),
+theta_(theta_constants::MAX_THETA),
+seed_(seed),
+entries_(nullptr)
+{
+  const size_t size = 1 << (lg_cur_size + 1);
+  entries_ = A().allocate(size);
+  for (size_t i = 0; i < size; ++i) EK()(entries_[i]) = 0;
+  if (p < 1) this->theta_ *= p;
+}
+
+template<typename EN, typename EK, typename A>
+theta_update_sketch_base<EN, EK, A>::~theta_update_sketch_base()
+{
+  const size_t size = 1 << lg_cur_size_;
+  for (size_t i = 0; i < size; ++i) {
+    if (EK()(entries_[i]) != 0) entries_[i].~EN();
+  }
+  A().deallocate(entries_, size);
+}
+
+//template<typename EN, typename EK, typename A>
+//void theta_base<EN, EK, A>::update(EN&& entry) {
+//  const uint64_t hash = EK()(entry);
+//  if (hash >= this->theta_ || hash == 0) return; // hash == 0 is reserved to mark empty slots in the table
+//  auto result = find(hash);
+//  if (!result.second) {
+//    insert(result.first, std::forward<EN>(entry));
+//  }
+//}
+
+template<typename EN, typename EK, typename A>
+auto theta_update_sketch_base<EN, EK, A>::find(uint64_t key) const -> std::pair<iterator, bool> {
+  const size_t size = 1 << lg_cur_size_;
+  const size_t mask = size - 1;
+  const uint32_t stride = get_stride(key, lg_cur_size_);
+  uint32_t index = static_cast<uint32_t>(key) & mask;
+  // search for duplicate or zero
+  const uint32_t loop_index = index;
+  do {
+    const uint64_t probe = EK()(entries_[index]);
+    if (probe == 0) {
+      return std::pair<iterator, bool>(&entries_[index], false);
+    } else if (probe == key) {
+      return std::pair<iterator, bool>(&entries_[index], true);
+    }
+    index = (index + stride) & mask;
+  } while (index != loop_index);
+  throw std::logic_error("key not found and no empty slots!");
+}
+
+template<typename EN, typename EK, typename A>
+template<typename Fwd>
+void theta_update_sketch_base<EN, EK, A>::insert(iterator it, Fwd&& entry) {
+  new (it) EN(std::forward<Fwd>(entry));
+  is_empty_ = false;
+  ++num_entries_;
+  if (num_entries_ > get_capacity(lg_cur_size_, lg_nom_size_)) {
+    if (lg_cur_size_ <= lg_nom_size_) {
+      resize();
+    } else {
+      rebuild();
+    }
+  }
+}
+
+template<typename EN, typename EK, typename A>
+auto theta_update_sketch_base<EN, EK, A>::begin() const -> iterator {
+  return entries_;
+}
+
+template<typename EN, typename EK, typename A>
+auto theta_update_sketch_base<EN, EK, A>::end() const -> iterator {
+  return &entries_[1 << lg_cur_size_];
+}
+
+template<typename EN, typename EK, typename A>
+string<A> theta_update_sketch_base<EN, EK, A>::to_string() const {
+  std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
+  auto type = typeid(*this).name();
+  os << "type: " << type << std::endl;
+  os << "sizeof: " << sizeof(*this) << std::endl;
+  os << "is_empty:    " << (is_empty_ ? "true" : "false") << std::endl;
+  os << "lg_cur_size: " << std::to_string(lg_cur_size_) << std::endl;
+  os << "lg_nom_size: " << std::to_string(lg_nom_size_) << std::endl;
+  os << "num_entries: " << num_entries_ << std::endl;
+  os << "theta (as long): " << theta_ << std::endl;
+  os << "theta (as fraction): " << static_cast<double>(theta_) / theta_constants::MAX_THETA << std::endl;
+  return os.str();
+}
+
+template<typename EN, typename EK, typename A>
+uint32_t theta_update_sketch_base<EN, EK, A>::get_capacity(uint8_t lg_cur_size, uint8_t lg_nom_size) {
+  const double fraction = (lg_cur_size <= lg_nom_size) ? RESIZE_THRESHOLD : REBUILD_THRESHOLD;
+  return std::floor(fraction * (1 << lg_cur_size));
+}
+
+template<typename EN, typename EK, typename A>
+uint32_t theta_update_sketch_base<EN, EK, A>::get_stride(uint64_t key, uint8_t lg_size) {
+  // odd and independent of index assuming lg_size lowest bits of the key were used for the index
+  return (2 * static_cast<uint32_t>((key >> lg_size) & STRIDE_MASK)) + 1;
+}
+
+template<typename EN, typename EK, typename A>
+void theta_update_sketch_base<EN, EK, A>::resize() {
+  const size_t old_size = 1 << lg_cur_size_;
+  const uint8_t lg_tgt_size = lg_nom_size_ + 1;
+  const uint8_t factor = std::max(1, std::min(static_cast<int>(rf_), lg_tgt_size - lg_cur_size_));
+  lg_cur_size_ += factor;
+  const size_t new_size = 1 << lg_cur_size_;
+//  std::cout << "resizing from " << old_size << " to " << new_size << std::endl;
+  EN* old_entries = entries_;
+  entries_ = A().allocate(new_size);
+  for (size_t i = 0; i < new_size; ++i) EK()(entries_[i]) = 0;
+  num_entries_ = 0;
+  for (size_t i = 0; i < old_size; ++i) {
+    const uint64_t key = EK()(old_entries[i]);
+    if (key != 0) {
+      insert(find(key).first, std::move(old_entries[i])); // consider a special insert with no comparison
+      old_entries[i].~EN();
+    }
+  }
+  A().deallocate(old_entries, old_size);
+}
+
+template<typename EN, typename EK, typename A>
+void theta_update_sketch_base<EN, EK, A>::rebuild() {
+//  std::cout << "rebuilding" << std::endl;
+  const size_t size = 1 << lg_cur_size_;
+  const uint32_t pivot = (1 << lg_nom_size_) + size - num_entries_;
+  std::nth_element(&entries_[0], &entries_[pivot], &entries_[size], comparator());
+  this->theta_ = EK()(entries_[pivot]);
+  EN* old_entries = entries_;
+  entries_ = A().allocate(size);
+  for (size_t i = 0; i < size; ++i) EK()(entries_[i]) = 0;
+  num_entries_ = 0;
+  for (size_t i = 0; i < size; ++i) {
+    const uint64_t key = EK()(old_entries[i]);
+    if (key != 0 && key < this->theta_) {
+      insert(find(key).first, std::move(old_entries[i])); // consider a special insert with no comparison
+      old_entries[i].~EN();
+    }
+  }
+  A().deallocate(old_entries, size);
+}
+
+// builder
+
+template<bool dummy>
+theta_base_builder<dummy>::theta_base_builder():
+lg_k_(DEFAULT_LG_K), rf_(DEFAULT_RESIZE_FACTOR), p_(1), seed_(DEFAULT_SEED) {}
+
+template<bool dummy>
+auto theta_base_builder<dummy>::set_lg_k(uint8_t lg_k) -> theta_base_builder& {
+  if (lg_k < MIN_LG_K) {
+    throw std::invalid_argument("lg_k must not be less than " + std::to_string(MIN_LG_K) + ": " + std::to_string(lg_k));
+  }
+  lg_k_ = lg_k;
+  return *this;
+}
+
+template<bool dummy>
+auto theta_base_builder<dummy>::set_resize_factor(resize_factor rf) -> theta_base_builder& {
+  rf_ = rf;
+  return *this;
+}
+
+template<bool dummy>
+auto theta_base_builder<dummy>::set_p(float p) -> theta_base_builder& {
+  if (p < 0 || p > 1) throw std::invalid_argument("sampling probability must be between 0 and 1");
+  p_ = p;
+  return *this;
+}
+
+template<bool dummy>
+auto theta_base_builder<dummy>::set_seed(uint64_t seed) -> theta_base_builder& {
+  seed_ = seed;
+  return *this;
+}
+
+template<bool dummy>
+uint8_t theta_base_builder<dummy>::starting_sub_multiple(uint8_t lg_tgt, uint8_t lg_min, uint8_t lg_rf) {
+  return (lg_tgt <= lg_min) ? lg_min : (lg_rf == 0) ? lg_tgt : ((lg_tgt - lg_min) % lg_rf) + lg_min;
+}
+
+// iterator
+
+template<typename Entry, typename ExtractKey>
+theta_const_iterator<Entry, ExtractKey>::theta_const_iterator(const Entry* entries, uint32_t size, uint32_t index):
+entries_(entries), size_(size), index_(index) {
+  while (index_ < size_ && ExtractKey()(entries_[index_]) == 0) ++index_;
+}
+
+template<typename Entry, typename ExtractKey>
+auto theta_const_iterator<Entry, ExtractKey>::operator++() -> theta_const_iterator& {
+  ++index_;
+  while (index_ < size_ && ExtractKey()(entries_[index_]) == 0) ++index_;
+  return *this;
+}
+
+template<typename Entry, typename ExtractKey>
+auto theta_const_iterator<Entry, ExtractKey>::operator++(int) -> theta_const_iterator {
+  theta_const_iterator tmp(*this);
+  operator++();
+  return tmp;
+}
+
+template<typename Entry, typename ExtractKey>
+bool theta_const_iterator<Entry, ExtractKey>::operator!=(const theta_const_iterator& other) const {
+  return index_ != other.index_;
+}
+
+template<typename Entry, typename ExtractKey>
+bool theta_const_iterator<Entry, ExtractKey>::operator==(const theta_const_iterator& other) const {
+  return index_ == other.index_;
+}
+
+template<typename Entry, typename ExtractKey>
+auto theta_const_iterator<Entry, ExtractKey>::operator*() const -> const Entry& {
+  return entries_[index_];
+}
+
+} /* namespace datasketches */
diff --git a/tuple/include/tuple_sketch.hpp b/tuple/include/tuple_sketch.hpp
new file mode 100644
index 0000000..55dda48
--- /dev/null
+++ b/tuple/include/tuple_sketch.hpp
@@ -0,0 +1,502 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef TUPLE_SKETCH_HPP_
+#define TUPLE_SKETCH_HPP_
+
+#include <string>
+
+#include "serde.hpp"
+#include "theta_update_sketch_base.hpp"
+
+namespace datasketches {
+
+// forward-declarations
+template<typename S, typename SD, typename A> class tuple_sketch;
+template<typename S, typename U, typename P, typename SD, typename A> class update_tuple_sketch;
+template<typename S, typename SD, typename A> class compact_tuple_sketch;
+
+template<
+  typename Summary,
+  typename SerDe = serde<Summary>,
+  typename Allocator = std::allocator<Summary>
+>
+class tuple_sketch {
+public:
+  static const uint8_t SERIAL_VERSION = 3;
+
+  virtual ~tuple_sketch() = default;
+
+  /**
+   * @return true if this sketch represents an empty set (not the same as no retained entries!)
+   */
+  virtual bool is_empty() const = 0;
+
+  /**
+   * @return estimate of the distinct count of the input stream
+   */
+  double get_estimate() const;
+
+  /**
+   * Returns the approximate lower error bound given a number of standard deviations.
+   * This parameter is similar to the number of standard deviations of the normal distribution
+   * and corresponds to approximately 67%, 95% and 99% confidence intervals.
+   * @param num_std_devs number of Standard Deviations (1, 2 or 3)
+   * @return the lower bound
+   */
+  double get_lower_bound(uint8_t num_std_devs) const;
+
+  /**
+   * Returns the approximate upper error bound given a number of standard deviations.
+   * This parameter is similar to the number of standard deviations of the normal distribution
+   * and corresponds to approximately 67%, 95% and 99% confidence intervals.
+   * @param num_std_devs number of Standard Deviations (1, 2 or 3)
+   * @return the upper bound
+   */
+  double get_upper_bound(uint8_t num_std_devs) const;
+
+  /**
+   * @return true if the sketch is in estimation mode (as opposed to exact mode)
+   */
+  bool is_estimation_mode() const;
+
+  /**
+   * @return theta as a fraction from 0 to 1 (effective sampling rate)
+   */
+  double get_theta() const;
+
+  /**
+   * @return theta as a positive integer between 0 and LLONG_MAX
+   */
+  virtual uint64_t get_theta64() const = 0;
+
+  /**
+   * @return the number of retained entries in the sketch
+   */
+  virtual uint32_t get_num_retained() const = 0;
+
+  virtual uint16_t get_seed_hash() const = 0;
+
+  /**
+   * @return true if retained entries are ordered
+   */
+  virtual bool is_ordered() const = 0;
+
+  /**
+   * Writes a human-readable summary of this sketch to a given stream
+   * @param print_items if true include the list of items retained by the sketch
+   */
+  virtual string<Allocator> to_string(bool print_items = false) const = 0;
+
+  /**
+   * This method serializes the sketch into a given stream in a binary form
+   * @param os output stream
+   */
+//  virtual void serialize(std::ostream& os) const = 0;
+
+  // This is a convenience alias for users
+  // The type returned by the following serialize method
+//  typedef vector_u8<Allocator> vector_bytes;
+
+  /**
+   * This method serializes the sketch as a vector of bytes.
+   * An optional header can be reserved in front of the sketch.
+   * It is an uninitialized space of a given size.
+   * This header is used in Datasketches PostgreSQL extension.
+   * @param header_size_bytes space to reserve in front of the sketch
+   */
+//  virtual vector_bytes serialize(unsigned header_size_bytes = 0) const = 0;
+
+  // This is a convenience alias for users
+  // The type returned by the following deserialize methods
+  // It is not possible to return instances of an abstract type, so this has to be a pointer
+//  typedef std::unique_ptr<tuple_sketch, std::function<void(tuple_sketch*)>> unique_ptr;
+
+  /**
+   * This method deserializes a sketch from a given stream.
+   * @param is input stream
+   * @param seed the seed for the hash function that was used to create the sketch
+   * @return an instance of a sketch as a unique_ptr
+   */
+//  static unique_ptr deserialize(std::istream& is, uint64_t seed = DEFAULT_SEED);
+
+  /**
+   * This method deserializes a sketch from a given array of bytes.
+   * @param bytes pointer to the array of bytes
+   * @param size the size of the array
+   * @param seed the seed for the hash function that was used to create the sketch
+   * @return an instance of the sketch
+   */
+//  static unique_ptr deserialize(const void* bytes, size_t size, uint64_t seed = DEFAULT_SEED);
+
+  using Entry = std::pair<uint64_t, Summary>;
+  using const_iterator = theta_const_iterator<Entry, pair_extract_key<uint64_t, Summary>>;
+
+  /**
+   * Iterator over entries in this sketch.
+   * @return begin iterator
+   */
+  virtual const_iterator begin() const = 0;
+
+  /**
+   * Iterator pointing past the valid range.
+   * Not to be incremented or dereferenced.
+   * @return end iterator
+   */
+  virtual const_iterator end() const = 0;
+
+protected:
+  enum flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDERED };
+
+  static uint16_t get_seed_hash(uint64_t seed);
+
+  static void check_sketch_type(uint8_t actual, uint8_t expected);
+  static void check_serial_version(uint8_t actual, uint8_t expected);
+  static void check_seed_hash(uint16_t actual, uint16_t expected);
+};
+
+// update sketch
+
+// for types with defined default constructor and + operation
+template<typename Summary, typename Update>
+struct default_update_policy {
+  Summary create() const {
+    return Summary();
+  }
+  void update(Summary& summary, const Update& update) const {
+    summary += update;
+  }
+};
+
+template<
+  typename Summary,
+  typename Update = Summary,
+  typename Policy = default_update_policy<Summary, Update>,
+  typename SerDe = serde<Summary>,
+  typename Allocator = std::allocator<Summary>
+>
+class update_tuple_sketch: public tuple_sketch<Summary, SerDe, Allocator> {
+public:
+  using Base = tuple_sketch<Summary, SerDe, Allocator>;
+  using Entry = typename Base::Entry;
+  using AllocEntry = typename std::allocator_traits<Allocator>::template rebind_alloc<Entry>;
+  using tuple_map = theta_update_sketch_base<Entry, pair_extract_key<uint64_t, Summary>, AllocEntry>;
+  using resize_factor = typename tuple_map::resize_factor;
+  using const_iterator = typename Base::const_iterator;
+
+  static const uint8_t SKETCH_TYPE = 2;
+
+  // No constructor here. Use builder instead.
+  class builder;
+
+  virtual ~update_tuple_sketch() = default;
+
+  virtual bool is_empty() const;
+  virtual bool is_ordered() const;
+  virtual uint64_t get_theta64() const;
+  virtual uint32_t get_num_retained() const;
+  virtual uint16_t get_seed_hash() const;
+  virtual string<Allocator> to_string(bool print_items = false) const;
+
+//  virtual void serialize(std::ostream& os) const;
+//  typedef vector_u8<Allocator> vector_bytes; // alias for users
+//  virtual vector_bytes serialize(unsigned header_size_bytes = 0) const;
+
+  /**
+   * @return configured nominal number of entries in the sketch
+   */
+  uint8_t get_lg_k() const;
+
+  /**
+   * @return configured resize factor of the sketch
+   */
+  resize_factor get_rf() const;
+
+  /**
+   * Update this sketch with a given string.
+   * @param value string to update the sketch with
+   */
+//  template<typename FwdUpdate>
+//  void update(const std::string& key, FwdUpdate&& value);
+
+  /**
+   * Update this sketch with a given unsigned 64-bit integer.
+   * @param value uint64_t to update the sketch with
+   */
+  template<typename FwdUpdate>
+  void update(uint64_t key, FwdUpdate&& value);
+
+  /**
+   * Update this sketch with a given signed 64-bit integer.
+   * @param value int64_t to update the sketch with
+   */
+//  template<typename FwdUpdate>
+//  void update(int64_t key, FwdUpdate&& value);
+//
+//  /**
+//   * Update this sketch with a given unsigned 32-bit integer.
+//   * For compatibility with Java implementation.
+//   * @param value uint32_t to update the sketch with
+//   */
+//  template<typename FwdUpdate>
+//  void update(uint32_t key, FwdUpdate&& value);
+//
+//  /**
+//   * Update this sketch with a given signed 32-bit integer.
+//   * For compatibility with Java implementation.
+//   * @param value int32_t to update the sketch with
+//   */
+//  template<typename FwdUpdate>
+//  void update(int32_t key, FwdUpdate&& value);
+//
+//  /**
+//   * Update this sketch with a given unsigned 16-bit integer.
+//   * For compatibility with Java implementation.
+//   * @param value uint16_t to update the sketch with
+//   */
+//  template<typename FwdUpdate>
+//  void update(uint16_t key, FwdUpdate&& value);
+//
+//  /**
+//   * Update this sketch with a given signed 16-bit integer.
+//   * For compatibility with Java implementation.
+//   * @param value int16_t to update the sketch with
+//   */
+//  template<typename FwdUpdate>
+//  void update(int16_t key, FwdUpdate&& value);
+//
+//  /**
+//   * Update this sketch with a given unsigned 8-bit integer.
+//   * For compatibility with Java implementation.
+//   * @param value uint8_t to update the sketch with
+//   */
+//  template<typename FwdUpdate>
+//  void update(uint8_t key, FwdUpdate&& value);
+//
+//  /**
+//   * Update this sketch with a given signed 8-bit integer.
+//   * For compatibility with Java implementation.
+//   * @param value int8_t to update the sketch with
+//   */
+//  template<typename FwdUpdate>
+//  void update(int8_t key, FwdUpdate&& value);
+//
+//  /**
+//   * Update this sketch with a given double-precision floating point value.
+//   * For compatibility with Java implementation.
+//   * @param value double to update the sketch with
+//   */
+//  template<typename FwdUpdate>
+//  void update(double key, FwdUpdate&& value);
+//
+//  /**
+//   * Update this sketch with a given floating point value.
+//   * For compatibility with Java implementation.
+//   * @param value float to update the sketch with
+//   */
+//  template<typename FwdUpdate>
+//  void update(float key, FwdUpdate&& value);
+
+  /**
+   * Update this sketch with given data of any type.
+   * This is a "universal" update that covers all cases above,
+   * but may produce different hashes.
+   * Be very careful to hash input values consistently using the same approach
+   * both over time and on different platforms
+   * and while passing sketches between C++ environment and Java environment.
+   * Otherwise two sketches that should represent overlapping sets will be disjoint
+   * For instance, for signed 32-bit values call update(int32_t) method above,
+   * which does widening conversion to int64_t, if compatibility with Java is expected
+   * @param data pointer to the data
+   * @param length of the data in bytes
+   */
+  template<typename FwdUpdate>
+  void update(const void* key, size_t length, FwdUpdate&& value);
+
+  /**
+   * Remove retained entries in excess of the nominal size k (if any)
+   */
+  void trim();
+
+  /**
+   * Converts this sketch to a compact sketch (ordered or unordered).
+   * @param ordered optional flag to specify if ordered sketch should be produced
+   * @return compact sketch
+   */
+  compact_tuple_sketch<Summary, SerDe, Allocator> compact(bool ordered = true) const;
+
+  virtual const_iterator begin() const;
+  virtual const_iterator end() const;
+
+  /**
+   * This method deserializes a sketch from a given stream.
+   * @param is input stream
+   * @param seed the seed for the hash function that was used to create the sketch
+   * @return an instance of a sketch
+   */
+//  static update_tuple_sketch deserialize(std::istream& is, uint64_t seed = DEFAULT_SEED);
+
+  /**
+   * This method deserializes a sketch from a given array of bytes.
+   * @param bytes pointer to the array of bytes
+   * @param size the size of the array
+   * @param seed the seed for the hash function that was used to create the sketch
+   * @return an instance of the sketch
+   */
+//  static update_tuple_sketch deserialize(const void* bytes, size_t size, uint64_t seed = DEFAULT_SEED);
+
+  /**
+   * Computes size needed to serialize the current state of the sketch.
+   * This version is for fixed-size arithmetic types (integral and floating point).
+   * @return size in bytes needed to serialize this sketch
+   */
+//  template<typename SS = Summary, typename std::enable_if<std::is_arithmetic<SS>::value, int>::type = 0>
+//  size_t get_serialized_size_bytes() const;
+
+  /**
+   * Computes size needed to serialize the current state of the sketch.
+   * This version is for all other types and can be expensive since every item needs to be looked at.
+   * @return size in bytes needed to serialize this sketch
+   */
+//  template<typename SS = Summary, typename std::enable_if<!std::is_arithmetic<SS>::value, int>::type = 0>
+//  size_t get_serialized_size_bytes() const;
+
+private:
+  Policy policy_;
+  tuple_map map_;
+
+  // for builder
+  update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const Policy& policy);
+};
+
+// compact sketch
+
+template<
+  typename Summary,
+  typename SerDe = serde<Summary>,
+  typename Allocator = std::allocator<Summary>
+>
+class compact_tuple_sketch: public tuple_sketch<Summary, SerDe, Allocator> {
+public:
+  using Base = tuple_sketch<Summary, SerDe, Allocator>;
+  using Entry = typename Base::Entry;
+  using const_iterator = typename Base::const_iterator;
+  using AllocBytes = typename std::allocator_traits<Allocator>::template rebind_alloc<uint8_t>;
+  using vector_bytes = std::vector<uint8_t, AllocBytes>;
+
+  static const uint8_t SKETCH_TYPE = 3;
+
+  // Instances of this type can be obtained:
+  // - by compacting an update_tuple_sketch
+  // - as a result of a set operation
+  // - by deserializing a previously serialized compact sketch
+
+  compact_tuple_sketch(const Base& other, bool ordered);
+  virtual ~compact_tuple_sketch() = default;
+
+  virtual bool is_empty() const;
+  virtual bool is_ordered() const;
+  virtual uint64_t get_theta64() const;
+  virtual uint32_t get_num_retained() const;
+  virtual uint16_t get_seed_hash() const;
+  virtual string<Allocator> to_string(bool print_items = false) const;
+
+  void serialize(std::ostream& os) const;
+  vector_bytes serialize(unsigned header_size_bytes = 0) const;
+
+  virtual const_iterator begin() const;
+  virtual const_iterator end() const;
+
+  /**
+   * This method deserializes a sketch from a given stream.
+   * @param is input stream
+   * @param seed the seed for the hash function that was used to create the sketch
+   * @return an instance of a sketch
+   */
+  static compact_tuple_sketch deserialize(std::istream& is, uint64_t seed = DEFAULT_SEED);
+
+  /**
+   * This method deserializes a sketch from a given array of bytes.
+   * @param bytes pointer to the array of bytes
+   * @param size the size of the array
+   * @param seed the seed for the hash function that was used to create the sketch
+   * @return an instance of the sketch
+   */
+  static compact_tuple_sketch deserialize(const void* bytes, size_t size, uint64_t seed = DEFAULT_SEED);
+
+  // TODO: try to hide this
+  template<typename InputIt>
+  compact_tuple_sketch(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta, InputIt first, InputIt last);
+
+private:
+  bool is_empty_;
+  bool is_ordered_;
+  uint16_t seed_hash_;
+  uint64_t theta_;
+
+  using AllocEntry = typename std::allocator_traits<Allocator>::template rebind_alloc<Entry>;
+  std::vector<Entry, AllocEntry> entries_;
+
+  //compact_tuple_sketch(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta, std::vector<Entry, AllocEntry>&& entries);
+
+  /**
+   * Computes size needed to serialize summaries in the sketch.
+   * This version is for fixed-size arithmetic types (integral and floating point).
+   * @return size in bytes needed to serialize summaries in this sketch
+   */
+  template<typename SS = Summary, typename std::enable_if<std::is_arithmetic<SS>::value, int>::type = 0>
+  size_t get_serialized_size_summaries_bytes() const;
+
+  /**
+   * Computes size needed to serialize summaries in the sketch.
+   * This version is for all other types and can be expensive since every item needs to be looked at.
+   * @return size in bytes needed to serialize summaries in this sketch
+   */
+  template<typename SS = Summary, typename std::enable_if<!std::is_arithmetic<SS>::value, int>::type = 0>
+  size_t get_serialized_size_summaries_bytes() const;
+
+//  static compact_tuple_sketch<Summary, SerDe, Allocator> internal_deserialize(std::istream& is, uint8_t preamble_longs, uint8_t flags_byte, uint16_t seed_hash);
+//  static compact_tuple_sketch<Summary, SerDe, Allocator> internal_deserialize(const void* bytes, size_t size, uint8_t preamble_longs, uint8_t flags_byte, uint16_t seed_hash);
+};
+
+// builder
+
+template<typename S, typename U, typename P, typename SD, typename A>
+class update_tuple_sketch<S, U, P, SD, A>::builder: public theta_base_builder<true> {
+public:
+  /**
+   * Creates and instance of the builder with default parameters.
+   */
+  builder(const P& policy = P());
+
+  /**
+   * This is to create an instance of the sketch with predefined parameters.
+   * @return an instance of the sketch
+   */
+  update_tuple_sketch<S, U, P, SD, A> build() const;
+
+private:
+  P policy_;
+};
+
+} /* namespace datasketches */
+
+#include "tuple_sketch_impl.hpp"
+
+#endif
diff --git a/tuple/include/tuple_sketch_impl.hpp b/tuple/include/tuple_sketch_impl.hpp
new file mode 100644
index 0000000..c6bb323
--- /dev/null
+++ b/tuple/include/tuple_sketch_impl.hpp
@@ -0,0 +1,313 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <sstream>
+
+#include "binomial_bounds.hpp"
+
+namespace datasketches {
+
+template<typename S, typename SD, typename A>
+bool tuple_sketch<S, SD, A>::is_estimation_mode() const {
+  return get_theta64() < theta_constants::MAX_THETA && !is_empty();
+}
+
+template<typename S, typename SD, typename A>
+double tuple_sketch<S, SD, A>::get_theta() const {
+  return static_cast<double>(get_theta64()) / theta_constants::MAX_THETA;
+}
+
+template<typename S, typename SD, typename A>
+double tuple_sketch<S, SD, A>::get_estimate() const {
+  return get_num_retained() / get_theta();
+}
+
+template<typename S, typename SD, typename A>
+double tuple_sketch<S, SD, A>::get_lower_bound(uint8_t num_std_devs) const {
+  if (!is_estimation_mode()) return get_num_retained();
+  return binomial_bounds::get_lower_bound(get_num_retained(), get_theta(), num_std_devs);
+}
+
+template<typename S, typename SD, typename A>
+double tuple_sketch<S, SD, A>::get_upper_bound(uint8_t num_std_devs) const {
+  if (!is_estimation_mode()) return get_num_retained();
+  return binomial_bounds::get_upper_bound(get_num_retained(), get_theta(), num_std_devs);
+}
+
+// update sketch
+
+template<typename S, typename U, typename P, typename SD, typename A>
+update_tuple_sketch<S, U, P, SD, A>::update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const P& policy):
+policy_(policy),
+map_(lg_cur_size, lg_nom_size, rf, p, seed)
+{}
+
+template<typename S, typename U, typename P, typename SD, typename A>
+bool update_tuple_sketch<S, U, P, SD, A>::is_empty() const {
+  return map_.is_empty_;
+}
+
+template<typename S, typename U, typename P, typename SD, typename A>
+bool update_tuple_sketch<S, U, P, SD, A>::is_ordered() const {
+  return false;
+}
+
+template<typename S, typename U, typename P, typename SD, typename A>
+uint64_t update_tuple_sketch<S, U, P, SD, A>::get_theta64() const {
+  return map_.theta_;
+}
+
+template<typename S, typename U, typename P, typename SD, typename A>
+uint32_t update_tuple_sketch<S, U, P, SD, A>::get_num_retained() const {
+  return map_.num_entries_;
+}
+
+template<typename S, typename U, typename P, typename SD, typename A>
+uint16_t update_tuple_sketch<S, U, P, SD, A>::get_seed_hash() const {
+  return compute_seed_hash(map_.seed_);
+}
+
+template<typename S, typename U, typename P, typename SD, typename A>
+uint8_t update_tuple_sketch<S, U, P, SD, A>::get_lg_k() const {
+  return map_.lg_nom_size_;
+}
+
+template<typename S, typename U, typename P, typename SD, typename A>
+auto update_tuple_sketch<S, U, P, SD, A>::get_rf() const -> resize_factor {
+  return map_.rf_;
+}
+
+template<typename S, typename U, typename P, typename SD, typename A>
+template<typename UU>
+void update_tuple_sketch<S, U, P, SD, A>::update(uint64_t key, UU&& value) {
+  update(&key, sizeof(key), std::forward<UU>(value));
+}
+
+template<typename S, typename U, typename P, typename SD, typename A>
+template<typename UU>
+void update_tuple_sketch<S, U, P, SD, A>::update(const void* key, size_t length, UU&& value) {
+  const uint64_t hash = compute_hash(key, length, map_.seed_);
+  if (hash >= map_.theta_ || hash == 0) return; // hash == 0 is reserved to mark empty slots in the table
+  auto result = map_.find(hash);
+  if (!result.second) {
+    map_.insert(result.first, Entry(hash, policy_.create()));
+  }
+  policy_.update((*result.first).second, std::forward<UU>(value));
+}
+
+template<typename S, typename U, typename P, typename SD, typename A>
+string<A> update_tuple_sketch<S, U, P, SD, A>::to_string(bool detail) const {
+  std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
+  auto type = typeid(*this).name();
+  os << "sizeof(" << type << ")=" << sizeof(*this) << std::endl;
+  os << "sizeof(entry)=" << sizeof(Entry) << std::endl;
+  os << map_.to_string();
+  if (detail) {
+    for (const auto& it: map_) {
+      if (it.first != 0) {
+        os << it.first << ": " << it.second << std::endl;
+      }
+    }
+  }
+  return os.str();
+}
+
+template<typename S, typename U, typename P, typename SD, typename A>
+auto update_tuple_sketch<S, U, P, SD, A>::begin() const -> const_iterator {
+  return const_iterator(map_.entries_, 1 << map_.lg_cur_size_, 0);
+}
+
+template<typename S, typename U, typename P, typename SD, typename A>
+auto update_tuple_sketch<S, U, P, SD, A>::end() const -> const_iterator {
+  return const_iterator(nullptr, 0, 1 << map_.lg_cur_size_);
+}
+
+template<typename S, typename U, typename P, typename SD, typename A>
+compact_tuple_sketch<S, SD, A> update_tuple_sketch<S, U, P, SD, A>::compact(bool ordered) const {
+  return compact_tuple_sketch<S, SD, A>(*this, ordered);
+}
+
+// compact sketch
+
+//template<typename S, typename SD, typename A>
+//compact_tuple_sketch<S, SD, A>::compact_tuple_sketch(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta, std::vector<Entry, AllocEntry>&& entries):
+//is_empty_(is_empty),
+//is_ordered_(is_ordered),
+//seed_hash_(seed_hash),
+//theta_(theta),
+//entries_(std::move(entries))
+//{}
+
+template<typename S, typename SD, typename A>
+template<typename InputIt>
+compact_tuple_sketch<S, SD, A>::compact_tuple_sketch(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta, InputIt first, InputIt last):
+is_empty_(is_empty),
+is_ordered_(is_ordered),
+seed_hash_(seed_hash),
+theta_(theta),
+entries_()
+{
+  std::copy_if(first, last, back_inserter(entries_), key_less_than<uint64_t, Entry, pair_extract_key<uint64_t, S>>(theta));
+}
+
+template<typename S, typename SD, typename A>
+compact_tuple_sketch<S, SD, A>::compact_tuple_sketch(const Base& other, bool ordered):
+is_empty_(other.is_empty()),
+is_ordered_(other.is_ordered() || ordered),
+seed_hash_(other.get_seed_hash()),
+theta_(other.get_theta64()),
+entries_()
+{
+  entries_.reserve(other.get_num_retained());
+  std::copy(other.begin(), other.end(), std::back_inserter(entries_));
+  if (ordered && !other.is_ordered()) std::sort(entries_.begin(), entries_.end());
+}
+
+template<typename S, typename SD, typename A>
+bool compact_tuple_sketch<S, SD, A>::is_empty() const {
+  return is_empty_;
+}
+
+template<typename S, typename SD, typename A>
+bool compact_tuple_sketch<S, SD, A>::is_ordered() const {
+  return is_ordered_;
+}
+
+template<typename S, typename SD, typename A>
+uint64_t compact_tuple_sketch<S, SD, A>::get_theta64() const {
+  return theta_;
+}
+
+template<typename S, typename SD, typename A>
+uint32_t compact_tuple_sketch<S, SD, A>::get_num_retained() const {
+  return entries_.size();
+}
+
+template<typename S, typename SD, typename A>
+uint16_t compact_tuple_sketch<S, SD, A>::get_seed_hash() const {
+  return seed_hash_;
+}
+
+template<typename S, typename SD, typename A>
+string<A> compact_tuple_sketch<S, SD, A>::to_string(bool detail) const {
+  std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
+  auto type = typeid(*this).name();
+  os << "sizeof(" << type << ")=" << sizeof(*this) << std::endl;
+  os << "sizeof(entry)=" << sizeof(Entry) << std::endl;
+  os << "empty? : " << (is_empty_ ? "true" : "false") << std::endl;
+  if (detail) {
+    for (const auto& it: entries_) {
+      if (it.first != 0) {
+        os << it.first << ": " << it.second << std::endl;
+      }
+    }
+  }
+  return os.str();
+}
+
+// implementation for fixed-size arithmetic types (integral and floating point)
+template<typename S, typename SD, typename A>
+template<typename SS, typename std::enable_if<std::is_arithmetic<SS>::value, int>::type>
+size_t compact_tuple_sketch<S, SD, A>::get_serialized_size_summaries_bytes() const {
+  return entries_.size() * sizeof(SS);
+}
+
+// implementation for all other types (non-arithmetic)
+template<typename S, typename SD, typename A>
+template<typename SS, typename std::enable_if<!std::is_arithmetic<SS>::value, int>::type>
+size_t compact_tuple_sketch<S, SD, A>::get_serialized_size_summaries_bytes() const {
+  size_t size = 0;
+  for (const auto& it: entries_) {
+    size += SD().size_of_item(it.second);
+  }
+  return size;
+}
+
+template<typename S, typename SD, typename A>
+void compact_tuple_sketch<S, SD, A>::serialize(std::ostream& os) const {
+}
+
+template<typename S, typename SD, typename A>
+auto compact_tuple_sketch<S, SD, A>::serialize(unsigned header_size_bytes) const -> vector_bytes {
+  const bool is_single_item = entries_.size() == 1 && !this->is_estimation_mode();
+  const uint8_t preamble_longs = this->is_empty() || is_single_item ? 1 : this->is_estimation_mode() ? 3 : 2;
+  const size_t size = header_size_bytes + sizeof(uint64_t) * preamble_longs
+      + sizeof(uint64_t) * entries_.size() + get_serialized_size_summaries_bytes();
+  vector_bytes bytes(size);
+  uint8_t* ptr = bytes.data() + header_size_bytes;
+  const uint8_t* end_ptr = ptr + size;
+
+  ptr += copy_to_mem(&preamble_longs, ptr, sizeof(preamble_longs));
+  const uint8_t serial_version = Base::SERIAL_VERSION;
+  ptr += copy_to_mem(&serial_version, ptr, sizeof(serial_version));
+  const uint8_t type = SKETCH_TYPE;
+  ptr += copy_to_mem(&type, ptr, sizeof(type));
+  const uint16_t unused16 = 0;
+  ptr += copy_to_mem(&unused16, ptr, sizeof(unused16));
+  const uint8_t flags_byte(
+    (1 << Base::flags::IS_COMPACT) |
+    (1 << Base::flags::IS_READ_ONLY) |
+    (this->is_empty() ? 1 << Base::flags::IS_EMPTY : 0) |
+    (this->is_ordered() ? 1 << Base::flags::IS_ORDERED : 0)
+  );
+  ptr += copy_to_mem(&flags_byte, ptr, sizeof(flags_byte));
+  const uint16_t seed_hash = get_seed_hash();
+  ptr += copy_to_mem(&seed_hash, ptr, sizeof(seed_hash));
+  if (!this->is_empty()) {
+    if (!is_single_item) {
+      const uint32_t num_entries = entries_.size();
+      ptr += copy_to_mem(&num_entries, ptr, sizeof(num_entries));
+      const uint32_t unused32 = 0;
+      ptr += copy_to_mem(&unused32, ptr, sizeof(unused32));
+      if (this->is_estimation_mode()) {
+        ptr += copy_to_mem(&theta_, ptr, sizeof(uint64_t));
+      }
+    }
+    for (const auto& it: entries_) {
+      ptr += copy_to_mem(&it.first, ptr, sizeof(uint64_t));
+    }
+    for (const auto& it: entries_) {
+      ptr += SD().serialize(ptr, end_ptr - ptr, &it.second, 1);
+    }
+  }
+  return bytes;
+}
+
+template<typename S, typename SD, typename A>
+auto compact_tuple_sketch<S, SD, A>::begin() const -> const_iterator {
+  return const_iterator(entries_.data(), entries_.size(), 0);
+}
+
+template<typename S, typename SD, typename A>
+auto compact_tuple_sketch<S, SD, A>::end() const -> const_iterator {
+  return const_iterator(nullptr, 0, entries_.size());
+}
+
+// builder
+
+template<typename S, typename U, typename P, typename SD, typename A>
+update_tuple_sketch<S, U, P, SD, A>::builder::builder(const P& policy):
+theta_base_builder<true>(), policy_(policy) {}
+
+template<typename S, typename U, typename P, typename SD, typename A>
+update_tuple_sketch<S, U, P, SD, A> update_tuple_sketch<S, U, P, SD, A>::builder::build() const {
+  return update_tuple_sketch<S, U, P, SD, A>(starting_sub_multiple(lg_k_ + 1, MIN_LG_K, static_cast<uint8_t>(rf_)), lg_k_, rf_, p_, seed_, policy_);
+}
+
+} /* namespace datasketches */
diff --git a/tuple/include/tuple_union.hpp b/tuple/include/tuple_union.hpp
new file mode 100644
index 0000000..a23cade
--- /dev/null
+++ b/tuple/include/tuple_union.hpp
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef TUPLE_UNION_HPP_
+#define TUPLE_UNION_HPP_
+
+#include "serde.hpp"
+#include "tuple_sketch.hpp"
+#include "theta_union_base.hpp"
+#include "theta_sketch_experimental.hpp"
+
+namespace datasketches {
+
+// for types with defined + operation
+template<typename Summary>
+struct default_union_policy {
+  void operator()(Summary& summary, const Summary& other) const {
+    summary += other;
+  }
+};
+
+template<
+  typename Summary,
+  typename Policy = default_union_policy<Summary>,
+  typename SerDe = serde<Summary>,
+  typename Allocator = std::allocator<Summary>
+>
+class tuple_union {
+public:
+  using Entry = std::pair<uint64_t, Summary>;
+  using AllocEntry = typename std::allocator_traits<Allocator>::template rebind_alloc<Entry>;
+  using ExtractKey = pair_extract_key<uint64_t, Summary>;
+  using Sketch = tuple_sketch<Summary, SerDe, Allocator>;
+  using CompactSketch = compact_tuple_sketch<Summary, SerDe, Allocator>;
+  using resize_factor = theta_constants::resize_factor;
+
+  struct internal_policy {
+    internal_policy(const Policy& policy): policy_(policy) {}
+    Entry& operator()(Entry& internal_entry, const Entry& incoming_entry) const {
+      policy_(internal_entry.second, incoming_entry.second);
+      return internal_entry;
+    }
+    Policy policy_;
+  };
+
+  using State = theta_union_base<Entry, ExtractKey, internal_policy, Sketch, CompactSketch, AllocEntry>;
+
+  // No constructor here. Use builder instead.
+  class builder;
+
+  /**
+   * This method is to update the union with a given sketch
+   * @param sketch to update the union with
+   */
+  void update(const Sketch& sketch);
+
+  /**
+   * This method produces a copy of the current state of the union as a compact sketch.
+   * @param ordered optional flag to specify if ordered sketch should be produced
+   * @return the result of the union
+   */
+  CompactSketch get_result(bool ordered = true) const;
+
+private:
+  State state_;
+
+  // for builder
+  tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const Policy& policy);
+};
+
+template<typename S, typename P, typename SD, typename A>
+class tuple_union<S, P, SD, A>::builder: public theta_base_builder<true> {
+public:
+  /**
+   * Creates and instance of the builder with default parameters.
+   */
+  builder(const P& policy = P());
+
+  /**
+   * This is to create an instance of the union with predefined parameters.
+   * @return an instance of the union
+   */
+  tuple_union<S, P, SD, A> build() const;
+
+private:
+  P policy_;
+};
+
+} /* namespace datasketches */
+
+#include "tuple_union_impl.hpp"
+
+#endif
diff --git a/tuple/include/tuple_union_impl.hpp b/tuple/include/tuple_union_impl.hpp
new file mode 100644
index 0000000..8cfca52
--- /dev/null
+++ b/tuple/include/tuple_union_impl.hpp
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+namespace datasketches {
+
+template<typename S, typename P, typename SD, typename A>
+tuple_union<S, P, SD, A>::tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const P& policy):
+state_(lg_cur_size, lg_nom_size, rf, p, seed, policy)
+{}
+
+template<typename S, typename P, typename SD, typename A>
+void tuple_union<S, P, SD, A>::update(const Sketch& sketch) {
+  state_.update(sketch);
+}
+
+template<typename S, typename P, typename SD, typename A>
+auto tuple_union<S, P, SD, A>::get_result(bool ordered) const -> CompactSketch {
+  return state_.get_result(ordered);
+}
+
+template<typename S, typename P, typename SD, typename A>
+tuple_union<S, P, SD, A>::builder::builder(const P& policy):
+policy_(policy) {}
+
+template<typename S, typename P, typename SD, typename A>
+auto tuple_union<S, P, SD, A>::builder::build() const -> tuple_union {
+  return tuple_union(starting_sub_multiple(lg_k_ + 1, MIN_LG_K, static_cast<uint8_t>(rf_)), lg_k_, rf_, p_, seed_, policy_);
+}
+
+} /* namespace datasketches */
diff --git a/tuple/test/CMakeLists.txt b/tuple/test/CMakeLists.txt
new file mode 100644
index 0000000..29f9c75
--- /dev/null
+++ b/tuple/test/CMakeLists.txt
@@ -0,0 +1,45 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+add_executable(tuple_test)
+
+target_link_libraries(tuple_test tuple common_test)
+
+set_target_properties(tuple_test PROPERTIES
+  CXX_STANDARD 11
+  CXX_STANDARD_REQUIRED YES
+)
+
+file(TO_CMAKE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" THETA_TEST_BINARY_PATH)
+string(APPEND THETA_TEST_BINARY_PATH "/")
+target_compile_definitions(tuple_test
+  PRIVATE
+    TEST_BINARY_INPUT_PATH="${THETA_TEST_BINARY_PATH}"
+)
+
+add_test(
+  NAME tuple_test
+  COMMAND tuple_test
+)
+
+target_sources(tuple_test
+  PRIVATE
+    tuple_sketch_test.cpp
+    tuple_union_test.cpp
+    theta_sketch_experimental_test.cpp
+    theta_union_experimental_test.cpp
+)
diff --git a/tuple/test/theta_sketch_experimental_test.cpp b/tuple/test/theta_sketch_experimental_test.cpp
new file mode 100644
index 0000000..5407689
--- /dev/null
+++ b/tuple/test/theta_sketch_experimental_test.cpp
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <iostream>
+
+#include <catch.hpp>
+#include <theta_sketch_experimental.hpp>
+#include <../../theta/include/theta_sketch.hpp>
+
+namespace datasketches {
+
+TEST_CASE("theta_sketch_experimental: basics ", "[theta_sketch]") {
+  auto update_sketch = theta_sketch_experimental<>::builder().build();
+  update_sketch.update(1);
+  update_sketch.update(2);
+  REQUIRE(update_sketch.get_num_retained() == 2);
+  int count = 0;
+  for (const auto& entry: update_sketch) ++count;
+  REQUIRE(count == 2);
+
+  auto compact_sketch = update_sketch.compact();
+  REQUIRE(compact_sketch.get_num_retained() == 2);
+}
+
+//TEST_CASE("theta_sketch_experimental: compare with theta production", "[theta_sketch]") {
+//  auto test = theta_sketch_experimental<>::builder().build();
+//  update_theta_sketch prod = update_theta_sketch::builder().build();
+//
+//  for (int i = 0; i < 1000000; ++i) {
+//    test.update(i);
+//    prod.update(i);
+//  }
+//
+//  std::cout << "--- theta production vs experimental ---" << std::endl;
+//  std::cout << test.to_string(true);
+//  std::cout << "sizeof(update_theta_sketch)=" << sizeof(update_theta_sketch) << std::endl;
+//  std::cout << prod.to_string(true);
+//}
+
+} /* namespace datasketches */
diff --git a/tuple/test/theta_union_experimental_test.cpp b/tuple/test/theta_union_experimental_test.cpp
new file mode 100644
index 0000000..cbe1412
--- /dev/null
+++ b/tuple/test/theta_union_experimental_test.cpp
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <iostream>
+
+#include <catch.hpp>
+#include <tuple_union.hpp>
+
+#include <theta_union_experimental.hpp>
+
+namespace datasketches {
+
+TEST_CASE("theta_union_exeperimental") {
+  std::cout << "theta union test begin" << std::endl;
+  auto update_sketch1 = theta_sketch_experimental<>::builder().build();
+  update_sketch1.update(1);
+  update_sketch1.update(2);
+
+  auto update_sketch2 = theta_sketch_experimental<>::builder().build();
+  update_sketch2.update(1);
+  update_sketch2.update(3);
+
+  auto u = theta_union_experimental<>::builder().build();
+  u.update(update_sketch1);
+  u.update(update_sketch2);
+  auto r = u.get_result();
+  std::cout << r.to_string(true);
+  std::cout << "theta union test end" << std::endl;
+}
+
+} /* namespace datasketches */
diff --git a/tuple/test/tuple_sketch_test.cpp b/tuple/test/tuple_sketch_test.cpp
new file mode 100644
index 0000000..730cbb5
--- /dev/null
+++ b/tuple/test/tuple_sketch_test.cpp
@@ -0,0 +1,189 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <iostream>
+#include <tuple>
+
+// this is needed for a test below, but should be defined here
+std::ostream& operator<<(std::ostream& os, const std::tuple<double, double, double>& tuple) {
+  os << std::get<0>(tuple) << ", " << std::get<1>(tuple) << ", " << std::get<2>(tuple);
+  return os;
+}
+
+#include <catch.hpp>
+#include <tuple_sketch.hpp>
+#include <test_type.hpp>
+
+namespace datasketches {
+
+TEST_CASE("tuple sketch float: builder", "[tuple_sketch]") {
+  auto builder = update_tuple_sketch<float>::builder();
+  builder.set_lg_k(10).set_p(0.5).set_resize_factor(theta_constants::resize_factor::X2).set_seed(123);
+  auto sketch = builder.build();
+  REQUIRE(sketch.get_lg_k() == 10);
+  REQUIRE(sketch.get_theta() == 0.5);
+  REQUIRE(sketch.get_rf() == theta_constants::resize_factor::X2);
+  REQUIRE(sketch.get_seed_hash() == compute_seed_hash(123));
+}
+
+TEST_CASE("tuple sketch float: empty", "[tuple_sketch]") {
+  auto update_sketch = update_tuple_sketch<float>::builder().build();
+  REQUIRE(update_sketch.is_empty());
+  REQUIRE(!update_sketch.is_estimation_mode());
+  REQUIRE(update_sketch.get_estimate() == 0);
+  REQUIRE(update_sketch.get_lower_bound(1) == 0);
+  REQUIRE(update_sketch.get_upper_bound(1) == 0);
+  REQUIRE(update_sketch.get_theta() == 1);
+  REQUIRE(update_sketch.get_num_retained() == 0);
+  REQUIRE(!update_sketch.is_ordered());
+
+  auto compact_sketch = update_sketch.compact();
+  REQUIRE(compact_sketch.is_empty());
+  REQUIRE(!compact_sketch.is_estimation_mode());
+  REQUIRE(compact_sketch.get_estimate() == 0);
+  REQUIRE(compact_sketch.get_lower_bound(1) == 0);
+  REQUIRE(compact_sketch.get_upper_bound(1) == 0);
+  REQUIRE(compact_sketch.get_theta() == 1);
+  REQUIRE(compact_sketch.get_num_retained() == 0);
+  REQUIRE(compact_sketch.is_ordered());
+}
+
+TEST_CASE("tuple sketch float: exact mode", "[tuple_sketch]") {
+  auto update_sketch = update_tuple_sketch<float>::builder().build();
+  update_sketch.update(1, 1);
+  update_sketch.update(2, 2);
+  update_sketch.update(1, 1);
+  std::cout << update_sketch.to_string(true);
+  REQUIRE(!update_sketch.is_empty());
+  REQUIRE(!update_sketch.is_estimation_mode());
+  REQUIRE(update_sketch.get_estimate() == 2);
+  REQUIRE(update_sketch.get_lower_bound(1) == 2);
+  REQUIRE(update_sketch.get_upper_bound(1) == 2);
+  REQUIRE(update_sketch.get_theta() == 1);
+  REQUIRE(update_sketch.get_num_retained() == 2);
+  REQUIRE(!update_sketch.is_ordered());
+  int count = 0;
+  for (const auto& entry: update_sketch) {
+    REQUIRE(entry.second == 2);
+    ++count;
+  }
+  REQUIRE(count == 2);
+
+  auto compact_sketch = update_sketch.compact();
+  std::cout << compact_sketch.to_string(true);
+  REQUIRE(!compact_sketch.is_empty());
+  REQUIRE(!compact_sketch.is_estimation_mode());
+  REQUIRE(compact_sketch.get_estimate() == 2);
+  REQUIRE(compact_sketch.get_lower_bound(1) == 2);
+  REQUIRE(compact_sketch.get_upper_bound(1) == 2);
+  REQUIRE(compact_sketch.get_theta() == 1);
+  REQUIRE(compact_sketch.get_num_retained() == 2);
+  REQUIRE(compact_sketch.is_ordered());
+  count = 0;
+  for (const auto& entry: compact_sketch) {
+    REQUIRE(entry.second == 2);
+    ++count;
+  }
+  REQUIRE(count == 2);
+}
+
+template<typename T>
+class max_value_policy {
+public:
+  max_value_policy(const T& initial_value): initial_value(initial_value) {}
+  T create() const { return initial_value; }
+  void update(T& summary, const T& update) const { summary = std::max(summary, update); }
+private:
+  T initial_value;
+};
+
+typedef update_tuple_sketch<float, float, max_value_policy<float>> max_float_update_tuple_sketch;
+
+TEST_CASE("tuple sketch: float, custom policy", "[tuple_sketch]") {
+  auto update_sketch = max_float_update_tuple_sketch::builder(max_value_policy<float>(5)).build();
+  update_sketch.update(1, 1);
+  update_sketch.update(1, 2);
+  update_sketch.update(2, 10);
+  update_sketch.update(3, 3);
+  update_sketch.update(3, 7);
+  std::cout << update_sketch.to_string(true);
+  int count = 0;
+  float sum = 0;
+  for (const auto& entry: update_sketch) {
+    sum += entry.second;
+    ++count;
+  }
+  REQUIRE(count == 3);
+  REQUIRE(sum == 22); // 5 + 10 + 7
+}
+
+struct test_type_replace_policy {
+  test_type create() const { return test_type(0); }
+  void update(test_type& summary, const test_type& update) const {
+    //std::cerr << "policy::update lvalue begin" << std::endl;
+    summary = update;
+    //std::cerr << "policy::update lvalue end" << std::endl;
+  }
+  void update(test_type& summary, test_type&& update) const {
+    //std::cerr << "policy::update rvalue begin" << std::endl;
+    summary = std::move(update);
+    //std::cerr << "policy::update rvalue end" << std::endl;
+  }
+};
+
+TEST_CASE("tuple sketch: test type with replace policy", "[tuple_sketch]") {
+  auto sketch = update_tuple_sketch<test_type, test_type, test_type_replace_policy, test_type_serde>::builder().build();
+  test_type a(1);
+  sketch.update(1, a); // this should copy
+  sketch.update(2, 2); // this should move
+  sketch.update(1, 2); // this should move
+  std::cout << sketch.to_string(true);
+  REQUIRE(sketch.get_num_retained() == 2);
+  for (const auto& entry: sketch) {
+    REQUIRE(entry.second.get_value() == 2);
+  }
+}
+
+struct three_doubles_update_policy {
+  std::tuple<double, double, double> create() const {
+    return std::tuple<double, double, double>(0, 0, 0);
+  }
+  void update(std::tuple<double, double, double>& summary, const std::tuple<double, double, double>& update) const {
+    std::get<0>(summary) += std::get<0>(update);
+    std::get<1>(summary) += std::get<1>(update);
+    std::get<2>(summary) += std::get<2>(update);
+  }
+};
+
+TEST_CASE("tuple sketch: array of doubles", "[tuple_sketch]") {
+  using three_doubles = std::tuple<double, double, double>;
+  using three_doubles_update_tuple_sketch = update_tuple_sketch<three_doubles, three_doubles, three_doubles_update_policy>;
+  auto update_sketch = three_doubles_update_tuple_sketch::builder().build();
+  update_sketch.update(1, three_doubles(1, 2, 3));
+  std::cout << update_sketch.to_string(true);
+  const auto& entry = *update_sketch.begin();
+  REQUIRE(std::get<0>(entry.second) == 1.0);
+  REQUIRE(std::get<1>(entry.second) == 2.0);
+  REQUIRE(std::get<2>(entry.second) == 3.0);
+
+  auto compact_sketch = update_sketch.compact();
+  std::cout << compact_sketch.to_string(true);
+}
+
+} /* namespace datasketches */
diff --git a/tuple/test/tuple_union_test.cpp b/tuple/test/tuple_union_test.cpp
new file mode 100644
index 0000000..437aacd
--- /dev/null
+++ b/tuple/test/tuple_union_test.cpp
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <iostream>
+
+#include <catch.hpp>
+#include <tuple_union.hpp>
+
+namespace datasketches {
+
+TEST_CASE("tuple_union float") {
+  std::cout << "union test begin" << std::endl;
+  auto update_sketch1 = update_tuple_sketch<float>::builder().build();
+  update_sketch1.update(1, 1);
+  update_sketch1.update(2, 1);
+
+  auto update_sketch2 = update_tuple_sketch<float>::builder().build();
+  update_sketch2.update(1, 1);
+  update_sketch2.update(3, 1);
+
+  auto u = tuple_union<float>::builder().build();
+  u.update(update_sketch1);
+  u.update(update_sketch2);
+  auto r = u.get_result();
+  std::cout << r.to_string(true);
+  std::cout << "union test end" << std::endl;
+}
+
+} /* namespace datasketches */


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org