You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2020/07/22 22:55:30 UTC

[incubator-datasketches-cpp] branch tuple_sketch updated (75791ce -> a4c2a8e)

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a change to branch tuple_sketch
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git.


    from 75791ce  reuse update base in intersection, cleanup
     new 862da30  documentation
     new 3f1f579  no need to check for zero, rearranged order
     new a4c2a8e  implementation

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 tuple/include/theta_sketch_experimental.hpp      | 365 ++++++++++++++++++++---
 tuple/include/theta_sketch_experimental_impl.hpp | 262 +++++++++++++---
 tuple/include/tuple_sketch.hpp                   |   6 +
 tuple/include/tuple_sketch_impl.hpp              |  30 +-
 tuple/test/theta_sketch_experimental_test.cpp    |   2 +-
 tuple/test/theta_union_experimental_test.cpp     |   4 +-
 6 files changed, 573 insertions(+), 96 deletions(-)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org


[incubator-datasketches-cpp] 01/03: documentation

Posted by al...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch tuple_sketch
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git

commit 862da304111cd91fdf206202d25834e499b3e1b9
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Wed Jul 22 15:52:34 2020 -0700

    documentation
---
 tuple/include/tuple_sketch.hpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tuple/include/tuple_sketch.hpp b/tuple/include/tuple_sketch.hpp
index 6ae8833..9e61efb 100644
--- a/tuple/include/tuple_sketch.hpp
+++ b/tuple/include/tuple_sketch.hpp
@@ -45,6 +45,9 @@ public:
 
   virtual ~tuple_sketch() = default;
 
+  /**
+   * @return allocator
+   */
   virtual Allocator get_allocator() const = 0;
 
   /**
@@ -95,6 +98,9 @@ public:
    */
   virtual uint32_t get_num_retained() const = 0;
 
+  /**
+   * @return hash of the seed that was used to hash the input
+   */
   virtual uint16_t get_seed_hash() const = 0;
 
   /**


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org


[incubator-datasketches-cpp] 02/03: no need to check for zero, rearranged order

Posted by al...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch tuple_sketch
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git

commit 3f1f5790f5425a4bec5e7911dab61ccbf17a41d0
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Wed Jul 22 15:53:27 2020 -0700

    no need to check for zero, rearranged order
---
 tuple/include/tuple_sketch_impl.hpp | 30 ++++++++++++++----------------
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/tuple/include/tuple_sketch_impl.hpp b/tuple/include/tuple_sketch_impl.hpp
index 7178e50..f0c5b50 100644
--- a/tuple/include/tuple_sketch_impl.hpp
+++ b/tuple/include/tuple_sketch_impl.hpp
@@ -70,9 +70,7 @@ string<A> tuple_sketch<S, A>::to_string(bool detail) const {
   if (detail) {
     os << "### Retained entries" << std::endl;
     for (const auto& it: *this) {
-      if (it.first != 0) {
-        os << it.first << ": " << it.second << std::endl;
-      }
+      os << it.first << ": " << it.second << std::endl;
     }
     os << "### End retained entries" << std::endl;
   }
@@ -129,13 +127,6 @@ auto update_tuple_sketch<S, U, P, A>::get_rf() const -> resize_factor {
 
 template<typename S, typename U, typename P, typename A>
 template<typename UU>
-void update_tuple_sketch<S, U, P, A>::update(const std::string& key, UU&& value) {
-  if (key.empty()) return;
-  update(key.c_str(), key.length(), std::forward<UU>(value));
-}
-
-template<typename S, typename U, typename P, typename A>
-template<typename UU>
 void update_tuple_sketch<S, U, P, A>::update(uint64_t key, UU&& value) {
   update(&key, sizeof(key), std::forward<UU>(value));
 }
@@ -178,20 +169,27 @@ void update_tuple_sketch<S, U, P, A>::update(uint8_t key, UU&& value) {
 
 template<typename S, typename U, typename P, typename A>
 template<typename UU>
-void update_tuple_sketch<S, U, P, A>::update(double key, UU&& value) {
-  update(canonical_double(key), std::forward<UU>(value));
+void update_tuple_sketch<S, U, P, A>::update(int8_t key, UU&& value) {
+  update(static_cast<int64_t>(key), std::forward<UU>(value));
 }
 
 template<typename S, typename U, typename P, typename A>
 template<typename UU>
-void update_tuple_sketch<S, U, P, A>::update(float key, UU&& value) {
-  update(static_cast<double>(key), std::forward<UU>(value));
+void update_tuple_sketch<S, U, P, A>::update(const std::string& key, UU&& value) {
+  if (key.empty()) return;
+  update(key.c_str(), key.length(), std::forward<UU>(value));
 }
 
 template<typename S, typename U, typename P, typename A>
 template<typename UU>
-void update_tuple_sketch<S, U, P, A>::update(int8_t key, UU&& value) {
-  update(static_cast<int64_t>(key), std::forward<UU>(value));
+void update_tuple_sketch<S, U, P, A>::update(double key, UU&& value) {
+  update(canonical_double(key), std::forward<UU>(value));
+}
+
+template<typename S, typename U, typename P, typename A>
+template<typename UU>
+void update_tuple_sketch<S, U, P, A>::update(float key, UU&& value) {
+  update(static_cast<double>(key), std::forward<UU>(value));
 }
 
 template<typename S, typename U, typename P, typename A>


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org


[incubator-datasketches-cpp] 03/03: implementation

Posted by al...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch tuple_sketch
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git

commit a4c2a8ea4393c544b4d3abfd79651fc43b3c5096
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Wed Jul 22 15:55:16 2020 -0700

    implementation
---
 tuple/include/theta_sketch_experimental.hpp      | 365 ++++++++++++++++++++---
 tuple/include/theta_sketch_experimental_impl.hpp | 262 +++++++++++++---
 tuple/test/theta_sketch_experimental_test.cpp    |   2 +-
 tuple/test/theta_union_experimental_test.cpp     |   4 +-
 4 files changed, 553 insertions(+), 80 deletions(-)

diff --git a/tuple/include/theta_sketch_experimental.hpp b/tuple/include/theta_sketch_experimental.hpp
index da15c0b..64629e7 100644
--- a/tuple/include/theta_sketch_experimental.hpp
+++ b/tuple/include/theta_sketch_experimental.hpp
@@ -27,68 +27,359 @@ namespace datasketches {
 
 // experimental theta sketch derived from the same base as tuple sketch
 
+template<typename Allocator = std::allocator<uint64_t>>
+class theta_sketch_experimental {
+public:
+  using Entry = uint64_t;
+  using ExtractKey = trivial_extract_key;
+  using iterator = theta_iterator<Entry, ExtractKey>;
+  using const_iterator = theta_const_iterator<Entry, ExtractKey>;
+
+  virtual ~theta_sketch_experimental() = default;
+
+  /**
+   * @return allocator
+   */
+  virtual Allocator get_allocator() const = 0;
+
+  /**
+   * @return true if this sketch represents an empty set (not the same as no retained entries!)
+   */
+  virtual bool is_empty() const = 0;
+
+  /**
+   * @return estimate of the distinct count of the input stream
+   */
+  double get_estimate() const;
+
+  /**
+   * Returns the approximate lower error bound given a number of standard deviations.
+   * This parameter is similar to the number of standard deviations of the normal distribution
+   * and corresponds to approximately 67%, 95% and 99% confidence intervals.
+   * @param num_std_devs number of Standard Deviations (1, 2 or 3)
+   * @return the lower bound
+   */
+  double get_lower_bound(uint8_t num_std_devs) const;
+
+  /**
+   * Returns the approximate upper error bound given a number of standard deviations.
+   * This parameter is similar to the number of standard deviations of the normal distribution
+   * and corresponds to approximately 67%, 95% and 99% confidence intervals.
+   * @param num_std_devs number of Standard Deviations (1, 2 or 3)
+   * @return the upper bound
+   */
+  double get_upper_bound(uint8_t num_std_devs) const;
+
+  /**
+   * @return true if the sketch is in estimation mode (as opposed to exact mode)
+   */
+  bool is_estimation_mode() const;
+
+  /**
+   * @return theta as a fraction from 0 to 1 (effective sampling rate)
+   */
+  double get_theta() const;
+
+  /**
+   * @return theta as a positive integer between 0 and LLONG_MAX
+   */
+  virtual uint64_t get_theta64() const = 0;
+
+  /**
+   * @return the number of retained entries in the sketch
+   */
+  virtual uint32_t get_num_retained() const = 0;
+
+  /**
+   * @return hash of the seed that was used to hash the input
+   */
+  virtual uint16_t get_seed_hash() const = 0;
+
+  /**
+   * @return true if retained entries are ordered
+   */
+  virtual bool is_ordered() const = 0;
+
+  /**
+   * Provides a human-readable summary of this sketch as a string
+   * @param print_items if true include the list of items retained by the sketch
+   * @return sketch summary as a string
+   */
+  virtual string<Allocator> to_string(bool print_items = false) const;
+
+  /**
+   * Iterator over hash values in this sketch.
+   * @return begin iterator
+   */
+  virtual iterator begin() = 0;
+
+  /**
+   * Iterator pointing past the valid range.
+   * Not to be incremented or dereferenced.
+   * @return end iterator
+   */
+  virtual iterator end() = 0;
+
+  /**
+   * Const iterator over hash values in this sketch.
+   * @return begin iterator
+   */
+  virtual const_iterator begin() const = 0;
+
+  /**
+   * Const iterator pointing past the valid range.
+   * Not to be incremented or dereferenced.
+   * @return end iterator
+   */
+  virtual const_iterator end() const = 0;
+
+protected:
+  virtual void print_specifics(std::ostringstream& os) const = 0;
+};
+
+// forward declaration
 template<typename A> class compact_theta_sketch_experimental;
 
-template<typename A = std::allocator<uint64_t>>
-class theta_sketch_experimental {
+template<typename Allocator = std::allocator<uint64_t>>
+class update_theta_sketch_experimental: public theta_sketch_experimental<Allocator> {
 public:
-  using resize_factor = theta_constants::resize_factor;
-
-  class builder: public theta_base_builder<builder> {
-  public:
-      builder(const A& allocator = A());
-      theta_sketch_experimental build() const;
-  private:
-      A allocator_;
-  };
-
-  A get_allocator() const { return table_.allocator_; };
-  bool is_empty() const { return table_.is_empty_; }
-  bool is_ordered() const { return false; }
-  uint16_t get_seed_hash() const { return compute_seed_hash(DEFAULT_SEED); }
-  uint64_t get_theta64() const { return table_.theta_; }
-  uint32_t get_num_retained() const { return table_.num_entries_; }
-
-  inline void update(uint64_t key);
-  void update(const void* key, size_t length);
+  using Base = theta_sketch_experimental<Allocator>;
+  using Entry = typename Base::Entry;
+  using ExtractKey = typename Base::ExtractKey;
+  using iterator = typename Base::iterator;
+  using const_iterator = typename Base::const_iterator;
+  using theta_table = theta_update_sketch_base<Entry, ExtractKey, Allocator>;
+  using resize_factor = typename theta_table::resize_factor;
 
-  void trim();
+  // No constructor here. Use builder instead.
+  class builder;
+
+  update_theta_sketch_experimental(const update_theta_sketch_experimental&) = default;
+  update_theta_sketch_experimental(update_theta_sketch_experimental&&) noexcept = default;
+  virtual ~update_theta_sketch_experimental() = default;
+  update_theta_sketch_experimental& operator=(const update_theta_sketch_experimental&) = default;
+  update_theta_sketch_experimental& operator=(update_theta_sketch_experimental&&) = default;
+
+  virtual Allocator get_allocator() const;
+  virtual bool is_empty() const;
+  virtual bool is_ordered() const;
+  virtual uint16_t get_seed_hash() const;
+  virtual uint64_t get_theta64() const;
+  virtual uint32_t get_num_retained() const;
+
+  /**
+   * @return configured log2 of the nominal number of entries in the sketch
+   */
+  uint8_t get_lg_k() const;
 
-  string<A> to_string(bool detail = false) const;
+  /**
+   * @return configured resize factor of the sketch
+   */
+  resize_factor get_rf() const;
 
-  using const_iterator = theta_const_iterator<uint64_t, trivial_extract_key>;
-  const_iterator begin() const;
-  const_iterator end() const;
+  /**
+   * Update this sketch with a given string.
+   * @param value string to update the sketch with
+   */
+  void update(const std::string& value);
 
-  compact_theta_sketch_experimental<A> compact(bool ordered = true) const;
+  /**
+   * Update this sketch with a given unsigned 64-bit integer.
+   * @param value uint64_t to update the sketch with
+   */
+  void update(uint64_t value);
+
+  /**
+   * Update this sketch with a given signed 64-bit integer.
+   * @param value int64_t to update the sketch with
+   */
+  void update(int64_t value);
+
+  /**
+   * Update this sketch with a given unsigned 32-bit integer.
+   * For compatibility with Java implementation.
+   * @param value uint32_t to update the sketch with
+   */
+  void update(uint32_t value);
+
+  /**
+   * Update this sketch with a given signed 32-bit integer.
+   * For compatibility with Java implementation.
+   * @param value int32_t to update the sketch with
+   */
+  void update(int32_t value);
+
+  /**
+   * Update this sketch with a given unsigned 16-bit integer.
+   * For compatibility with Java implementation.
+   * @param value uint16_t to update the sketch with
+   */
+  void update(uint16_t value);
+
+  /**
+   * Update this sketch with a given signed 16-bit integer.
+   * For compatibility with Java implementation.
+   * @param value int16_t to update the sketch with
+   */
+  void update(int16_t value);
+
+  /**
+   * Update this sketch with a given unsigned 8-bit integer.
+   * For compatibility with Java implementation.
+   * @param value uint8_t to update the sketch with
+   */
+  void update(uint8_t value);
+
+  /**
+   * Update this sketch with a given signed 8-bit integer.
+   * For compatibility with Java implementation.
+   * @param value int8_t to update the sketch with
+   */
+  void update(int8_t value);
+
+  /**
+   * Update this sketch with a given double-precision floating point value.
+   * For compatibility with Java implementation.
+   * @param value double to update the sketch with
+   */
+  void update(double value);
+
+  /**
+   * Update this sketch with a given floating point value.
+   * For compatibility with Java implementation.
+   * @param value float to update the sketch with
+   */
+  void update(float value);
+
+  /**
+   * Update this sketch with given data of any type.
+   * This is a "universal" update that covers all cases above,
+   * but may produce different hashes.
+   * Be very careful to hash input values consistently using the same approach
+   * both over time and on different platforms
+   * and while passing sketches between C++ environment and Java environment.
+   * Otherwise two sketches that should represent overlapping sets will be disjoint.
+   * For instance, for signed 32-bit values call update(int32_t) method above,
+   * which does widening conversion to int64_t, if compatibility with Java is expected
+   * @param data pointer to the data
+   * @param length of the data in bytes
+   */
+  void update(const void* data, size_t length);
+
+  /**
+   * Remove retained entries in excess of the nominal size k (if any)
+   */
+  void trim();
+
+  /**
+   * Converts this sketch to a compact sketch (ordered or unordered).
+   * @param ordered optional flag to specify if ordered sketch should be produced
+   * @return compact sketch
+   */
+  compact_theta_sketch_experimental<Allocator> compact(bool ordered = true) const;
+
+  virtual iterator begin();
+  virtual iterator end();
+  virtual const_iterator begin() const;
+  virtual const_iterator end() const;
 
 private:
   enum flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDERED };
-  using theta_table = theta_update_sketch_base<uint64_t, trivial_extract_key, A>;
   theta_table table_;
 
-  theta_sketch_experimental(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const A& allocator);
+  update_theta_sketch_experimental(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta,
+      uint64_t seed, const Allocator& allocator);
+
+  virtual void print_specifics(std::ostringstream& os) const;
 };
 
-template<typename A = std::allocator<uint64_t>>
-class compact_theta_sketch_experimental {
+// compact sketch
+
+template<typename Allocator = std::allocator<uint64_t>>
+class compact_theta_sketch_experimental: public theta_sketch_experimental<Allocator> {
 public:
-  compact_theta_sketch_experimental(const theta_sketch_experimental<A>& other, bool ordered);
+  using Base = theta_sketch_experimental<Allocator>;
+  using iterator = typename Base::iterator;
+  using const_iterator = typename Base::const_iterator;
+  using AllocBytes = typename std::allocator_traits<Allocator>::template rebind_alloc<uint8_t>;
+  using vector_bytes = std::vector<uint8_t, AllocBytes>;
+
+  // Instances of this type can be obtained:
+  // - by compacting an update_theta_sketch
+  // - as a result of a set operation
+  // - by deserializing a previously serialized compact sketch
+
+  compact_theta_sketch_experimental(const Base& other, bool ordered);
+  compact_theta_sketch_experimental(const compact_theta_sketch_experimental&) = default;
+  compact_theta_sketch_experimental(compact_theta_sketch_experimental&&) noexcept = default;
+  virtual ~compact_theta_sketch_experimental() = default;
+  compact_theta_sketch_experimental& operator=(const compact_theta_sketch_experimental&) = default;
+  compact_theta_sketch_experimental& operator=(compact_theta_sketch_experimental&&) = default;
+
+  virtual Allocator get_allocator() const;
+  virtual bool is_empty() const;
+  virtual bool is_ordered() const;
+  virtual uint64_t get_theta64() const;
+  virtual uint32_t get_num_retained() const;
+  virtual uint16_t get_seed_hash() const;
+
+  /**
+   * This method serializes the sketch into a given stream in a binary form
+   * @param os output stream
+   */
+  void serialize(std::ostream& os) const;
 
-  compact_theta_sketch_experimental(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta, std::vector<uint64_t, A>&& entries);
+  /**
+   * This method serializes the sketch as a vector of bytes.
+   * An optional header can be reserved in front of the sketch.
+   * It is an uninitialized space of a given size.
+   * This header is used in Datasketches PostgreSQL extension.
+   * @param header_size_bytes space to reserve in front of the sketch
+   */
+  vector_bytes serialize(unsigned header_size_bytes = 0) const;
 
-  uint32_t get_num_retained() const { return entries_.size(); }
+  virtual iterator begin();
+  virtual iterator end();
+  virtual const_iterator begin() const;
+  virtual const_iterator end() const;
 
-  string<A> to_string(bool detail = false) const;
+  /**
+   * This method deserializes a sketch from a given stream.
+   * @param is input stream
+   * @param seed the seed for the hash function that was used to create the sketch
+   * @return an instance of the sketch
+   */
+  static compact_theta_sketch_experimental deserialize(std::istream& is, uint64_t seed = DEFAULT_SEED);
 
-  A get_allocator() const;
+  /**
+   * This method deserializes a sketch from a given array of bytes.
+   * @param bytes pointer to the array of bytes
+   * @param size the size of the array
+   * @param seed the seed for the hash function that was used to create the sketch
+   * @return an instance of the sketch
+   */
+  static compact_theta_sketch_experimental deserialize(const void* bytes, size_t size, uint64_t seed = DEFAULT_SEED);
+
+  // for internal use
+  compact_theta_sketch_experimental(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta, std::vector<uint64_t, Allocator>&& entries);
 
 private:
   bool is_empty_;
   bool is_ordered_;
   uint16_t seed_hash_;
   uint64_t theta_;
-  std::vector<uint64_t, A> entries_;
+  std::vector<uint64_t, Allocator> entries_;
+
+  virtual void print_specifics(std::ostringstream& os) const;
+};
+
+template<typename Allocator>
+class update_theta_sketch_experimental<Allocator>::builder: public theta_base_builder<builder> {
+public:
+    builder(const Allocator& allocator = Allocator());
+    update_theta_sketch_experimental build() const;
+private:
+    Allocator allocator_;
 };
 
 } /* namespace datasketches */
diff --git a/tuple/include/theta_sketch_experimental_impl.hpp b/tuple/include/theta_sketch_experimental_impl.hpp
index 19fb707..4471215 100644
--- a/tuple/include/theta_sketch_experimental_impl.hpp
+++ b/tuple/include/theta_sketch_experimental_impl.hpp
@@ -19,24 +19,170 @@
 
 #include <sstream>
 
+#include "binomial_bounds.hpp"
+
 namespace datasketches {
 
-// experimental update theta sketch derived from the same base as tuple sketch
+template<typename A>
+bool theta_sketch_experimental<A>::is_estimation_mode() const {
+  return get_theta64() < theta_constants::MAX_THETA && !is_empty();
+}
+
+template<typename A>
+double theta_sketch_experimental<A>::get_theta() const {
+  return static_cast<double>(get_theta64()) / theta_constants::MAX_THETA;
+}
+
+template<typename A>
+double theta_sketch_experimental<A>::get_estimate() const {
+  return get_num_retained() / get_theta();
+}
 
 template<typename A>
-theta_sketch_experimental<A>::theta_sketch_experimental(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf,
+double theta_sketch_experimental<A>::get_lower_bound(uint8_t num_std_devs) const {
+  if (!is_estimation_mode()) return get_num_retained();
+  return binomial_bounds::get_lower_bound(get_num_retained(), get_theta(), num_std_devs);
+}
+
+template<typename A>
+double theta_sketch_experimental<A>::get_upper_bound(uint8_t num_std_devs) const {
+  if (!is_estimation_mode()) return get_num_retained();
+  return binomial_bounds::get_upper_bound(get_num_retained(), get_theta(), num_std_devs);
+}
+
+template<typename A>
+string<A> theta_sketch_experimental<A>::to_string(bool detail) const {
+  std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
+  os << "### Theta sketch summary:" << std::endl;
+  os << "   num retained entries : " << get_num_retained() << std::endl;
+  os << "   seed hash            : " << get_seed_hash() << std::endl;
+  os << "   empty?               : " << (is_empty() ? "true" : "false") << std::endl;
+  os << "   ordered?             : " << (is_ordered() ? "true" : "false") << std::endl;
+  os << "   estimation mode?     : " << (is_estimation_mode() ? "true" : "false") << std::endl;
+  os << "   theta (fraction)     : " << get_theta() << std::endl;
+  os << "   theta (raw 64-bit)   : " << get_theta64() << std::endl;
+  os << "   estimate             : " << this->get_estimate() << std::endl;
+  os << "   lower bound 95% conf : " << this->get_lower_bound(2) << std::endl;
+  os << "   upper bound 95% conf : " << this->get_upper_bound(2) << std::endl;
+  print_specifics(os);
+  os << "### End sketch summary" << std::endl;
+  if (detail) {
+    os << "### Retained entries" << std::endl;
+    for (const auto& hash: *this) {
+      os << hash << std::endl;
+    }
+    os << "### End retained entries" << std::endl;
+  }
+  return os.str();
+}
+
+// update sketch
+
+template<typename A>
+update_theta_sketch_experimental<A>::update_theta_sketch_experimental(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf,
     uint64_t theta, uint64_t seed, const A& allocator):
 table_(lg_cur_size, lg_nom_size, rf, theta, seed, allocator)
 {}
 
 template<typename A>
-void theta_sketch_experimental<A>::update(uint64_t key) {
-  update(&key, sizeof(key));
+A update_theta_sketch_experimental<A>::get_allocator() const {
+  return table_.allocator_;
+}
+
+template<typename A>
+bool update_theta_sketch_experimental<A>::is_empty() const {
+  return table_.is_empty_;
+}
+
+template<typename A>
+bool update_theta_sketch_experimental<A>::is_ordered() const {
+  return false;
+}
+
+template<typename A>
+uint64_t update_theta_sketch_experimental<A>::get_theta64() const {
+  return table_.theta_;
+}
+
+template<typename A>
+uint32_t update_theta_sketch_experimental<A>::get_num_retained() const {
+  return table_.num_entries_;
+}
+
+template<typename A>
+uint16_t update_theta_sketch_experimental<A>::get_seed_hash() const {
+  return compute_seed_hash(table_.seed_);
+}
+
+template<typename A>
+uint8_t update_theta_sketch_experimental<A>::get_lg_k() const {
+  return table_.lg_nom_size_;
+}
+
+template<typename A>
+auto update_theta_sketch_experimental<A>::get_rf() const -> resize_factor {
+  return table_.rf_;
+}
+
+template<typename A>
+void update_theta_sketch_experimental<A>::update(uint64_t value) {
+  update(&value, sizeof(value));
+}
+
+template<typename A>
+void update_theta_sketch_experimental<A>::update(int64_t value) {
+  update(&value, sizeof(value));
+}
+
+template<typename A>
+void update_theta_sketch_experimental<A>::update(uint32_t value) {
+  update(static_cast<int32_t>(value));
+}
+
+template<typename A>
+void update_theta_sketch_experimental<A>::update(int32_t value) {
+  update(static_cast<int64_t>(value));
+}
+
+template<typename A>
+void update_theta_sketch_experimental<A>::update(uint16_t value) {
+  update(static_cast<int16_t>(value));
+}
+
+template<typename A>
+void update_theta_sketch_experimental<A>::update(int16_t value) {
+  update(static_cast<int64_t>(value));
 }
 
 template<typename A>
-void theta_sketch_experimental<A>::update(const void* key, size_t length) {
-  const uint64_t hash = table_.hash_and_screen(key, length);
+void update_theta_sketch_experimental<A>::update(uint8_t value) {
+  update(static_cast<int8_t>(value));
+}
+
+template<typename A>
+void update_theta_sketch_experimental<A>::update(int8_t value) {
+  update(static_cast<int64_t>(value));
+}
+
+template<typename A>
+void update_theta_sketch_experimental<A>::update(double value) {
+  update(canonical_double(value));
+}
+
+template<typename A>
+void update_theta_sketch_experimental<A>::update(float value) {
+  update(static_cast<double>(value));
+}
+
+template<typename A>
+void update_theta_sketch_experimental<A>::update(const std::string& value) {
+  if (value.empty()) return;
+  update(value.c_str(), value.length());
+}
+
+template<typename A>
+void update_theta_sketch_experimental<A>::update(const void* data, size_t length) {
+  const uint64_t hash = table_.hash_and_screen(data, length);
   if (hash == 0) return;
   auto result = table_.find(hash);
   if (!result.second) {
@@ -45,55 +191,55 @@ void theta_sketch_experimental<A>::update(const void* key, size_t length) {
 }
 
 template<typename A>
-void theta_sketch_experimental<A>::trim() {
+void update_theta_sketch_experimental<A>::trim() {
   table_.trim();
 }
 
 template<typename A>
-string<A> theta_sketch_experimental<A>::to_string(bool detail) const {
-  std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
-  auto type = typeid(*this).name();
-  os << "sizeof(" << type << ")=" << sizeof(*this) << std::endl;
-  os << table_.to_string();
-  if (detail) {
-    for (const auto& it: table_) {
-      if (it != 0) {
-        os << it << std::endl;
-      }
-    }
-  }
-  return os.str();
+auto update_theta_sketch_experimental<A>::begin() -> iterator {
+  return iterator(table_.entries_, 1 << table_.lg_cur_size_, 0);
 }
 
 template<typename A>
-auto theta_sketch_experimental<A>::begin() const -> const_iterator {
+auto update_theta_sketch_experimental<A>::end() -> iterator {
+  return iterator(nullptr, 0, 1 << table_.lg_cur_size_);
+}
+
+template<typename A>
+auto update_theta_sketch_experimental<A>::begin() const -> const_iterator {
   return const_iterator(table_.entries_, 1 << table_.lg_cur_size_, 0);
 }
 
 template<typename A>
-auto theta_sketch_experimental<A>::end() const -> const_iterator {
+auto update_theta_sketch_experimental<A>::end() const -> const_iterator {
   return const_iterator(nullptr, 0, 1 << table_.lg_cur_size_);
 }
-
 template<typename A>
-compact_theta_sketch_experimental<A> theta_sketch_experimental<A>::compact(bool ordered) const {
+compact_theta_sketch_experimental<A> update_theta_sketch_experimental<A>::compact(bool ordered) const {
   return compact_theta_sketch_experimental<A>(*this, ordered);
 }
 
+template<typename A>
+void update_theta_sketch_experimental<A>::print_specifics(std::ostringstream& os) const {
+  os << "   lg nominal size      : " << static_cast<int>(table_.lg_nom_size_) << std::endl;
+  os << "   lg current size      : " << static_cast<int>(table_.lg_cur_size_) << std::endl;
+  os << "   resize factor        : " << (1 << table_.rf_) << std::endl;
+}
+
 // builder
 
 template<typename A>
-theta_sketch_experimental<A>::builder::builder(const A& allocator): allocator_(allocator) {}
+update_theta_sketch_experimental<A>::builder::builder(const A& allocator): allocator_(allocator) {}
 
 template<typename A>
-theta_sketch_experimental<A> theta_sketch_experimental<A>::builder::build() const {
-  return theta_sketch_experimental(this->starting_lg_size(), this->lg_k_, this->rf_, this->starting_theta(), this->seed_, allocator_);
+update_theta_sketch_experimental<A> update_theta_sketch_experimental<A>::builder::build() const {
+  return update_theta_sketch_experimental(this->starting_lg_size(), this->lg_k_, this->rf_, this->starting_theta(), this->seed_, allocator_);
 }
 
 // experimental compact theta sketch
 
 template<typename A>
-compact_theta_sketch_experimental<A>::compact_theta_sketch_experimental(const theta_sketch_experimental<A>& other, bool ordered):
+compact_theta_sketch_experimental<A>::compact_theta_sketch_experimental(const Base& other, bool ordered):
 is_empty_(other.is_empty()),
 is_ordered_(other.is_ordered()),
 seed_hash_(other.get_seed_hash()),
@@ -116,21 +262,57 @@ entries_(std::move(entries))
 {}
 
 template<typename A>
-string<A> compact_theta_sketch_experimental<A>::to_string(bool detail) const {
-  std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
-  auto type = typeid(*this).name();
-  os << "sizeof(" << type << ")=" << sizeof(*this) << std::endl;
-  if (detail) {
-    for (const auto& hash: entries_) {
-      os << hash << std::endl;
-    }
-  }
-  return os.str();
+A compact_theta_sketch_experimental<A>::get_allocator() const {
+  return entries_.get_allocator();
 }
 
 template<typename A>
-A compact_theta_sketch_experimental<A>::get_allocator() const {
-  return entries_.get_allocator();
+bool compact_theta_sketch_experimental<A>::is_empty() const {
+  return is_empty_;
+}
+
+template<typename A>
+bool compact_theta_sketch_experimental<A>::is_ordered() const {
+  return is_ordered_;
+}
+
+template<typename A>
+uint64_t compact_theta_sketch_experimental<A>::get_theta64() const {
+  return theta_;
+}
+
+template<typename A>
+uint32_t compact_theta_sketch_experimental<A>::get_num_retained() const {
+  return entries_.size();
+}
+
+template<typename A>
+uint16_t compact_theta_sketch_experimental<A>::get_seed_hash() const {
+  return seed_hash_;
+}
+
+template<typename A>
+auto compact_theta_sketch_experimental<A>::begin() -> iterator {
+  return iterator(entries_.data(), entries_.size(), 0);
+}
+
+template<typename A>
+auto compact_theta_sketch_experimental<A>::end() -> iterator {
+  return iterator(nullptr, 0, entries_.size());
+}
+
+template<typename A>
+auto compact_theta_sketch_experimental<A>::begin() const -> const_iterator {
+  return const_iterator(entries_.data(), entries_.size(), 0);
+}
+
+template<typename A>
+auto compact_theta_sketch_experimental<A>::end() const -> const_iterator {
+  return const_iterator(nullptr, 0, entries_.size());
+}
+
+template<typename A>
+void compact_theta_sketch_experimental<A>::print_specifics(std::ostringstream& os) const {
 }
 
 } /* namespace datasketches */
diff --git a/tuple/test/theta_sketch_experimental_test.cpp b/tuple/test/theta_sketch_experimental_test.cpp
index 5407689..0fb0286 100644
--- a/tuple/test/theta_sketch_experimental_test.cpp
+++ b/tuple/test/theta_sketch_experimental_test.cpp
@@ -26,7 +26,7 @@
 namespace datasketches {
 
 TEST_CASE("theta_sketch_experimental: basics ", "[theta_sketch]") {
-  auto update_sketch = theta_sketch_experimental<>::builder().build();
+  auto update_sketch = update_theta_sketch_experimental<>::builder().build();
   update_sketch.update(1);
   update_sketch.update(2);
   REQUIRE(update_sketch.get_num_retained() == 2);
diff --git a/tuple/test/theta_union_experimental_test.cpp b/tuple/test/theta_union_experimental_test.cpp
index d08a070..c270a11 100644
--- a/tuple/test/theta_union_experimental_test.cpp
+++ b/tuple/test/theta_union_experimental_test.cpp
@@ -27,11 +27,11 @@
 namespace datasketches {
 
 TEST_CASE("theta_union_exeperimental") {
-  auto update_sketch1 = theta_sketch_experimental<>::builder().build();
+  auto update_sketch1 = update_theta_sketch_experimental<>::builder().build();
   update_sketch1.update(1);
   update_sketch1.update(2);
 
-  auto update_sketch2 = theta_sketch_experimental<>::builder().build();
+  auto update_sketch2 = update_theta_sketch_experimental<>::builder().build();
   update_sketch2.update(1);
   update_sketch2.update(3);
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org