You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2020/07/22 22:55:33 UTC

[incubator-datasketches-cpp] 03/03: implementation

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch tuple_sketch
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git

commit a4c2a8ea4393c544b4d3abfd79651fc43b3c5096
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Wed Jul 22 15:55:16 2020 -0700

    implementation
---
 tuple/include/theta_sketch_experimental.hpp      | 365 ++++++++++++++++++++---
 tuple/include/theta_sketch_experimental_impl.hpp | 262 +++++++++++++---
 tuple/test/theta_sketch_experimental_test.cpp    |   2 +-
 tuple/test/theta_union_experimental_test.cpp     |   4 +-
 4 files changed, 553 insertions(+), 80 deletions(-)

diff --git a/tuple/include/theta_sketch_experimental.hpp b/tuple/include/theta_sketch_experimental.hpp
index da15c0b..64629e7 100644
--- a/tuple/include/theta_sketch_experimental.hpp
+++ b/tuple/include/theta_sketch_experimental.hpp
@@ -27,68 +27,359 @@ namespace datasketches {
 
 // experimental theta sketch derived from the same base as tuple sketch
 
+template<typename Allocator = std::allocator<uint64_t>>
+class theta_sketch_experimental {
+public:
+  using Entry = uint64_t;
+  using ExtractKey = trivial_extract_key;
+  using iterator = theta_iterator<Entry, ExtractKey>;
+  using const_iterator = theta_const_iterator<Entry, ExtractKey>;
+
+  virtual ~theta_sketch_experimental() = default;
+
+  /**
+   * @return allocator
+   */
+  virtual Allocator get_allocator() const = 0;
+
+  /**
+   * @return true if this sketch represents an empty set (not the same as no retained entries!)
+   */
+  virtual bool is_empty() const = 0;
+
+  /**
+   * @return estimate of the distinct count of the input stream
+   */
+  double get_estimate() const;
+
+  /**
+   * Returns the approximate lower error bound given a number of standard deviations.
+   * This parameter is similar to the number of standard deviations of the normal distribution
+   * and corresponds to approximately 67%, 95% and 99% confidence intervals.
+   * @param num_std_devs number of Standard Deviations (1, 2 or 3)
+   * @return the lower bound
+   */
+  double get_lower_bound(uint8_t num_std_devs) const;
+
+  /**
+   * Returns the approximate upper error bound given a number of standard deviations.
+   * This parameter is similar to the number of standard deviations of the normal distribution
+   * and corresponds to approximately 67%, 95% and 99% confidence intervals.
+   * @param num_std_devs number of Standard Deviations (1, 2 or 3)
+   * @return the upper bound
+   */
+  double get_upper_bound(uint8_t num_std_devs) const;
+
+  /**
+   * @return true if the sketch is in estimation mode (as opposed to exact mode)
+   */
+  bool is_estimation_mode() const;
+
+  /**
+   * @return theta as a fraction from 0 to 1 (effective sampling rate)
+   */
+  double get_theta() const;
+
+  /**
+   * @return theta as a positive integer between 0 and LLONG_MAX
+   */
+  virtual uint64_t get_theta64() const = 0;
+
+  /**
+   * @return the number of retained entries in the sketch
+   */
+  virtual uint32_t get_num_retained() const = 0;
+
+  /**
+   * @return hash of the seed that was used to hash the input
+   */
+  virtual uint16_t get_seed_hash() const = 0;
+
+  /**
+   * @return true if retained entries are ordered
+   */
+  virtual bool is_ordered() const = 0;
+
+  /**
+   * Provides a human-readable summary of this sketch as a string
+   * @param print_items if true include the list of items retained by the sketch
+   * @return sketch summary as a string
+   */
+  virtual string<Allocator> to_string(bool print_items = false) const;
+
+  /**
+   * Iterator over hash values in this sketch.
+   * @return begin iterator
+   */
+  virtual iterator begin() = 0;
+
+  /**
+   * Iterator pointing past the valid range.
+   * Not to be incremented or dereferenced.
+   * @return end iterator
+   */
+  virtual iterator end() = 0;
+
+  /**
+   * Const iterator over hash values in this sketch.
+   * @return begin iterator
+   */
+  virtual const_iterator begin() const = 0;
+
+  /**
+   * Const iterator pointing past the valid range.
+   * Not to be incremented or dereferenced.
+   * @return end iterator
+   */
+  virtual const_iterator end() const = 0;
+
+protected:
+  virtual void print_specifics(std::ostringstream& os) const = 0;
+};
+
+// forward declaration
 template<typename A> class compact_theta_sketch_experimental;
 
-template<typename A = std::allocator<uint64_t>>
-class theta_sketch_experimental {
+template<typename Allocator = std::allocator<uint64_t>>
+class update_theta_sketch_experimental: public theta_sketch_experimental<Allocator> {
 public:
-  using resize_factor = theta_constants::resize_factor;
-
-  class builder: public theta_base_builder<builder> {
-  public:
-      builder(const A& allocator = A());
-      theta_sketch_experimental build() const;
-  private:
-      A allocator_;
-  };
-
-  A get_allocator() const { return table_.allocator_; };
-  bool is_empty() const { return table_.is_empty_; }
-  bool is_ordered() const { return false; }
-  uint16_t get_seed_hash() const { return compute_seed_hash(DEFAULT_SEED); }
-  uint64_t get_theta64() const { return table_.theta_; }
-  uint32_t get_num_retained() const { return table_.num_entries_; }
-
-  inline void update(uint64_t key);
-  void update(const void* key, size_t length);
+  using Base = theta_sketch_experimental<Allocator>;
+  using Entry = typename Base::Entry;
+  using ExtractKey = typename Base::ExtractKey;
+  using iterator = typename Base::iterator;
+  using const_iterator = typename Base::const_iterator;
+  using theta_table = theta_update_sketch_base<Entry, ExtractKey, Allocator>;
+  using resize_factor = typename theta_table::resize_factor;
 
-  void trim();
+  // No constructor here. Use builder instead.
+  class builder;
+
+  update_theta_sketch_experimental(const update_theta_sketch_experimental&) = default;
+  update_theta_sketch_experimental(update_theta_sketch_experimental&&) noexcept = default;
+  virtual ~update_theta_sketch_experimental() = default;
+  update_theta_sketch_experimental& operator=(const update_theta_sketch_experimental&) = default;
+  update_theta_sketch_experimental& operator=(update_theta_sketch_experimental&&) = default;
+
+  virtual Allocator get_allocator() const;
+  virtual bool is_empty() const;
+  virtual bool is_ordered() const;
+  virtual uint16_t get_seed_hash() const;
+  virtual uint64_t get_theta64() const;
+  virtual uint32_t get_num_retained() const;
+
+  /**
+   * @return configured base-2 logarithm of the nominal number of entries (lg_k) in the sketch
+   */
+  uint8_t get_lg_k() const;
 
-  string<A> to_string(bool detail = false) const;
+  /**
+   * @return configured resize factor of the sketch
+   */
+  resize_factor get_rf() const;
 
-  using const_iterator = theta_const_iterator<uint64_t, trivial_extract_key>;
-  const_iterator begin() const;
-  const_iterator end() const;
+  /**
+   * Update this sketch with a given string.
+   * @param value string to update the sketch with
+   */
+  void update(const std::string& value);
 
-  compact_theta_sketch_experimental<A> compact(bool ordered = true) const;
+  /**
+   * Update this sketch with a given unsigned 64-bit integer.
+   * @param value uint64_t to update the sketch with
+   */
+  void update(uint64_t value);
+
+  /**
+   * Update this sketch with a given signed 64-bit integer.
+   * @param value int64_t to update the sketch with
+   */
+  void update(int64_t value);
+
+  /**
+   * Update this sketch with a given unsigned 32-bit integer.
+   * For compatibility with Java implementation.
+   * @param value uint32_t to update the sketch with
+   */
+  void update(uint32_t value);
+
+  /**
+   * Update this sketch with a given signed 32-bit integer.
+   * For compatibility with Java implementation.
+   * @param value int32_t to update the sketch with
+   */
+  void update(int32_t value);
+
+  /**
+   * Update this sketch with a given unsigned 16-bit integer.
+   * For compatibility with Java implementation.
+   * @param value uint16_t to update the sketch with
+   */
+  void update(uint16_t value);
+
+  /**
+   * Update this sketch with a given signed 16-bit integer.
+   * For compatibility with Java implementation.
+   * @param value int16_t to update the sketch with
+   */
+  void update(int16_t value);
+
+  /**
+   * Update this sketch with a given unsigned 8-bit integer.
+   * For compatibility with Java implementation.
+   * @param value uint8_t to update the sketch with
+   */
+  void update(uint8_t value);
+
+  /**
+   * Update this sketch with a given signed 8-bit integer.
+   * For compatibility with Java implementation.
+   * @param value int8_t to update the sketch with
+   */
+  void update(int8_t value);
+
+  /**
+   * Update this sketch with a given double-precision floating point value.
+   * For compatibility with Java implementation.
+   * @param value double to update the sketch with
+   */
+  void update(double value);
+
+  /**
+   * Update this sketch with a given floating point value.
+   * For compatibility with Java implementation.
+   * @param value float to update the sketch with
+   */
+  void update(float value);
+
+  /**
+   * Update this sketch with given data of any type.
+   * This is a "universal" update that covers all cases above,
+   * but may produce different hashes.
+   * Be very careful to hash input values consistently using the same approach
+   * both over time and on different platforms
+   * and while passing sketches between C++ environment and Java environment.
+   * Otherwise two sketches that should represent overlapping sets will be disjoint.
+   * For instance, for signed 32-bit values call update(int32_t) method above,
+   * which does widening conversion to int64_t, if compatibility with Java is expected.
+   * @param data pointer to the data
+   * @param length of the data in bytes
+   */
+  void update(const void* data, size_t length);
+
+  /**
+   * Remove retained entries in excess of the nominal size k (if any)
+   */
+  void trim();
+
+  /**
+   * Converts this sketch to a compact sketch (ordered or unordered).
+   * @param ordered optional flag to specify if ordered sketch should be produced
+   * @return compact sketch
+   */
+  compact_theta_sketch_experimental<Allocator> compact(bool ordered = true) const;
+
+  virtual iterator begin();
+  virtual iterator end();
+  virtual const_iterator begin() const;
+  virtual const_iterator end() const;
 
 private:
   enum flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDERED };
-  using theta_table = theta_update_sketch_base<uint64_t, trivial_extract_key, A>;
   theta_table table_;
 
-  theta_sketch_experimental(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const A& allocator);
+  update_theta_sketch_experimental(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta,
+      uint64_t seed, const Allocator& allocator);
+
+  virtual void print_specifics(std::ostringstream& os) const;
 };
 
-template<typename A = std::allocator<uint64_t>>
-class compact_theta_sketch_experimental {
+// compact sketch
+
+template<typename Allocator = std::allocator<uint64_t>>
+class compact_theta_sketch_experimental: public theta_sketch_experimental<Allocator> {
 public:
-  compact_theta_sketch_experimental(const theta_sketch_experimental<A>& other, bool ordered);
+  using Base = theta_sketch_experimental<Allocator>;
+  using iterator = typename Base::iterator;
+  using const_iterator = typename Base::const_iterator;
+  using AllocBytes = typename std::allocator_traits<Allocator>::template rebind_alloc<uint8_t>;
+  using vector_bytes = std::vector<uint8_t, AllocBytes>;
+
+  // Instances of this type can be obtained:
+  // - by compacting an update_theta_sketch_experimental
+  // - as a result of a set operation
+  // - by deserializing a previously serialized compact sketch
+
+  compact_theta_sketch_experimental(const Base& other, bool ordered);
+  compact_theta_sketch_experimental(const compact_theta_sketch_experimental&) = default;
+  compact_theta_sketch_experimental(compact_theta_sketch_experimental&&) noexcept = default;
+  virtual ~compact_theta_sketch_experimental() = default;
+  compact_theta_sketch_experimental& operator=(const compact_theta_sketch_experimental&) = default;
+  compact_theta_sketch_experimental& operator=(compact_theta_sketch_experimental&&) = default;
+
+  virtual Allocator get_allocator() const;
+  virtual bool is_empty() const;
+  virtual bool is_ordered() const;
+  virtual uint64_t get_theta64() const;
+  virtual uint32_t get_num_retained() const;
+  virtual uint16_t get_seed_hash() const;
+
+  /**
+   * This method serializes the sketch into a given stream in a binary form
+   * @param os output stream
+   */
+  void serialize(std::ostream& os) const;
 
-  compact_theta_sketch_experimental(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta, std::vector<uint64_t, A>&& entries);
+  /**
+   * This method serializes the sketch as a vector of bytes.
+   * An optional header can be reserved in front of the sketch.
+   * It is an uninitialized space of a given size.
+   * This header is used in Datasketches PostgreSQL extension.
+   * @param header_size_bytes space to reserve in front of the sketch
+   */
+  vector_bytes serialize(unsigned header_size_bytes = 0) const;
 
-  uint32_t get_num_retained() const { return entries_.size(); }
+  virtual iterator begin();
+  virtual iterator end();
+  virtual const_iterator begin() const;
+  virtual const_iterator end() const;
 
-  string<A> to_string(bool detail = false) const;
+  /**
+   * This method deserializes a sketch from a given stream.
+   * @param is input stream
+   * @param seed the seed for the hash function that was used to create the sketch
+   * @return an instance of the sketch
+   */
+  static compact_theta_sketch_experimental deserialize(std::istream& is, uint64_t seed = DEFAULT_SEED);
 
-  A get_allocator() const;
+  /**
+   * This method deserializes a sketch from a given array of bytes.
+   * @param bytes pointer to the array of bytes
+   * @param size the size of the array
+   * @param seed the seed for the hash function that was used to create the sketch
+   * @return an instance of the sketch
+   */
+  static compact_theta_sketch_experimental deserialize(const void* bytes, size_t size, uint64_t seed = DEFAULT_SEED);
+
+  // for internal use
+  compact_theta_sketch_experimental(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta, std::vector<uint64_t, Allocator>&& entries);
 
 private:
   bool is_empty_;
   bool is_ordered_;
   uint16_t seed_hash_;
   uint64_t theta_;
-  std::vector<uint64_t, A> entries_;
+  std::vector<uint64_t, Allocator> entries_;
+
+  virtual void print_specifics(std::ostringstream& os) const;
+};
+
+template<typename Allocator>
+class update_theta_sketch_experimental<Allocator>::builder: public theta_base_builder<builder> {
+public:
+    builder(const Allocator& allocator = Allocator());
+    update_theta_sketch_experimental build() const;
+private:
+    Allocator allocator_;
 };
 
 } /* namespace datasketches */
diff --git a/tuple/include/theta_sketch_experimental_impl.hpp b/tuple/include/theta_sketch_experimental_impl.hpp
index 19fb707..4471215 100644
--- a/tuple/include/theta_sketch_experimental_impl.hpp
+++ b/tuple/include/theta_sketch_experimental_impl.hpp
@@ -19,24 +19,170 @@
 
 #include <sstream>
 
+#include "binomial_bounds.hpp"
+
 namespace datasketches {
 
-// experimental update theta sketch derived from the same base as tuple sketch
+template<typename A>
+bool theta_sketch_experimental<A>::is_estimation_mode() const {
+  return get_theta64() < theta_constants::MAX_THETA && !is_empty();
+}
+
+template<typename A>
+double theta_sketch_experimental<A>::get_theta() const {
+  return static_cast<double>(get_theta64()) / theta_constants::MAX_THETA;
+}
+
+template<typename A>
+double theta_sketch_experimental<A>::get_estimate() const {
+  return get_num_retained() / get_theta();
+}
 
 template<typename A>
-theta_sketch_experimental<A>::theta_sketch_experimental(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf,
+double theta_sketch_experimental<A>::get_lower_bound(uint8_t num_std_devs) const {
+  if (!is_estimation_mode()) return get_num_retained();
+  return binomial_bounds::get_lower_bound(get_num_retained(), get_theta(), num_std_devs);
+}
+
+template<typename A>
+double theta_sketch_experimental<A>::get_upper_bound(uint8_t num_std_devs) const {
+  if (!is_estimation_mode()) return get_num_retained();
+  return binomial_bounds::get_upper_bound(get_num_retained(), get_theta(), num_std_devs);
+}
+
+template<typename A>
+string<A> theta_sketch_experimental<A>::to_string(bool detail) const {
+  std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
+  os << "### Theta sketch summary:" << std::endl;
+  os << "   num retained entries : " << get_num_retained() << std::endl;
+  os << "   seed hash            : " << get_seed_hash() << std::endl;
+  os << "   empty?               : " << (is_empty() ? "true" : "false") << std::endl;
+  os << "   ordered?             : " << (is_ordered() ? "true" : "false") << std::endl;
+  os << "   estimation mode?     : " << (is_estimation_mode() ? "true" : "false") << std::endl;
+  os << "   theta (fraction)     : " << get_theta() << std::endl;
+  os << "   theta (raw 64-bit)   : " << get_theta64() << std::endl;
+  os << "   estimate             : " << this->get_estimate() << std::endl;
+  os << "   lower bound 95% conf : " << this->get_lower_bound(2) << std::endl;
+  os << "   upper bound 95% conf : " << this->get_upper_bound(2) << std::endl;
+  print_specifics(os);
+  os << "### End sketch summary" << std::endl;
+  if (detail) {
+    os << "### Retained entries" << std::endl;
+    for (const auto& hash: *this) {
+      os << hash << std::endl;
+    }
+    os << "### End retained entries" << std::endl;
+  }
+  return os.str();
+}
+
+// update sketch
+
+template<typename A>
+update_theta_sketch_experimental<A>::update_theta_sketch_experimental(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf,
     uint64_t theta, uint64_t seed, const A& allocator):
 table_(lg_cur_size, lg_nom_size, rf, theta, seed, allocator)
 {}
 
 template<typename A>
-void theta_sketch_experimental<A>::update(uint64_t key) {
-  update(&key, sizeof(key));
+A update_theta_sketch_experimental<A>::get_allocator() const {
+  return table_.allocator_;
+}
+
+template<typename A>
+bool update_theta_sketch_experimental<A>::is_empty() const {
+  return table_.is_empty_;
+}
+
+template<typename A>
+bool update_theta_sketch_experimental<A>::is_ordered() const {
+  return false;
+}
+
+template<typename A>
+uint64_t update_theta_sketch_experimental<A>::get_theta64() const {
+  return table_.theta_;
+}
+
+template<typename A>
+uint32_t update_theta_sketch_experimental<A>::get_num_retained() const {
+  return table_.num_entries_;
+}
+
+template<typename A>
+uint16_t update_theta_sketch_experimental<A>::get_seed_hash() const {
+  return compute_seed_hash(table_.seed_);
+}
+
+template<typename A>
+uint8_t update_theta_sketch_experimental<A>::get_lg_k() const {
+  return table_.lg_nom_size_;
+}
+
+template<typename A>
+auto update_theta_sketch_experimental<A>::get_rf() const -> resize_factor {
+  return table_.rf_;
+}
+
+template<typename A>
+void update_theta_sketch_experimental<A>::update(uint64_t value) {
+  update(&value, sizeof(value));
+}
+
+template<typename A>
+void update_theta_sketch_experimental<A>::update(int64_t value) {
+  update(&value, sizeof(value));
+}
+
+template<typename A>
+void update_theta_sketch_experimental<A>::update(uint32_t value) {
+  update(static_cast<int32_t>(value));
+}
+
+template<typename A>
+void update_theta_sketch_experimental<A>::update(int32_t value) {
+  update(static_cast<int64_t>(value));
+}
+
+template<typename A>
+void update_theta_sketch_experimental<A>::update(uint16_t value) {
+  update(static_cast<int16_t>(value));
+}
+
+template<typename A>
+void update_theta_sketch_experimental<A>::update(int16_t value) {
+  update(static_cast<int64_t>(value));
 }
 
 template<typename A>
-void theta_sketch_experimental<A>::update(const void* key, size_t length) {
-  const uint64_t hash = table_.hash_and_screen(key, length);
+void update_theta_sketch_experimental<A>::update(uint8_t value) {
+  update(static_cast<int8_t>(value));
+}
+
+template<typename A>
+void update_theta_sketch_experimental<A>::update(int8_t value) {
+  update(static_cast<int64_t>(value));
+}
+
+template<typename A>
+void update_theta_sketch_experimental<A>::update(double value) {
+  update(canonical_double(value));
+}
+
+template<typename A>
+void update_theta_sketch_experimental<A>::update(float value) {
+  update(static_cast<double>(value));
+}
+
+template<typename A>
+void update_theta_sketch_experimental<A>::update(const std::string& value) {
+  if (value.empty()) return;
+  update(value.c_str(), value.length());
+}
+
+template<typename A>
+void update_theta_sketch_experimental<A>::update(const void* data, size_t length) {
+  const uint64_t hash = table_.hash_and_screen(data, length);
   if (hash == 0) return;
   auto result = table_.find(hash);
   if (!result.second) {
@@ -45,55 +191,55 @@ void theta_sketch_experimental<A>::update(const void* key, size_t length) {
 }
 
 template<typename A>
-void theta_sketch_experimental<A>::trim() {
+void update_theta_sketch_experimental<A>::trim() {
   table_.trim();
 }
 
 template<typename A>
-string<A> theta_sketch_experimental<A>::to_string(bool detail) const {
-  std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
-  auto type = typeid(*this).name();
-  os << "sizeof(" << type << ")=" << sizeof(*this) << std::endl;
-  os << table_.to_string();
-  if (detail) {
-    for (const auto& it: table_) {
-      if (it != 0) {
-        os << it << std::endl;
-      }
-    }
-  }
-  return os.str();
+auto update_theta_sketch_experimental<A>::begin() -> iterator {
+  return iterator(table_.entries_, 1 << table_.lg_cur_size_, 0);
 }
 
 template<typename A>
-auto theta_sketch_experimental<A>::begin() const -> const_iterator {
+auto update_theta_sketch_experimental<A>::end() -> iterator {
+  return iterator(nullptr, 0, 1 << table_.lg_cur_size_);
+}
+
+template<typename A>
+auto update_theta_sketch_experimental<A>::begin() const -> const_iterator {
   return const_iterator(table_.entries_, 1 << table_.lg_cur_size_, 0);
 }
 
 template<typename A>
-auto theta_sketch_experimental<A>::end() const -> const_iterator {
+auto update_theta_sketch_experimental<A>::end() const -> const_iterator {
   return const_iterator(nullptr, 0, 1 << table_.lg_cur_size_);
 }
-
 template<typename A>
-compact_theta_sketch_experimental<A> theta_sketch_experimental<A>::compact(bool ordered) const {
+compact_theta_sketch_experimental<A> update_theta_sketch_experimental<A>::compact(bool ordered) const {
   return compact_theta_sketch_experimental<A>(*this, ordered);
 }
 
+template<typename A>
+void update_theta_sketch_experimental<A>::print_specifics(std::ostringstream& os) const {
+  os << "   lg nominal size      : " << static_cast<int>(table_.lg_nom_size_) << std::endl;
+  os << "   lg current size      : " << static_cast<int>(table_.lg_cur_size_) << std::endl;
+  os << "   resize factor        : " << (1 << table_.rf_) << std::endl;
+}
+
 // builder
 
 template<typename A>
-theta_sketch_experimental<A>::builder::builder(const A& allocator): allocator_(allocator) {}
+update_theta_sketch_experimental<A>::builder::builder(const A& allocator): allocator_(allocator) {}
 
 template<typename A>
-theta_sketch_experimental<A> theta_sketch_experimental<A>::builder::build() const {
-  return theta_sketch_experimental(this->starting_lg_size(), this->lg_k_, this->rf_, this->starting_theta(), this->seed_, allocator_);
+update_theta_sketch_experimental<A> update_theta_sketch_experimental<A>::builder::build() const {
+  return update_theta_sketch_experimental(this->starting_lg_size(), this->lg_k_, this->rf_, this->starting_theta(), this->seed_, allocator_);
 }
 
 // experimental compact theta sketch
 
 template<typename A>
-compact_theta_sketch_experimental<A>::compact_theta_sketch_experimental(const theta_sketch_experimental<A>& other, bool ordered):
+compact_theta_sketch_experimental<A>::compact_theta_sketch_experimental(const Base& other, bool ordered):
 is_empty_(other.is_empty()),
 is_ordered_(other.is_ordered()),
 seed_hash_(other.get_seed_hash()),
@@ -116,21 +262,57 @@ entries_(std::move(entries))
 {}
 
 template<typename A>
-string<A> compact_theta_sketch_experimental<A>::to_string(bool detail) const {
-  std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
-  auto type = typeid(*this).name();
-  os << "sizeof(" << type << ")=" << sizeof(*this) << std::endl;
-  if (detail) {
-    for (const auto& hash: entries_) {
-      os << hash << std::endl;
-    }
-  }
-  return os.str();
+A compact_theta_sketch_experimental<A>::get_allocator() const {
+  return entries_.get_allocator();
 }
 
 template<typename A>
-A compact_theta_sketch_experimental<A>::get_allocator() const {
-  return entries_.get_allocator();
+bool compact_theta_sketch_experimental<A>::is_empty() const {
+  return is_empty_;
+}
+
+template<typename A>
+bool compact_theta_sketch_experimental<A>::is_ordered() const {
+  return is_ordered_;
+}
+
+template<typename A>
+uint64_t compact_theta_sketch_experimental<A>::get_theta64() const {
+  return theta_;
+}
+
+template<typename A>
+uint32_t compact_theta_sketch_experimental<A>::get_num_retained() const {
+  return entries_.size();
+}
+
+template<typename A>
+uint16_t compact_theta_sketch_experimental<A>::get_seed_hash() const {
+  return seed_hash_;
+}
+
+template<typename A>
+auto compact_theta_sketch_experimental<A>::begin() -> iterator {
+  return iterator(entries_.data(), entries_.size(), 0);
+}
+
+template<typename A>
+auto compact_theta_sketch_experimental<A>::end() -> iterator {
+  return iterator(nullptr, 0, entries_.size());
+}
+
+template<typename A>
+auto compact_theta_sketch_experimental<A>::begin() const -> const_iterator {
+  return const_iterator(entries_.data(), entries_.size(), 0);
+}
+
+template<typename A>
+auto compact_theta_sketch_experimental<A>::end() const -> const_iterator {
+  return const_iterator(nullptr, 0, entries_.size());
+}
+
+template<typename A>
+void compact_theta_sketch_experimental<A>::print_specifics(std::ostringstream& os) const {
 }
 
 } /* namespace datasketches */
diff --git a/tuple/test/theta_sketch_experimental_test.cpp b/tuple/test/theta_sketch_experimental_test.cpp
index 5407689..0fb0286 100644
--- a/tuple/test/theta_sketch_experimental_test.cpp
+++ b/tuple/test/theta_sketch_experimental_test.cpp
@@ -26,7 +26,7 @@
 namespace datasketches {
 
 TEST_CASE("theta_sketch_experimental: basics ", "[theta_sketch]") {
-  auto update_sketch = theta_sketch_experimental<>::builder().build();
+  auto update_sketch = update_theta_sketch_experimental<>::builder().build();
   update_sketch.update(1);
   update_sketch.update(2);
   REQUIRE(update_sketch.get_num_retained() == 2);
diff --git a/tuple/test/theta_union_experimental_test.cpp b/tuple/test/theta_union_experimental_test.cpp
index d08a070..c270a11 100644
--- a/tuple/test/theta_union_experimental_test.cpp
+++ b/tuple/test/theta_union_experimental_test.cpp
@@ -27,11 +27,11 @@
 namespace datasketches {
 
 TEST_CASE("theta_union_exeperimental") {
-  auto update_sketch1 = theta_sketch_experimental<>::builder().build();
+  auto update_sketch1 = update_theta_sketch_experimental<>::builder().build();
   update_sketch1.update(1);
   update_sketch1.update(2);
 
-  auto update_sketch2 = theta_sketch_experimental<>::builder().build();
+  auto update_sketch2 = update_theta_sketch_experimental<>::builder().build();
   update_sketch2.update(1);
   update_sketch2.update(3);
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org