You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2022/12/14 03:42:38 UTC

[datasketches-cpp] branch density_sketch updated: added iterator and to_string

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch density_sketch
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git


The following commit(s) were added to refs/heads/density_sketch by this push:
     new c990720  added iterator and to_string
c990720 is described below

commit c99072026f12c8e740fb4a593b68cbaf92c0e3e2
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Tue Dec 13 19:42:32 2022 -0800

    added iterator and to_string
---
 density/include/density_sketch.hpp      |  35 +++++++++
 density/include/density_sketch_impl.hpp | 124 +++++++++++++++++++++++++++++++-
 density/test/density_sketch_test.cpp    |  17 ++++-
 3 files changed, 173 insertions(+), 3 deletions(-)

diff --git a/density/include/density_sketch.hpp b/density/include/density_sketch.hpp
index a7edb93..551d51f 100755
--- a/density/include/density_sketch.hpp
+++ b/density/include/density_sketch.hpp
@@ -26,6 +26,8 @@
 #include <numeric>
 #include <cmath>
 
+#include "common_defs.hpp"
+
 /*
  * Based on the following paper:
  * Zohar Karnin, Edo Liberty "Discrepancy, Coresets, and Sketches in Machine Learning"
@@ -114,6 +116,17 @@ public:
    */
   Allocator get_allocator() const;
 
+  /**
+   * Prints a summary of the sketch.
+   * @param print_levels if true include information about levels
+   * @param print_items if true include sketch data
+   */
+  string<Allocator> to_string(bool print_levels = false, bool print_items = false) const;
+
+  class const_iterator;
+  const_iterator begin() const;
+  const_iterator end() const;
+
 private:
   uint16_t k_;
   uint32_t dim_;
@@ -125,6 +138,28 @@ private:
   void compact_level(unsigned height);
 };
 
+template<typename T, typename K, typename A>
+class density_sketch<T, K, A>::const_iterator: public std::iterator<std::input_iterator_tag, T> { // read-only iteration over the points held in the sketch levels; NOTE(review): std::iterator is deprecated since C++17
+public:
+  using Vector = density_sketch<T, K, A>::Vector;
+  using value_type = std::pair<const Vector&, const uint64_t>; // (point, weight); replaces the value_type inherited from std::iterator
+  const_iterator& operator++(); // pre-increment
+  const_iterator& operator++(int); // post-increment; NOTE(review): post-increment conventionally returns the previous position by value, not by reference
+  bool operator==(const const_iterator& other) const;
+  bool operator!=(const const_iterator& other) const;
+  const value_type operator*() const; // builds the pair on each call, hence returned by value
+  const return_value_holder<value_type> operator->() const; // holder wraps the temporary pair so that -> can be applied to it (presumably; return_value_holder is declared elsewhere)
+private:
+  using LevelsIterator = typename density_sketch<T, K, A>::Levels::const_iterator;
+  using LevelIterator = typename density_sketch<T, K, A>::Level::const_iterator;
+  LevelsIterator levels_it_; // level currently being traversed
+  LevelsIterator levels_end_; // one past the last level
+  LevelIterator level_it_; // current point inside *levels_it_
+  unsigned height_; // level counter from which operator*() derives the weight
+  friend class density_sketch<T, K, A>;
+  const_iterator(LevelsIterator begin, LevelsIterator end); // private: instances come from density_sketch::begin() and end()
+};
+
 } /* namespace datasketches */
 
 #include "density_sketch_impl.hpp"
diff --git a/density/include/density_sketch_impl.hpp b/density/include/density_sketch_impl.hpp
index 895a3d6..6771767 100755
--- a/density/include/density_sketch_impl.hpp
+++ b/density/include/density_sketch_impl.hpp
@@ -21,8 +21,8 @@
 #define DENSITY_SKETCH_IMPL_HPP_
 
 #include <algorithm>
+#include <sstream>
 
-#include "common_defs.hpp"
 #include "conditional_forward.hpp"
 
 namespace datasketches {
@@ -140,6 +140,128 @@ void density_sketch<T, K, A>::compact_level(unsigned height) {
   level.clear();
 }
 
+template<typename T, typename K, typename A>
+string<A> density_sketch<T, K, A>::to_string(bool print_levels, bool print_items) const {
+  // Renders a multi-line summary; per-level sizes and the retained points are appended on request.
+  // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
+  // The stream does not support passing an allocator instance, and alternatives are complicated.
+  std::ostringstream os;
+  os << "### Density sketch summary:" << '\n';
+  os << "   K              : " << k_ << '\n';
+  os << "   Dim            : " << dim_ << '\n';
+  os << "   Empty          : " << (is_empty() ? "true" : "false") << '\n';
+  os << "   N              : " << n_ << '\n';
+  os << "   Retained items : " << num_retained_ << '\n';
+  os << "   Levels         : " << levels_.size() << '\n';
+  os << "### End sketch summary" << '\n';
+
+  if (print_levels) {
+    os << "### Density sketch levels:" << '\n';
+    os << "   height: size" << '\n';
+    for (unsigned height = 0; height < levels_.size(); ++height) {
+      os << "   " << height << ": "
+        << levels_[height].size() << '\n';
+    }
+    os << "### End sketch levels" << '\n';
+  }
+
+  if (print_items) {
+    os << "### Density sketch data:" << '\n';
+    for (unsigned height = 0; height < levels_.size(); ++height) {
+      os << " level " << height << ": " << '\n';
+      for (const auto& point: levels_[height]) {
+        // one line per point: coordinates comma-separated inside brackets
+        os << "   [";
+        bool first = true;
+        for (auto value: point) {
+          if (first) {
+            first = false;
+          } else {
+            os << ", ";
+          }
+          os << value;
+        }
+        os << "]" << '\n';
+      }
+    }
+    os << "### End sketch data" << '\n';
+  }
+  return string<A>(os.str().c_str(), levels_.get_allocator());
+}
+
+template<typename T, typename K, typename A>
+auto density_sketch<T, K, A>::begin() const -> const_iterator {
+  return {levels_.begin(), levels_.end()}; // the constructor moves past leading empty levels
+}
+
+template<typename T, typename K, typename A>
+auto density_sketch<T, K, A>::end() const -> const_iterator {
+  return {levels_.end(), levels_.end()}; // empty range of levels: the past-the-end position
+}
+
+// iterator
+
+template<typename T, typename K, typename A>
+density_sketch<T, K, A>::const_iterator::const_iterator(LevelsIterator begin, LevelsIterator end):
+levels_it_(begin),
+levels_end_(end),
+level_it_(),
+height_(0)
+{
+  // Position on the first point of the first non-empty level in [begin, end); stays at end if there is none.
+  // height_ must advance together with levels_it_, otherwise operator*() reports weight 1 for points above skipped levels.
+  for (; levels_it_ != levels_end_; ++levels_it_, ++height_) {
+    level_it_ = levels_it_->begin();
+    if (level_it_ != levels_it_->end()) break;
+  }
+}
+
+template<typename T, typename K, typename A>
+auto density_sketch<T, K, A>::const_iterator::operator++() -> const_iterator& {
+  // Pre-increment: step to the next point, moving on to the next non-empty level when needed.
+  ++level_it_;
+  // still inside the current level: nothing more to do
+  if (level_it_ != levels_it_->end()) return *this;
+  // The current level is exhausted: leave it, then skip any empty levels.
+  // height_ is bumped together with levels_it_ on every step.
+  ++levels_it_;
+  ++height_;
+  for (; levels_it_ != levels_end_; ++levels_it_, ++height_) {
+    level_it_ = levels_it_->begin();
+    if (level_it_ != levels_it_->end()) break;
+  }
+  return *this;
+}
+
+template<typename T, typename K, typename A>
+auto density_sketch<T, K, A>::const_iterator::operator++(int) -> const_iterator& { // post-increment
+  const_iterator tmp(*this); // snapshot of the position before advancing
+  operator++();
+  return tmp; // NOTE(review): tmp is a local but the return type is a reference, so the caller receives a dangling reference; returning const_iterator by value (here and in the class declaration) would avoid this
+}
+
+template<typename T, typename K, typename A>
+bool density_sketch<T, K, A>::const_iterator::operator==(const const_iterator& other) const {
+  // Levels must match; the position inside a level is compared only when not past the last level.
+  if (levels_it_ != other.levels_it_) return false;
+  return levels_it_ == levels_end_ || level_it_ == other.level_it_;
+}
+
+template<typename T, typename K, typename A>
+bool density_sketch<T, K, A>::const_iterator::operator!=(const const_iterator& other) const {
+  return !(*this == other); // plain negation of operator==
+}
+
+template<typename T, typename K, typename A>
+auto density_sketch<T, K, A>::const_iterator::operator*() const -> const value_type {
+  return value_type(*level_it_, static_cast<uint64_t>(1) << height_); // (current point, weight 2^height_)
+}
+
+template<typename T, typename K, typename A>
+auto density_sketch<T, K, A>::const_iterator::operator->() const -> const return_value_holder<value_type> {
+  return operator*(); // the pair produced by operator*() is converted to the holder
+}
+
 } /* namespace datasketches */
 
 #endif
diff --git a/density/test/density_sketch_test.cpp b/density/test/density_sketch_test.cpp
index 704d3ac..a51ceb2 100755
--- a/density/test/density_sketch_test.cpp
+++ b/density/test/density_sketch_test.cpp
@@ -21,8 +21,6 @@
 
 #include <density_sketch.hpp>
 
-#include <iostream>
-
 namespace datasketches {
 
 TEST_CASE("density sketch: empty", "[density_sketch]") {
@@ -58,4 +56,19 @@ TEST_CASE("density sketch: merge", "[density_sketch]") {
   REQUIRE(sketch1.get_num_retained() == 3);
 }
 
+TEST_CASE("density sketch: iterator", "[density_sketch]") {
+  const unsigned num_points = 1000;
+  density_sketch<float> sketch(10, 3);
+  // feed 3-dimensional points (i, i, i) for i = 1..num_points
+  for (unsigned i = 1; i <= num_points; ++i) sketch.update(std::vector<float>(3, i));
+  REQUIRE(sketch.get_n() == num_points);
+  unsigned num_visited = 0;
+  for (const auto& entry: sketch) {
+    // each visited point must have the dimensionality of the sketch
+    REQUIRE(entry.first.size() == sketch.get_dim());
+    ++num_visited;
+  }
+  REQUIRE(num_visited == sketch.get_num_retained());
+}
+
 } /* namespace datasketches */


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org