You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2022/12/14 03:42:38 UTC
[datasketches-cpp] branch density_sketch updated: added iterator and to_string
This is an automated email from the ASF dual-hosted git repository.
alsay pushed a commit to branch density_sketch
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git
The following commit(s) were added to refs/heads/density_sketch by this push:
new c990720 added iterator and to_string
c990720 is described below
commit c99072026f12c8e740fb4a593b68cbaf92c0e3e2
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Tue Dec 13 19:42:32 2022 -0800
added iterator and to_string
---
density/include/density_sketch.hpp | 35 +++++++++
density/include/density_sketch_impl.hpp | 124 +++++++++++++++++++++++++++++++-
density/test/density_sketch_test.cpp | 17 ++++-
3 files changed, 173 insertions(+), 3 deletions(-)
diff --git a/density/include/density_sketch.hpp b/density/include/density_sketch.hpp
index a7edb93..551d51f 100755
--- a/density/include/density_sketch.hpp
+++ b/density/include/density_sketch.hpp
@@ -26,6 +26,8 @@
#include <numeric>
#include <cmath>
+#include "common_defs.hpp"
+
/*
* Based on the following paper:
* Zohar Karnin, Edo Liberty "Discrepancy, Coresets, and Sketches in Machine Learning"
@@ -114,6 +116,17 @@ public:
*/
Allocator get_allocator() const;
+  /**
+   * Returns a human-readable summary of the sketch as a string.
+   * @param print_levels if true include information about levels
+   * @param print_items if true include sketch data
+   * @return the summary text
+   */
+ string<Allocator> to_string(bool print_levels = false, bool print_items = false) const;
+
+ class const_iterator;
+ const_iterator begin() const;
+ const_iterator end() const;
+
private:
uint16_t k_;
uint32_t dim_;
@@ -125,6 +138,28 @@ private:
void compact_level(unsigned height);
};
+/**
+ * Read-only input iterator over the points retained by a density sketch.
+ * Dereferencing yields a pair: a const reference to the retained vector
+ * and a 64-bit weight for that vector.
+ */
+template<typename T, typename K, typename A>
+class density_sketch<T, K, A>::const_iterator {
+public:
+  using Vector = density_sketch<T, K, A>::Vector;
+  // Iterator traits are spelled out explicitly: the std::iterator base class is
+  // deprecated since C++17, and the traits it supplied (T*, T&) did not describe
+  // what operator-> and operator* actually return.
+  using iterator_category = std::input_iterator_tag;
+  using value_type = std::pair<const Vector&, const uint64_t>;
+  using difference_type = std::ptrdiff_t;
+  using pointer = const return_value_holder<value_type>;
+  using reference = const value_type;
+  const_iterator& operator++();
+  // NOTE(review): the postfix form is declared to return a reference, unlike the
+  // conventional by-value postfix increment. Changing it requires updating the
+  // out-of-class definition at the same time.
+  const_iterator& operator++(int);
+  bool operator==(const const_iterator& other) const;
+  bool operator!=(const const_iterator& other) const;
+  const value_type operator*() const;
+  const return_value_holder<value_type> operator->() const;
+private:
+  using LevelsIterator = typename density_sketch<T, K, A>::Levels::const_iterator;
+  using LevelIterator = typename density_sketch<T, K, A>::Level::const_iterator;
+  LevelsIterator levels_it_;  // level currently being traversed
+  LevelsIterator levels_end_; // one past the last level
+  LevelIterator level_it_;    // position within the current level
+  unsigned height_;           // index of the current level; the weight is derived from it
+  friend class density_sketch<T, K, A>;
+  // only the sketch creates iterators (see the friend declaration above)
+  const_iterator(LevelsIterator begin, LevelsIterator end);
+};
+
} /* namespace datasketches */
#include "density_sketch_impl.hpp"
diff --git a/density/include/density_sketch_impl.hpp b/density/include/density_sketch_impl.hpp
index 895a3d6..6771767 100755
--- a/density/include/density_sketch_impl.hpp
+++ b/density/include/density_sketch_impl.hpp
@@ -21,8 +21,8 @@
#define DENSITY_SKETCH_IMPL_HPP_
#include <algorithm>
+#include <sstream>
-#include "common_defs.hpp"
#include "conditional_forward.hpp"
namespace datasketches {
@@ -140,6 +140,128 @@ void density_sketch<T, K, A>::compact_level(unsigned height) {
level.clear();
}
+/**
+ * Builds a human-readable description of the sketch.
+ * The summary section is always produced; the other two sections are optional.
+ * @param print_levels if true, append the number of points held at each height
+ * @param print_items if true, append the coordinates of every retained point, grouped by level
+ * @return the description as a string constructed with the allocator of the levels container
+ */
+template<typename T, typename K, typename A>
+string<A> density_sketch<T, K, A>::to_string(bool print_levels, bool print_items) const {
+  // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
+  // The stream does not support passing an allocator instance, and alternatives are complicated.
+  std::ostringstream os;
+  // '\n' is used instead of std::endl: flushing an in-memory stream has no effect
+  os << "### Density sketch summary:" << '\n';
+  os << " K : " << k_ << '\n';
+  os << " Dim : " << dim_ << '\n';
+  os << " Empty : " << (is_empty() ? "true" : "false") << '\n';
+  os << " N : " << n_ << '\n';
+  os << " Retained items : " << num_retained_ << '\n';
+  os << " Levels : " << levels_.size() << '\n';
+  os << "### End sketch summary" << '\n';
+
+  if (print_levels) {
+    os << "### Density sketch levels:" << '\n';
+    os << " height: size" << '\n';
+    for (unsigned height = 0; height < levels_.size(); ++height) {
+      os << " " << height << ": "
+        << levels_[height].size() << '\n';
+    }
+    os << "### End sketch levels" << '\n';
+  }
+
+  if (print_items) {
+    os << "### Density sketch data:" << '\n';
+    for (unsigned height = 0; height < levels_.size(); ++height) {
+      os << " level " << height << ": " << '\n';
+      for (const auto& point: levels_[height]) {
+        os << " [";
+        // comma-separate the coordinates: no separator before the first one
+        bool first = true;
+        for (auto value: point) {
+          if (first) {
+            first = false;
+          } else {
+            os << ", ";
+          }
+          os << value;
+        }
+        os << "]" << '\n';
+      }
+    }
+    os << "### End sketch data" << '\n';
+  }
+  return string<A>(os.str().c_str(), levels_.get_allocator());
+}
+
+// Iterator positioned at the first retained point (spans all levels).
+template<typename T, typename K, typename A>
+auto density_sketch<T, K, A>::begin() const -> const_iterator {
+  const auto& all_levels = levels_;
+  return const_iterator(all_levels.begin(), all_levels.end());
+}
+
+// Past-the-end iterator: constructed over an empty range of levels.
+template<typename T, typename K, typename A>
+auto density_sketch<T, K, A>::end() const -> const_iterator {
+  const auto& all_levels = levels_;
+  return const_iterator(all_levels.end(), all_levels.end());
+}
+
+// iterator
+
+/**
+ * Positions the iterator on the first point of the first non-empty level
+ * in [begin, end), or at the end if every level is empty.
+ * @param begin first level to visit
+ * @param end one past the last level
+ */
+template<typename T, typename K, typename A>
+density_sketch<T, K, A>::const_iterator::const_iterator(LevelsIterator begin, LevelsIterator end):
+levels_it_(begin),
+levels_end_(end),
+level_it_(),
+height_(0)
+{
+  // skip empty levels
+  while (levels_it_ != levels_end_) {
+    level_it_ = levels_it_->begin();
+    if (level_it_ != levels_it_->end()) break;
+    ++levels_it_;
+    // keep the height in step with the level, exactly as operator++ does;
+    // otherwise points are reported with the weight of level 0 when the lowest levels are empty
+    ++height_;
+  }
+}
+
+// Prefix increment: step to the next point, moving on to the next
+// non-empty level once the current one is exhausted.
+template<typename T, typename K, typename A>
+auto density_sketch<T, K, A>::const_iterator::operator++() -> const_iterator& {
+  ++level_it_;
+  // common case: still inside the current level
+  if (level_it_ != levels_it_->end()) return *this;
+  // current level is exhausted: look for the next level that holds at least one point
+  for (++levels_it_, ++height_; levels_it_ != levels_end_; ++levels_it_, ++height_) {
+    level_it_ = levels_it_->begin();
+    if (level_it_ != levels_it_->end()) break;
+  }
+  return *this;
+}
+
+template<typename T, typename K, typename A>
+auto density_sketch<T, K, A>::const_iterator::operator++(int) -> const_iterator& { // postfix increment
+ const_iterator tmp(*this); // snapshot of the position before advancing
+ operator++(); // advance this iterator via the prefix form
+ return tmp; // NOTE(review): tmp is a local but the return type is const_iterator&, so the caller gets a dangling reference; the declaration and this definition should return by value
+}
+
+// Two iterators are equal when they are on the same level and either that
+// level is the end (position within a level is then irrelevant) or they
+// point at the same element of it.
+template<typename T, typename K, typename A>
+bool density_sketch<T, K, A>::const_iterator::operator==(const const_iterator& other) const {
+  return levels_it_ == other.levels_it_
+      && (levels_it_ == levels_end_ || level_it_ == other.level_it_);
+}
+
+// Negation of operator==.
+template<typename T, typename K, typename A>
+bool density_sketch<T, K, A>::const_iterator::operator!=(const const_iterator& other) const {
+  return !(*this == other);
+}
+
+// Yields the current point together with its weight, 2^height of the current level.
+template<typename T, typename K, typename A>
+auto density_sketch<T, K, A>::const_iterator::operator*() const -> const value_type {
+  const uint64_t weight = 1ULL << height_;
+  return value_type(*level_it_, weight);
+}
+
+// Member access: wraps the value produced by operator* in a holder,
+// since there is no stored pair to point at.
+template<typename T, typename K, typename A>
+auto density_sketch<T, K, A>::const_iterator::operator->() const -> const return_value_holder<value_type> {
+  return operator*();
+}
+
} /* namespace datasketches */
#endif
diff --git a/density/test/density_sketch_test.cpp b/density/test/density_sketch_test.cpp
index 704d3ac..a51ceb2 100755
--- a/density/test/density_sketch_test.cpp
+++ b/density/test/density_sketch_test.cpp
@@ -21,8 +21,6 @@
#include <density_sketch.hpp>
-#include <iostream>
-
namespace datasketches {
TEST_CASE("density sketch: empty", "[density_sketch]") {
@@ -58,4 +56,19 @@ TEST_CASE("density sketch: merge", "[density_sketch]") {
REQUIRE(sketch1.get_num_retained() == 3);
}
+// Feeds 1000 three-dimensional points into a sketch and checks that iteration
+// visits exactly the retained points, each with the sketch's dimension.
+TEST_CASE("density sketch: iterator", "[density_sketch]") {
+  const unsigned num_points = 1000;
+  density_sketch<float> sketch(10, 3);
+  for (unsigned i = 1; i <= num_points; ++i) {
+    sketch.update(std::vector<float>(3, i));
+  }
+  REQUIRE(sketch.get_n() == num_points);
+  //std::cout << sketch.to_string(true, true);
+  unsigned visited = 0;
+  for (auto entry: sketch) {
+    ++visited;
+    // just to assert something about the output
+    REQUIRE(entry.first.size() == sketch.get_dim());
+  }
+  REQUIRE(visited == sketch.get_num_retained());
+}
+
} /* namespace datasketches */
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org