You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2022/06/03 00:51:57 UTC
[datasketches-cpp] 01/01: converting constructor
This is an automated email from the ASF dual-hosted git repository.
alsay pushed a commit to branch kll_converting_constructor
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git
commit a92a26f7f78453afec6f90b02566dc014b45e3f7
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Thu Jun 2 17:51:26 2022 -0700
converting constructor
---
kll/include/kll_sketch.hpp | 40 ++++++++++++++++-----
kll/include/kll_sketch_impl.hpp | 73 +++++++++++++++++++++++++++++++++++---
kll/test/kll_sketch_test.cpp | 77 +++++++++++++++++++++++++++++++++++++----
3 files changed, 172 insertions(+), 18 deletions(-)
diff --git a/kll/include/kll_sketch.hpp b/kll/include/kll_sketch.hpp
index fcf8ebb..8616623 100644
--- a/kll/include/kll_sketch.hpp
+++ b/kll/include/kll_sketch.hpp
@@ -182,6 +182,14 @@ class kll_sketch {
kll_sketch& operator=(const kll_sketch& other);
kll_sketch& operator=(kll_sketch&& other);
+ /*
+ * Type converting constructor.
+ * @param other sketch of a different type
+ * @param allocator instance of an Allocator
+ */
+ template<typename TT, typename CC, typename AA>
+ explicit kll_sketch(const kll_sketch<TT, CC, AA> & other, const A& allocator = A());
+
/**
* Updates this sketch with the given data item.
* @param value an item from a stream of items
@@ -208,6 +216,13 @@ class kll_sketch {
*/
uint16_t get_k() const;
+ /**
+ * Returns min_k, which is an internal parameter for error estimation
+ * after merging sketches with different k.
+ * @return parameter min_k
+ */
+ uint16_t get_min_k() const;
+
/**
* Returns the length of the input stream.
* @return stream length
@@ -220,6 +235,13 @@ class kll_sketch {
*/
uint32_t get_num_retained() const;
+ /**
+ * Returns the current capacity in items allocated by the sketch.
+ * For internal use.
+ * @return the capacity
+ */
+ uint32_t get_capacity() const;
+
/**
* Returns true if this sketch is in estimation mode.
* @return estimation mode flag
@@ -390,7 +412,7 @@ class kll_sketch {
/**
* Computes size needed to serialize the current state of the sketch.
* This version is for fixed-size arithmetic types (integral and floating point).
- * @param instance of a SerDe
+ * @param serde instance of a SerDe
* @return size in bytes needed to serialize this sketch
*/
template<typename TT = T, typename SerDe = S, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
@@ -399,7 +421,7 @@ class kll_sketch {
/**
* Computes size needed to serialize the current state of the sketch.
* This version is for all other types and can be expensive since every item needs to be looked at.
- * @param instance of a SerDe
+ * @param serde instance of a SerDe
* @return size in bytes needed to serialize this sketch
*/
template<typename TT = T, typename SerDe = S, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
@@ -459,7 +481,7 @@ class kll_sketch {
/**
* This method deserializes a sketch from a given stream.
* @param is input stream
- * @param instance of an Allocator
+ * @param allocator instance of an Allocator
* @return an instance of a sketch
*
* Deprecated, to be removed in the next major version
@@ -469,8 +491,8 @@ class kll_sketch {
/**
* This method deserializes a sketch from a given stream.
* @param is input stream
- * @param instance of a SerDe
- * @param instance of an Allocator
+ * @param serge instance of a SerDe
+ * @param allocator instance of an Allocator
* @return an instance of a sketch
*/
template<typename SerDe = S>
@@ -480,7 +502,7 @@ class kll_sketch {
* This method deserializes a sketch from a given array of bytes.
* @param bytes pointer to the array of bytes
* @param size the size of the array
- * @param instance of an Allocator
+ * @param allocator instance of an Allocator
* @return an instance of a sketch
*
* Deprecated, to be removed in the next major version
@@ -491,8 +513,8 @@ class kll_sketch {
* This method deserializes a sketch from a given array of bytes.
* @param bytes pointer to the array of bytes
* @param size the size of the array
- * @param instance of a SerDe
- * @param instance of an Allocator
+ * @param serde instance of a SerDe
+ * @param allocator instance of an Allocator
* @return an instance of a sketch
*/
template<typename SerDe = S>
@@ -606,6 +628,8 @@ class kll_sketch {
static void check_serial_version(uint8_t serial_version);
static void check_family_id(uint8_t family_id);
+ void check_sorting() const;
+
// implementations for floating point types
template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
static const TT& get_invalid_value() {
diff --git a/kll/include/kll_sketch_impl.hpp b/kll/include/kll_sketch_impl.hpp
index 1302ac9..211a926 100644
--- a/kll/include/kll_sketch_impl.hpp
+++ b/kll/include/kll_sketch_impl.hpp
@@ -26,6 +26,7 @@
#include <stdexcept>
#include "conditional_forward.hpp"
+#include "count_zeros.hpp"
#include "memory_operations.hpp"
#include "kll_helper.hpp"
@@ -69,7 +70,7 @@ max_value_(nullptr),
is_level_zero_sorted_(other.is_level_zero_sorted_)
{
items_ = allocator_.allocate(items_size_);
- std::copy(other.items_ + levels_[0], other.items_ + levels_[num_levels_], items_ + levels_[0]);
+ for (auto i = levels_[0]; i < levels_[num_levels_]; ++i) new (&items_[i]) T(other.items_[i]);
if (other.min_value_ != nullptr) min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
if (other.max_value_ != nullptr) max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
}
@@ -147,6 +148,50 @@ kll_sketch<T, C, S, A>::~kll_sketch() {
}
}
+template<typename T, typename C, typename S, typename A>
+template<typename TT, typename CC, typename AA>
+kll_sketch<T, C, S, A>::kll_sketch(const kll_sketch<TT, CC, AA>& other, const A& allocator):
+allocator_(allocator),
+k_(other.get_k()),
+m_(DEFAULT_M),
+min_k_(other.get_min_k()),
+n_(other.get_n()),
+num_levels_(1),
+levels_(2, 0, allocator),
+items_(nullptr),
+items_size_(other.get_capacity()),
+min_value_(nullptr),
+max_value_(nullptr),
+is_level_zero_sorted_(false)
+{
+ static_assert(
+ std::is_constructible<T, TT>::value,
+ "Type converting constructor requires new type to be constructible from existing type"
+ );
+ items_ = allocator_.allocate(items_size_);
+ levels_[0] = items_size_ - other.get_num_retained();
+ levels_[1] = items_size_;
+
+ if (!other.is_empty()) {
+ min_value_ = new (allocator_.allocate(1)) T(other.get_min_value());
+ max_value_ = new (allocator_.allocate(1)) T(other.get_max_value());
+ size_t index = levels_[0];
+ for (auto pair: other) {
+ new (&items_[index]) T(pair.first);
+ const uint8_t level = count_trailing_zeros_in_u64(pair.second);
+ if (level == num_levels_) {
+ ++num_levels_;
+ levels_.resize(num_levels_ + 1);
+ levels_[level] = index;
+ levels_[num_levels_] = items_size_;
+ }
+ ++index;
+ }
+ }
+ check_sorting();
+ assert_correct_total_weight();
+}
+
template<typename T, typename C, typename S, typename A>
template<typename FwdT>
void kll_sketch<T, C, S, A>::update(FwdT&& value) {
@@ -210,6 +255,11 @@ uint16_t kll_sketch<T, C, S, A>::get_k() const {
return k_;
}
+template<typename T, typename C, typename S, typename A>
+uint16_t kll_sketch<T, C, S, A>::get_min_k() const {
+ return min_k_;
+}
+
template<typename T, typename C, typename S, typename A>
uint64_t kll_sketch<T, C, S, A>::get_n() const {
return n_;
@@ -220,6 +270,11 @@ uint32_t kll_sketch<T, C, S, A>::get_num_retained() const {
return levels_[num_levels_] - levels_[0];
}
+template<typename T, typename C, typename S, typename A>
+uint32_t kll_sketch<T, C, S, A>::get_capacity() const {
+ return items_size_;
+}
+
template<typename T, typename C, typename S, typename A>
bool kll_sketch<T, C, S, A>::is_estimation_mode() const {
return num_levels_ > 1;
@@ -780,17 +835,27 @@ void kll_sketch<T, C, S, A>::sort_level_zero() {
}
}
+template<typename T, typename C, typename S, typename A>
+void kll_sketch<T, C, S, A>::check_sorting() const {
+ // not checking level 0
+ for (uint8_t level = 1; level < num_levels_; ++level) {
+ const auto from = items_ + levels_[level];
+ const auto to = items_ + levels_[level + 1];
+ if (!std::is_sorted(from, to, C())) {
+ throw std::logic_error("levels must be sorted");
+ }
+ }
+}
+
template<typename T, typename C, typename S, typename A>
template<bool inclusive>
quantile_sketch_sorted_view<T, C, A> kll_sketch<T, C, S, A>::get_sorted_view(bool cumulative) const {
const_cast<kll_sketch*>(this)->sort_level_zero(); // allow this side effect
quantile_sketch_sorted_view<T, C, A> view(get_num_retained(), allocator_);
- uint8_t level = 0;
- while (level < num_levels_) {
+ for (uint8_t level = 0; level < num_levels_; ++level) {
const auto from = items_ + levels_[level];
const auto to = items_ + levels_[level + 1]; // exclusive
view.add(from, to, 1 << level);
- ++level;
}
if (cumulative) view.template convert_to_cummulative<inclusive>();
return view;
diff --git a/kll/test/kll_sketch_test.cpp b/kll/test/kll_sketch_test.cpp
index a8bd040..acf69df 100644
--- a/kll/test/kll_sketch_test.cpp
+++ b/kll/test/kll_sketch_test.cpp
@@ -39,9 +39,9 @@ static std::string testBinaryInputPath = "test/";
#endif
// typical usage would be just kll_sketch<float> or kll_sketch<std::string>, but here we use test_allocator
-typedef kll_sketch<float, std::less<float>, serde<float>, test_allocator<float>> kll_float_sketch;
+using kll_float_sketch = kll_sketch<float, std::less<float>, serde<float>, test_allocator<float>>;
// let std::string use the default allocator for simplicity, otherwise we need to define "less" and "serde"
-typedef kll_sketch<std::string, std::less<std::string>, serde<std::string>, test_allocator<std::string>> kll_string_sketch;
+using kll_string_sketch = kll_sketch<std::string, std::less<std::string>, serde<std::string>, test_allocator<std::string>>;
TEST_CASE("kll sketch", "[kll_sketch]") {
@@ -75,7 +75,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
(void) it; // to suppress "unused" warning
FAIL("should be no iterations over an empty sketch");
}
-}
+ }
SECTION("get bad quantile") {
kll_float_sketch sketch(200, 0);
@@ -835,10 +835,75 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
REQUIRE((*it).second == 3);
}
}
- // cleanup
- if (test_allocator_total_bytes != 0) {
- REQUIRE(test_allocator_total_bytes == 0);
+
+ SECTION("type conversion: empty") {
+ kll_sketch<double> kll_double;
+ kll_sketch<float> kll_float(kll_double);
+ REQUIRE(kll_float.is_empty());
+ REQUIRE(kll_float.get_k() == kll_double.get_k());
+ REQUIRE(kll_float.get_n() == 0);
+ REQUIRE(kll_float.get_num_retained() == 0);
+ }
+
+ SECTION("type conversion: over k") {
+ kll_sketch<double> kll_double;
+ for (int i = 0; i < 1000; ++i) kll_double.update(static_cast<double>(i));
+ kll_sketch<float> kll_float(kll_double);
+ REQUIRE(!kll_float.is_empty());
+ REQUIRE(kll_float.get_k() == kll_double.get_k());
+ REQUIRE(kll_float.get_n() == kll_double.get_n());
+ REQUIRE(kll_float.get_num_retained() == kll_double.get_num_retained());
+
+ auto sv_float = kll_float.get_sorted_view(false);
+ auto sv_double = kll_double.get_sorted_view(false);
+ auto sv_float_it = sv_float.begin();
+ auto sv_double_it = sv_double.begin();
+ while (sv_float_it != sv_float.end()) {
+ REQUIRE(sv_double_it != sv_double.end());
+ auto float_pair = *sv_float_it;
+ auto double_pair = *sv_double_it;
+ REQUIRE(float_pair.first == Approx(double_pair.first).margin(0.01));
+ REQUIRE(float_pair.second == double_pair.second);
+ ++sv_float_it;
+ ++sv_double_it;
+ }
+ REQUIRE(sv_double_it == sv_double.end());
+ }
+
+ class A {
+ int val;
+ public:
+ A(int val): val(val) {}
+ int get_val() const { return val; }
+ };
+
+ struct less_A {
+ bool operator()(const A& a1, const A& a2) const { return a1.get_val() < a2.get_val(); }
+ };
+
+ class B {
+ int val;
+ public:
+ explicit B(const A& a): val(a.get_val()) {}
+ int get_val() const { return val; }
+ };
+
+ struct less_B {
+ bool operator()(const B& b1, const B& b2) const { return b1.get_val() < b2.get_val(); }
+ };
+
+ SECTION("type conversion: custom types") {
+ kll_sketch<A, less_A> sa;
+ sa.update(1);
+ sa.update(2);
+ sa.update(3);
+
+ kll_sketch<B, less_B> sb(sa);
+ REQUIRE(sb.get_n() == 3);
}
+
+ // cleanup
+ REQUIRE(test_allocator_total_bytes == 0);
}
} /* namespace datasketches */
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org