You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2022/10/04 04:55:29 UTC
[datasketches-cpp] 01/01: sorted view to support both inclusive and exclusive, no serde in class template
This is an automated email from the ASF dual-hosted git repository.
alsay pushed a commit to branch universal_sorted_view
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git
commit cd23f81c997bf8eb0aaadfa2aaa000281236e156
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Mon Oct 3 21:55:21 2022 -0700
sorted view to support both inclusive and exclusive, no serde in class
template
---
common/include/kolmogorov_smirnov_impl.hpp | 12 +-
common/include/quantile_sketch_sorted_view.hpp | 20 +-
.../include/quantile_sketch_sorted_view_impl.hpp | 33 +-
common/test/CMakeLists.txt | 33 +-
common/test/quantile_sketch_sorted_view_test.cpp | 444 +++++++++++++++++++
cpc/test/CMakeLists.txt | 2 +-
fi/test/CMakeLists.txt | 2 +-
hll/test/CMakeLists.txt | 2 +-
kll/include/kll_sketch.hpp | 211 ++++-----
kll/include/kll_sketch_impl.hpp | 469 +++++++++------------
kll/test/CMakeLists.txt | 2 +-
kll/test/kll_sketch_custom_type_test.cpp | 36 +-
kll/test/kll_sketch_test.cpp | 278 ++++++------
kll/test/kll_sketch_validation.cpp | 66 +--
python/src/kll_wrapper.cpp | 69 +--
python/src/quantiles_wrapper.cpp | 67 +--
python/src/req_wrapper.cpp | 69 +--
python/src/vector_of_kll.cpp | 128 +++---
quantiles/include/quantiles_sketch.hpp | 194 ++++-----
quantiles/include/quantiles_sketch_impl.hpp | 328 +++++++-------
quantiles/test/CMakeLists.txt | 2 +-
quantiles/test/quantiles_compatibility_test.cpp | 32 +-
quantiles/test/quantiles_sketch_test.cpp | 255 +++++------
req/include/req_compactor.hpp | 3 +-
req/include/req_compactor_impl.hpp | 3 +-
req/include/req_sketch.hpp | 171 ++++----
req/include/req_sketch_impl.hpp | 437 +++++++++----------
req/test/CMakeLists.txt | 2 +-
req/test/req_sketch_custom_type_test.cpp | 36 +-
req/test/req_sketch_test.cpp | 195 +++++----
sampling/test/CMakeLists.txt | 2 +-
theta/test/CMakeLists.txt | 2 +-
tuple/test/CMakeLists.txt | 2 +-
33 files changed, 1900 insertions(+), 1707 deletions(-)
diff --git a/common/include/kolmogorov_smirnov_impl.hpp b/common/include/kolmogorov_smirnov_impl.hpp
index dff3bc7..8cfb979 100644
--- a/common/include/kolmogorov_smirnov_impl.hpp
+++ b/common/include/kolmogorov_smirnov_impl.hpp
@@ -28,16 +28,16 @@ namespace datasketches {
template<typename Sketch>
double kolmogorov_smirnov::delta(const Sketch& sketch1, const Sketch& sketch2) {
auto comparator = sketch1.get_comparator(); // assuming the same comparator in sketch2
- auto view1 = sketch1.get_sorted_view(true);
- auto view2 = sketch2.get_sorted_view(true);
+ auto view1 = sketch1.get_sorted_view();
+ auto view2 = sketch2.get_sorted_view();
auto it1 = view1.begin();
auto it2 = view2.begin();
const auto n1 = sketch1.get_n();
const auto n2 = sketch2.get_n();
double delta = 0;
while (it1 != view1.end() && it2 != view2.end()) {
- const double norm_cum_wt1 = static_cast<double>((*it1).second) / n1;
- const double norm_cum_wt2 = static_cast<double>((*it2).second) / n2;
+ const double norm_cum_wt1 = static_cast<double>(it1.get_cumulative_weight(false)) / n1;
+ const double norm_cum_wt2 = static_cast<double>(it2.get_cumulative_weight(false)) / n2;
delta = std::max(delta, std::abs(norm_cum_wt1 - norm_cum_wt2));
if (comparator((*it1).first, (*it2).first)) {
++it1;
@@ -48,8 +48,8 @@ double kolmogorov_smirnov::delta(const Sketch& sketch1, const Sketch& sketch2) {
++it2;
}
}
- const double norm_cum_wt1 = it1 == view1.end() ? 1 : static_cast<double>((*it1).second) / n1;
- const double norm_cum_wt2 = it2 == view2.end() ? 1 : static_cast<double>((*it2).second) / n2;
+ const double norm_cum_wt1 = it1 == view1.end() ? 1 : static_cast<double>(it1.get_cumulative_weight(false)) / n1;
+ const double norm_cum_wt2 = it2 == view2.end() ? 1 : static_cast<double>(it2.get_cumulative_weight(false)) / n2;
delta = std::max(delta, std::abs(norm_cum_wt1 - norm_cum_wt2));
return delta;
}
diff --git a/common/include/quantile_sketch_sorted_view.hpp b/common/include/quantile_sketch_sorted_view.hpp
index 9fb1693..f5e805e 100755
--- a/common/include/quantile_sketch_sorted_view.hpp
+++ b/common/include/quantile_sketch_sorted_view.hpp
@@ -40,7 +40,6 @@ public:
template<typename Iterator>
void add(Iterator begin, Iterator end, uint64_t weight);
- template<bool inclusive>
void convert_to_cummulative();
class const_iterator;
@@ -49,9 +48,10 @@ public:
size_t size() const;
- // makes sense only with cumulative weight
+ double get_rank(const T& item, bool inclusive = true) const;
+
using quantile_return_type = typename std::conditional<std::is_arithmetic<T>::value, T, const T&>::type;
- quantile_return_type get_quantile(double rank) const;
+ quantile_return_type get_quantile(double rank, bool inclusive = true) const;
private:
static inline const T& deref_helper(const T* t) { return *t; }
@@ -91,7 +91,7 @@ public:
using Base = typename quantile_sketch_sorted_view<T, C, A>::Container::const_iterator;
using value_type = typename std::conditional<std::is_arithmetic<T>::value, typename Base::value_type, std::pair<const T&, const uint64_t>>::type;
- const_iterator(const Base& it): Base(it) {}
+ const_iterator(const Base& it, const Base& begin): Base(it), begin(begin) {}
template<typename TT = T, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
value_type operator*() const { return Base::operator*(); }
@@ -112,6 +112,18 @@ public:
template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
return_value_holder operator->() const { return **this; }
+
+ uint64_t get_weight() const {
+ if (*this == begin) return Base::operator*().second;
+ return Base::operator*().second - (*this - 1).operator*().second;
+ }
+
+ uint64_t get_cumulative_weight(bool inclusive = true) const {
+ return inclusive ? Base::operator*().second : Base::operator*().second - get_weight();
+ }
+
+private:
+ Base begin;
};
} /* namespace datasketches */
diff --git a/common/include/quantile_sketch_sorted_view_impl.hpp b/common/include/quantile_sketch_sorted_view_impl.hpp
index 26eb283..ddb51f3 100755
--- a/common/include/quantile_sketch_sorted_view_impl.hpp
+++ b/common/include/quantile_sketch_sorted_view_impl.hpp
@@ -22,6 +22,7 @@
#include <algorithm>
#include <stdexcept>
+#include <cmath>
namespace datasketches {
@@ -51,34 +52,42 @@ void quantile_sketch_sorted_view<T, C, A>::add(Iterator first, Iterator last, ui
}
template<typename T, typename C, typename A>
-template<bool inclusive>
void quantile_sketch_sorted_view<T, C, A>::convert_to_cummulative() {
- uint64_t subtotal = 0;
for (auto& entry: entries_) {
- const uint64_t new_subtotal = subtotal + entry.second;
- entry.second = inclusive ? new_subtotal : subtotal;
- subtotal = new_subtotal;
+ total_weight_ += entry.second;
+ entry.second = total_weight_;
}
- total_weight_ = subtotal;
}
template<typename T, typename C, typename A>
-auto quantile_sketch_sorted_view<T, C, A>::get_quantile(double rank) const -> quantile_return_type {
- if (total_weight_ == 0) throw std::invalid_argument("supported for cumulative weight only");
- uint64_t weight = static_cast<uint64_t>(rank * total_weight_);
- auto it = std::lower_bound(entries_.begin(), entries_.end(), make_dummy_entry<T>(weight), compare_pairs_by_second());
+double quantile_sketch_sorted_view<T, C, A>::get_rank(const T& item, bool inclusive) const {
+ auto it = inclusive ?
+ std::upper_bound(entries_.begin(), entries_.end(), Entry(ref_helper(item), 0), compare_pairs_by_first())
+ : std::lower_bound(entries_.begin(), entries_.end(), Entry(ref_helper(item), 0), compare_pairs_by_first());
+ // we need the entry just before the bound found above; if there is none, the rank is 0
+ if (it == entries_.begin()) return 0;
+ --it;
+ return static_cast<double>(it->second) / total_weight_;
+}
+
+template<typename T, typename C, typename A>
+auto quantile_sketch_sorted_view<T, C, A>::get_quantile(double rank, bool inclusive) const -> quantile_return_type {
+ uint64_t weight = inclusive ? std::ceil(rank * total_weight_) : rank * total_weight_;
+ auto it = inclusive ?
+ std::lower_bound(entries_.begin(), entries_.end(), make_dummy_entry<T>(weight), compare_pairs_by_second())
+ : std::upper_bound(entries_.begin(), entries_.end(), make_dummy_entry<T>(weight), compare_pairs_by_second());
if (it == entries_.end()) return deref_helper(entries_[entries_.size() - 1].first);
return deref_helper(it->first);
}
template<typename T, typename C, typename A>
auto quantile_sketch_sorted_view<T, C, A>::begin() const -> const_iterator {
- return entries_.begin();
+ return const_iterator(entries_.begin(), entries_.begin());
}
template<typename T, typename C, typename A>
auto quantile_sketch_sorted_view<T, C, A>::end() const -> const_iterator {
- return entries_.end();
+ return const_iterator(entries_.end(), entries_.begin());
}
template<typename T, typename C, typename A>
diff --git a/common/test/CMakeLists.txt b/common/test/CMakeLists.txt
index a02d681..9f32dac 100644
--- a/common/test/CMakeLists.txt
+++ b/common/test/CMakeLists.txt
@@ -19,7 +19,7 @@
# and an integration test using the other parts of the library.
# common dependencies for tests
-add_library(common_test OBJECT "")
+add_library(common_test_lib OBJECT "")
include(FetchContent)
@@ -31,19 +31,19 @@ FetchContent_Declare(
FetchContent_MakeAvailable(Catch2)
-target_link_libraries(common_test PUBLIC Catch2::Catch2)
+target_link_libraries(common_test_lib PUBLIC Catch2::Catch2)
-set_target_properties(common_test PROPERTIES
+set_target_properties(common_test_lib PROPERTIES
CXX_STANDARD 11
CXX_STANDARD_REQUIRED YES
)
-target_include_directories(common_test
+target_include_directories(common_test_lib
INTERFACE
${CMAKE_CURRENT_SOURCE_DIR}
)
-target_sources(common_test
+target_sources(common_test_lib
INTERFACE
${CMAKE_CURRENT_SOURCE_DIR}/test_allocator.hpp
${CMAKE_CURRENT_SOURCE_DIR}/test_type.hpp
@@ -52,10 +52,29 @@ target_sources(common_test
${CMAKE_CURRENT_SOURCE_DIR}/test_allocator.cpp
)
+add_executable(common_test)
+
+target_link_libraries(common_test common common_test_lib)
+
+set_target_properties(common_test PROPERTIES
+ CXX_STANDARD 11
+ CXX_STANDARD_REQUIRED YES
+)
+
+add_test(
+ NAME common_test
+ COMMAND common_test
+)
+
+target_sources(common_test
+ PRIVATE
+ quantile_sketch_sorted_view_test.cpp
+)
+
# now the integration test part
add_executable(integration_test)
-target_link_libraries(integration_test cpc fi hll kll req sampling theta tuple common_test)
+target_link_libraries(integration_test cpc fi hll kll req sampling theta tuple common_test_lib)
set_target_properties(integration_test PROPERTIES
CXX_STANDARD 11
@@ -70,4 +89,4 @@ add_test(
target_sources(integration_test
PRIVATE
integration_test.cpp
-)
\ No newline at end of file
+)
diff --git a/common/test/quantile_sketch_sorted_view_test.cpp b/common/test/quantile_sketch_sorted_view_test.cpp
new file mode 100644
index 0000000..c84930a
--- /dev/null
+++ b/common/test/quantile_sketch_sorted_view_test.cpp
@@ -0,0 +1,444 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <catch2/catch.hpp>
+
+#include <vector>
+#include <utility>
+
+#include "quantile_sketch_sorted_view.hpp"
+
+namespace datasketches {
+
+TEST_CASE("set 0", "sorted view") {
+ auto view = quantile_sketch_sorted_view<float, std::less<float>, std::allocator<float>>(1, std::allocator<float>());
+ std::vector<float> l0 {10};
+ view.add(l0.begin(), l0.end(), 1);
+ view.convert_to_cummulative();
+ REQUIRE(view.size() == 1);
+
+ auto it = view.begin();
+ REQUIRE(it->first == 10);
+ REQUIRE(it->second == 1);
+ REQUIRE(it.get_weight() == 1);
+ REQUIRE(it.get_cumulative_weight() == 1);
+ REQUIRE(it.get_cumulative_weight(false) == 0);
+ ++it;
+ REQUIRE(it == view.end());
+
+ REQUIRE(view.get_rank(5, true) == 0);
+ REQUIRE(view.get_rank(10, true) == 1);
+ REQUIRE(view.get_rank(15, true) == 1);
+
+ REQUIRE(view.get_rank(5, false) == 0);
+ REQUIRE(view.get_rank(10, false) == 0);
+ REQUIRE(view.get_rank(15, false) == 1);
+
+ REQUIRE(view.get_quantile(0, true) == 10);
+ REQUIRE(view.get_quantile(0.5, true) == 10);
+ REQUIRE(view.get_quantile(1, true) == 10);
+
+ REQUIRE(view.get_quantile(0, false) == 10);
+ REQUIRE(view.get_quantile(0.5, false) == 10);
+ REQUIRE(view.get_quantile(1, false) == 10);
+}
+
+TEST_CASE("set 1", "sorted view") {
+ auto view = quantile_sketch_sorted_view<float, std::less<float>, std::allocator<float>>(1, std::allocator<float>());
+ std::vector<float> l0 {10, 10};
+ view.add(l0.begin(), l0.end(), 1);
+ view.convert_to_cummulative();
+ REQUIRE(view.size() == 2);
+
+ auto it = view.begin();
+ REQUIRE(it->first == 10);
+ REQUIRE(it->second == 1);
+ REQUIRE(it.get_weight() == 1);
+ REQUIRE(it.get_cumulative_weight() == 1);
+ REQUIRE(it.get_cumulative_weight(false) == 0);
+ ++it;
+ REQUIRE(it->first == 10);
+ REQUIRE(it->second == 2);
+ REQUIRE(it.get_weight() == 1);
+ REQUIRE(it.get_cumulative_weight() == 2);
+ REQUIRE(it.get_cumulative_weight(false) == 1);
+ ++it;
+ REQUIRE(it == view.end());
+
+ REQUIRE(view.get_rank(5, true) == 0);
+ REQUIRE(view.get_rank(10, true) == 1);
+ REQUIRE(view.get_rank(15, true) == 1);
+
+ REQUIRE(view.get_rank(5, false) == 0);
+ REQUIRE(view.get_rank(10, false) == 0);
+ REQUIRE(view.get_rank(15, false) == 1);
+
+ REQUIRE(view.get_quantile(0, true) == 10);
+ REQUIRE(view.get_quantile(0.25, true) == 10);
+ REQUIRE(view.get_quantile(0.5, true) == 10);
+ REQUIRE(view.get_quantile(0.75, true) == 10);
+ REQUIRE(view.get_quantile(1, true) == 10);
+
+ REQUIRE(view.get_quantile(0, false) == 10);
+ REQUIRE(view.get_quantile(0.25, false) == 10);
+ REQUIRE(view.get_quantile(0.5, false) == 10);
+ REQUIRE(view.get_quantile(0.75, false) == 10);
+ REQUIRE(view.get_quantile(1, false) == 10);
+}
+
+TEST_CASE("set 2", "sorted view") {
+ auto view = quantile_sketch_sorted_view<float, std::less<float>, std::allocator<float>>(1, std::allocator<float>());
+ std::vector<float> l1 {10, 20, 30, 40};
+ view.add(l1.begin(), l1.end(), 2);
+ view.convert_to_cummulative();
+ REQUIRE(view.size() == 4);
+
+ auto it = view.begin();
+ REQUIRE(it->first == 10);
+ REQUIRE(it->second == 2);
+ REQUIRE(it.get_weight() == 2);
+ REQUIRE(it.get_cumulative_weight() == 2);
+ REQUIRE(it.get_cumulative_weight(false) == 0);
+ ++it;
+ REQUIRE(it->first == 20);
+ REQUIRE(it->second == 4);
+ REQUIRE(it.get_weight() == 2);
+ REQUIRE(it.get_cumulative_weight() == 4);
+ REQUIRE(it.get_cumulative_weight(false) == 2);
+ ++it;
+ REQUIRE(it->first == 30);
+ REQUIRE(it->second == 6);
+ REQUIRE(it.get_weight() == 2);
+ REQUIRE(it.get_cumulative_weight() == 6);
+ REQUIRE(it.get_cumulative_weight(false) == 4);
+ ++it;
+ REQUIRE(it->first == 40);
+ REQUIRE(it->second == 8);
+ REQUIRE(it.get_weight() == 2);
+ REQUIRE(it.get_cumulative_weight() == 8);
+ REQUIRE(it.get_cumulative_weight(false) == 6);
+ ++it;
+ REQUIRE(it == view.end());
+
+ REQUIRE(view.get_rank(5, true) == 0);
+ REQUIRE(view.get_rank(10, true) == 0.25);
+ REQUIRE(view.get_rank(15, true) == 0.25);
+ REQUIRE(view.get_rank(20, true) == 0.5);
+ REQUIRE(view.get_rank(25, true) == 0.5);
+ REQUIRE(view.get_rank(30, true) == 0.75);
+ REQUIRE(view.get_rank(35, true) == 0.75);
+ REQUIRE(view.get_rank(40, true) == 1);
+ REQUIRE(view.get_rank(45, true) == 1);
+
+ REQUIRE(view.get_rank(5, false) == 0);
+ REQUIRE(view.get_rank(10, false) == 0);
+ REQUIRE(view.get_rank(15, false) == 0.25);
+ REQUIRE(view.get_rank(20, false) == 0.25);
+ REQUIRE(view.get_rank(25, false) == 0.5);
+ REQUIRE(view.get_rank(30, false) == 0.5);
+ REQUIRE(view.get_rank(35, false) == 0.75);
+ REQUIRE(view.get_rank(40, false) == 0.75);
+ REQUIRE(view.get_rank(45, false) == 1);
+
+ REQUIRE(view.get_quantile(0, true) == 10);
+ REQUIRE(view.get_quantile(0.0625, true) == 10);
+ REQUIRE(view.get_quantile(0.125, true) == 10);
+ REQUIRE(view.get_quantile(0.1875, true) == 10);
+ REQUIRE(view.get_quantile(0.25, true) == 10);
+ REQUIRE(view.get_quantile(0.3125, true) == 20);
+ REQUIRE(view.get_quantile(0.375, true) == 20);
+ REQUIRE(view.get_quantile(0.4375, true) == 20);
+ REQUIRE(view.get_quantile(0.5, true) == 20);
+ REQUIRE(view.get_quantile(0.5625, true) == 30);
+ REQUIRE(view.get_quantile(0.625, true) == 30);
+ REQUIRE(view.get_quantile(0.6875, true) == 30);
+ REQUIRE(view.get_quantile(0.75, true) == 30);
+ REQUIRE(view.get_quantile(0.8125, true) == 40);
+ REQUIRE(view.get_quantile(0.875, true) == 40);
+ REQUIRE(view.get_quantile(0.9375, true) == 40);
+ REQUIRE(view.get_quantile(1, true) == 40);
+
+ REQUIRE(view.get_quantile(0, false) == 10);
+ REQUIRE(view.get_quantile(0.0625, false) == 10);
+ REQUIRE(view.get_quantile(0.125, false) == 10);
+ REQUIRE(view.get_quantile(0.1875, false) == 10);
+ REQUIRE(view.get_quantile(0.25, false) == 20);
+ REQUIRE(view.get_quantile(0.3125, false) == 20);
+ REQUIRE(view.get_quantile(0.375, false) == 20);
+ REQUIRE(view.get_quantile(0.4375, false) == 20);
+ REQUIRE(view.get_quantile(0.5, false) == 30);
+ REQUIRE(view.get_quantile(0.5625, false) == 30);
+ REQUIRE(view.get_quantile(0.625, false) == 30);
+ REQUIRE(view.get_quantile(0.6875, false) == 30);
+ REQUIRE(view.get_quantile(0.75, false) == 40);
+ REQUIRE(view.get_quantile(0.8125, false) == 40);
+ REQUIRE(view.get_quantile(0.875, false) == 40);
+ REQUIRE(view.get_quantile(0.9375, false) == 40);
+ REQUIRE(view.get_quantile(1, false) == 40);
+}
+
+TEST_CASE("set 3", "sorted view") {
+ auto view = quantile_sketch_sorted_view<float, std::less<float>, std::allocator<float>>(8, std::allocator<float>());
+ std::vector<float> l1 {10, 20, 20, 30, 30, 30, 40, 50};
+ view.add(l1.begin(), l1.end(), 2);
+ view.convert_to_cummulative();
+ REQUIRE(view.size() == 8);
+
+ auto it = view.begin();
+ REQUIRE(it->first == 10);
+ REQUIRE(it->second == 2);
+ REQUIRE(it.get_weight() == 2);
+ ++it;
+ REQUIRE(it->first == 20);
+ REQUIRE(it->second == 4);
+ REQUIRE(it.get_weight() == 2);
+ ++it;
+ REQUIRE(it->first == 20);
+ REQUIRE(it->second == 6);
+ REQUIRE(it.get_weight() == 2);
+ ++it;
+ REQUIRE(it->first == 30);
+ REQUIRE(it->second == 8);
+ REQUIRE(it.get_weight() == 2);
+ ++it;
+ REQUIRE(it->first == 30);
+ REQUIRE(it->second == 10);
+ REQUIRE(it.get_weight() == 2);
+ ++it;
+ REQUIRE(it->first == 30);
+ REQUIRE(it->second == 12);
+ REQUIRE(it.get_weight() == 2);
+ ++it;
+ REQUIRE(it->first == 40);
+ REQUIRE(it->second == 14);
+ REQUIRE(it.get_weight() == 2);
+ ++it;
+ REQUIRE(it->first == 50);
+ REQUIRE(it->second == 16);
+ REQUIRE(it.get_weight() == 2);
+
+ REQUIRE(view.get_rank(5, true) == 0);
+ REQUIRE(view.get_rank(10, true) == 0.125);
+ REQUIRE(view.get_rank(15, true) == 0.125);
+ REQUIRE(view.get_rank(20, true) == 0.375);
+ REQUIRE(view.get_rank(25, true) == 0.375);
+ REQUIRE(view.get_rank(30, true) == 0.75);
+ REQUIRE(view.get_rank(35, true) == 0.75);
+ REQUIRE(view.get_rank(40, true) == 0.875);
+ REQUIRE(view.get_rank(45, true) == 0.875);
+ REQUIRE(view.get_rank(50, true) == 1);
+ REQUIRE(view.get_rank(55, true) == 1);
+
+ REQUIRE(view.get_rank(5, false) == 0);
+ REQUIRE(view.get_rank(10, false) == 0);
+ REQUIRE(view.get_rank(15, false) == 0.125);
+ REQUIRE(view.get_rank(20, false) == 0.125);
+ REQUIRE(view.get_rank(25, false) == 0.375);
+ REQUIRE(view.get_rank(30, false) == 0.375);
+ REQUIRE(view.get_rank(35, false) == 0.75);
+ REQUIRE(view.get_rank(40, false) == 0.75);
+ REQUIRE(view.get_rank(45, false) == 0.875);
+ REQUIRE(view.get_rank(50, false) == 0.875);
+ REQUIRE(view.get_rank(55, false) == 1);
+
+ REQUIRE(view.get_quantile(0, true) == 10);
+ REQUIRE(view.get_quantile(0.03125, true) == 10);
+ REQUIRE(view.get_quantile(0.0625, true) == 10);
+ REQUIRE(view.get_quantile(0.09375, true) == 10);
+ REQUIRE(view.get_quantile(0.125, true) == 10);
+ REQUIRE(view.get_quantile(0.15625, true) == 20);
+ REQUIRE(view.get_quantile(0.1875, true) == 20);
+ REQUIRE(view.get_quantile(0.21875, true) == 20);
+ REQUIRE(view.get_quantile(0.25, true) == 20);
+ REQUIRE(view.get_quantile(0.28125, true) == 20);
+ REQUIRE(view.get_quantile(0.3125, true) == 20);
+ REQUIRE(view.get_quantile(0.34375, true) == 20);
+ REQUIRE(view.get_quantile(0.375, true) == 20);
+ REQUIRE(view.get_quantile(0.40625, true) == 30);
+ REQUIRE(view.get_quantile(0.4375, true) == 30);
+ REQUIRE(view.get_quantile(0.46875, true) == 30);
+ REQUIRE(view.get_quantile(0.5, true) == 30);
+ REQUIRE(view.get_quantile(0.53125, true) == 30);
+ REQUIRE(view.get_quantile(0.5625, true) == 30);
+ REQUIRE(view.get_quantile(0.59375, true) == 30);
+ REQUIRE(view.get_quantile(0.625, true) == 30);
+ REQUIRE(view.get_quantile(0.65625, true) == 30);
+ REQUIRE(view.get_quantile(0.6875, true) == 30);
+ REQUIRE(view.get_quantile(0.71875, true) == 30);
+ REQUIRE(view.get_quantile(0.75, true) == 30);
+ REQUIRE(view.get_quantile(0.78125, true) == 40);
+ REQUIRE(view.get_quantile(0.8125, true) == 40);
+ REQUIRE(view.get_quantile(0.84375, true) == 40);
+ REQUIRE(view.get_quantile(0.875, true) == 40);
+ REQUIRE(view.get_quantile(0.90625, true) == 50);
+ REQUIRE(view.get_quantile(0.9375, true) == 50);
+ REQUIRE(view.get_quantile(0.96875, true) == 50);
+ REQUIRE(view.get_quantile(1, true) == 50);
+
+ REQUIRE(view.get_quantile(0, false) == 10);
+ REQUIRE(view.get_quantile(0.03125, false) == 10);
+ REQUIRE(view.get_quantile(0.0625, false) == 10);
+ REQUIRE(view.get_quantile(0.09375, false) == 10);
+ REQUIRE(view.get_quantile(0.125, false) == 20);
+ REQUIRE(view.get_quantile(0.15625, false) == 20);
+ REQUIRE(view.get_quantile(0.1875, false) == 20);
+ REQUIRE(view.get_quantile(0.21875, false) == 20);
+ REQUIRE(view.get_quantile(0.25, false) == 20);
+ REQUIRE(view.get_quantile(0.28125, false) == 20);
+ REQUIRE(view.get_quantile(0.3125, false) == 20);
+ REQUIRE(view.get_quantile(0.34375, false) == 20);
+ REQUIRE(view.get_quantile(0.375, false) == 30);
+ REQUIRE(view.get_quantile(0.40625, false) == 30);
+ REQUIRE(view.get_quantile(0.4375, false) == 30);
+ REQUIRE(view.get_quantile(0.46875, false) == 30);
+ REQUIRE(view.get_quantile(0.5, false) == 30);
+ REQUIRE(view.get_quantile(0.53125, false) == 30);
+ REQUIRE(view.get_quantile(0.5625, false) == 30);
+ REQUIRE(view.get_quantile(0.59375, false) == 30);
+ REQUIRE(view.get_quantile(0.625, false) == 30);
+ REQUIRE(view.get_quantile(0.65625, false) == 30);
+ REQUIRE(view.get_quantile(0.6875, false) == 30);
+ REQUIRE(view.get_quantile(0.71875, false) == 30);
+ REQUIRE(view.get_quantile(0.75, false) == 40);
+ REQUIRE(view.get_quantile(0.78125, false) == 40);
+ REQUIRE(view.get_quantile(0.8125, false) == 40);
+ REQUIRE(view.get_quantile(0.84375, false) == 40);
+ REQUIRE(view.get_quantile(0.875, false) == 50);
+ REQUIRE(view.get_quantile(0.90625, false) == 50);
+ REQUIRE(view.get_quantile(0.9375, false) == 50);
+ REQUIRE(view.get_quantile(0.96875, false) == 50);
+ REQUIRE(view.get_quantile(1, false) == 50);
+}
+
+TEST_CASE("set 4", "sorted view") {
+ auto view = quantile_sketch_sorted_view<float, std::less<float>, std::allocator<float>>(8, std::allocator<float>());
+ std::vector<float> l1 {10, 20, 30, 40};
+ view.add(l1.begin(), l1.end(), 2);
+ std::vector<float> l0 {10, 20, 30, 40};
+ view.add(l0.begin(), l0.end(), 1);
+ view.convert_to_cummulative();
+ REQUIRE(view.size() == 8);
+
+ auto it = view.begin();
+ REQUIRE(it->first == 10);
+ REQUIRE(it->second == 2);
+ REQUIRE(it.get_weight() == 2);
+ ++it;
+ REQUIRE(it->first == 10);
+ REQUIRE(it->second == 3);
+ REQUIRE(it.get_weight() == 1);
+ ++it;
+ REQUIRE(it->first == 20);
+ REQUIRE(it->second == 5);
+ REQUIRE(it.get_weight() == 2);
+ ++it;
+ REQUIRE(it->first == 20);
+ REQUIRE(it->second == 6);
+ REQUIRE(it.get_weight() == 1);
+ ++it;
+ REQUIRE(it->first == 30);
+ REQUIRE(it->second == 8);
+ REQUIRE(it.get_weight() == 2);
+ ++it;
+ REQUIRE(it->first == 30);
+ REQUIRE(it->second == 9);
+ REQUIRE(it.get_weight() == 1);
+ ++it;
+ REQUIRE(it->first == 40);
+ REQUIRE(it->second == 11);
+ REQUIRE(it.get_weight() == 2);
+ ++it;
+ REQUIRE(it->first == 40);
+ REQUIRE(it->second == 12);
+ REQUIRE(it.get_weight() == 1);
+
+ REQUIRE(view.get_rank(5, true) == 0);
+ REQUIRE(view.get_rank(10, true) == 0.25);
+ REQUIRE(view.get_rank(15, true) == 0.25);
+ REQUIRE(view.get_rank(20, true) == 0.5);
+ REQUIRE(view.get_rank(25, true) == 0.5);
+ REQUIRE(view.get_rank(30, true) == 0.75);
+ REQUIRE(view.get_rank(35, true) == 0.75);
+ REQUIRE(view.get_rank(40, true) == 1);
+ REQUIRE(view.get_rank(45, true) == 1);
+
+ REQUIRE(view.get_rank(5, false) == 0);
+ REQUIRE(view.get_rank(10, false) == 0);
+ REQUIRE(view.get_rank(15, false) == 0.25);
+ REQUIRE(view.get_rank(20, false) == 0.25);
+ REQUIRE(view.get_rank(25, false) == 0.5);
+ REQUIRE(view.get_rank(30, false) == 0.5);
+ REQUIRE(view.get_rank(35, false) == 0.75);
+ REQUIRE(view.get_rank(40, false) == 0.75);
+ REQUIRE(view.get_rank(45, false) == 1);
+
+ REQUIRE(view.get_quantile(0, true) == 10);
+ REQUIRE(view.get_quantile(0.0417, true) == 10);
+ REQUIRE(view.get_quantile(0.0833, true) == 10);
+ REQUIRE(view.get_quantile(0.125, true) == 10);
+ REQUIRE(view.get_quantile(0.1667, true) == 10);
+ REQUIRE(view.get_quantile(0.2083, true) == 10);
+ REQUIRE(view.get_quantile(0.25, true) == 10);
+ REQUIRE(view.get_quantile(0.2917, true) == 20);
+ REQUIRE(view.get_quantile(0.3333, true) == 20);
+ REQUIRE(view.get_quantile(0.375, true) == 20);
+ REQUIRE(view.get_quantile(0.4167, true) == 20);
+ REQUIRE(view.get_quantile(0.4583, true) == 20);
+ REQUIRE(view.get_quantile(0.5, true) == 20);
+ REQUIRE(view.get_quantile(0.5417, true) == 30);
+ REQUIRE(view.get_quantile(0.5833, true) == 30);
+ REQUIRE(view.get_quantile(0.625, true) == 30);
+ REQUIRE(view.get_quantile(0.6667, true) == 30);
+ REQUIRE(view.get_quantile(0.7083, true) == 30);
+ REQUIRE(view.get_quantile(0.75, true) == 30);
+ REQUIRE(view.get_quantile(0.7917, true) == 40);
+ REQUIRE(view.get_quantile(0.8333, true) == 40);
+ REQUIRE(view.get_quantile(0.875, true) == 40);
+ REQUIRE(view.get_quantile(0.9167, true) == 40);
+ REQUIRE(view.get_quantile(0.9583, true) == 40);
+ REQUIRE(view.get_quantile(1, true) == 40);
+
+ REQUIRE(view.get_quantile(0, false) == 10);
+ REQUIRE(view.get_quantile(0.0417, false) == 10);
+ REQUIRE(view.get_quantile(0.0833, false) == 10);
+ REQUIRE(view.get_quantile(0.125, false) == 10);
+ REQUIRE(view.get_quantile(0.1667, false) == 10);
+ REQUIRE(view.get_quantile(0.2083, false) == 10);
+ REQUIRE(view.get_quantile(0.25, false) == 20);
+ REQUIRE(view.get_quantile(0.2917, false) == 20);
+ REQUIRE(view.get_quantile(0.3333, false) == 20);
+ REQUIRE(view.get_quantile(0.375, false) == 20);
+ REQUIRE(view.get_quantile(0.4167, false) == 20);
+ REQUIRE(view.get_quantile(0.4583, false) == 20);
+ REQUIRE(view.get_quantile(0.5, false) == 30);
+ REQUIRE(view.get_quantile(0.5417, false) == 30);
+ REQUIRE(view.get_quantile(0.5833, false) == 30);
+ REQUIRE(view.get_quantile(0.625, false) == 30);
+ REQUIRE(view.get_quantile(0.6667, false) == 30);
+ REQUIRE(view.get_quantile(0.7083, false) == 30);
+ REQUIRE(view.get_quantile(0.75, false) == 40);
+ REQUIRE(view.get_quantile(0.7917, false) == 40);
+ REQUIRE(view.get_quantile(0.8333, false) == 40);
+ REQUIRE(view.get_quantile(0.875, false) == 40);
+ REQUIRE(view.get_quantile(0.9167, false) == 40);
+ REQUIRE(view.get_quantile(0.9583, false) == 40);
+ REQUIRE(view.get_quantile(1, false) == 40);
+}
+
+} /* namespace datasketches */
diff --git a/cpc/test/CMakeLists.txt b/cpc/test/CMakeLists.txt
index 9ffce32..5fe402e 100644
--- a/cpc/test/CMakeLists.txt
+++ b/cpc/test/CMakeLists.txt
@@ -17,7 +17,7 @@
add_executable(cpc_test)
-target_link_libraries(cpc_test cpc common_test)
+target_link_libraries(cpc_test cpc common_test_lib)
set_target_properties(cpc_test PROPERTIES
CXX_STANDARD 11
diff --git a/fi/test/CMakeLists.txt b/fi/test/CMakeLists.txt
index 7a821cd..bfdeeaa 100644
--- a/fi/test/CMakeLists.txt
+++ b/fi/test/CMakeLists.txt
@@ -17,7 +17,7 @@
add_executable(fi_test)
-target_link_libraries(fi_test fi common_test)
+target_link_libraries(fi_test fi common_test_lib)
set_target_properties(fi_test PROPERTIES
CXX_STANDARD 11
diff --git a/hll/test/CMakeLists.txt b/hll/test/CMakeLists.txt
index 75a084e..b7ae41b 100644
--- a/hll/test/CMakeLists.txt
+++ b/hll/test/CMakeLists.txt
@@ -17,7 +17,7 @@
add_executable(hll_test)
-target_link_libraries(hll_test hll common_test)
+target_link_libraries(hll_test hll common_test_lib)
set_target_properties(hll_test PROPERTIES
CXX_STANDARD 11
diff --git a/kll/include/kll_sketch.hpp b/kll/include/kll_sketch.hpp
index ef6146b..4a76e34 100644
--- a/kll/include/kll_sketch.hpp
+++ b/kll/include/kll_sketch.hpp
@@ -20,14 +20,12 @@
#ifndef KLL_SKETCH_HPP_
#define KLL_SKETCH_HPP_
-#include <functional>
#include <memory>
#include <vector>
-#include <cmath>
-#include "quantile_sketch_sorted_view.hpp"
#include "common_defs.hpp"
#include "serde.hpp"
+#include "quantile_sketch_sorted_view.hpp"
namespace datasketches {
@@ -161,7 +159,6 @@ namespace kll_constants {
template <
typename T,
typename C = std::less<T>, // strict weak ordering function (see C++ named requirements: Compare)
- typename S = serde<T>, // deprecated, to be removed in the next major version
typename A = std::allocator<T>
>
class kll_sketch {
@@ -170,8 +167,6 @@ class kll_sketch {
using comparator = C;
static const uint8_t DEFAULT_M = 8;
- // TODO: Redundant and deprecated. Will be removed in next major version.
- static const uint16_t DEFAULT_K = kll_constants::DEFAULT_K;
static const uint16_t MIN_K = DEFAULT_M;
static const uint16_t MAX_K = (1 << 16) - 1;
@@ -187,15 +182,15 @@ class kll_sketch {
* @param other sketch of a different type
* @param allocator instance of an Allocator
*/
- template<typename TT, typename CC, typename SS, typename AA>
- explicit kll_sketch(const kll_sketch<TT, CC, SS, AA>& other, const A& allocator = A());
+ template<typename TT, typename CC, typename AA>
+ explicit kll_sketch(const kll_sketch<TT, CC, AA>& other, const A& allocator = A());
/**
* Updates this sketch with the given data item.
- * @param value an item from a stream of items
+ * @param item from a stream of items
*/
template<typename FwdT>
- void update(FwdT&& value);
+ void update(FwdT&& item);
/**
* Merges another sketch into this one.
@@ -235,20 +230,20 @@ class kll_sketch {
bool is_estimation_mode() const;
/**
- * Returns the min value of the stream.
+ * Returns the min item of the stream.
* For floating point types: if the sketch is empty this returns NaN.
* For other types: if the sketch is empty this throws runtime_error.
- * @return the min value of the stream
+ * @return the min item of the stream
*/
- T get_min_value() const;
+ T get_min_item() const;
/**
- * Returns the max value of the stream.
+ * Returns the max item of the stream.
* For floating point types: if the sketch is empty this returns NaN.
* For other types: if the sketch is empty this throws runtime_error.
- * @return the max value of the stream
+ * @return the max item of the stream
*/
- T get_max_value() const;
+ T get_max_item() const;
/**
* Returns an instance of the comparator for this sketch.
@@ -257,134 +252,114 @@ class kll_sketch {
C get_comparator() const;
/**
- * Returns an approximation to the value of the data item
- * that would be preceded by the given fraction of a hypothetical sorted
- * version of the input stream so far.
- * <p>
- * Note that this method has a fairly large overhead (microseconds instead of nanoseconds)
- * so it should not be called multiple times to get different quantiles from the same
- * sketch. Instead use get_quantiles(), which pays the overhead only once.
+ * Returns an item from the sketch that is the best approximation to an item
+ * from the original stream with the given rank.
* <p>
* For floating point types: if the sketch is empty this returns NaN.
* For other types: if the sketch is empty this throws runtime_error.
*
- * @param fraction the specified fractional position in the hypothetical sorted stream.
- * These are also called normalized ranks or fractional ranks.
- * If fraction = 0.0, the true minimum value of the stream is returned.
- * If fraction = 1.0, the true maximum value of the stream is returned.
- * If the parameter inclusive=true, the given rank is considered inclusive (includes the weight of an item)
+ * @param rank of an item in the hypothetical sorted stream.
+ * @param inclusive if true, the given rank is considered inclusive (includes weight of an item)
*
- * @return the approximation to the value at the given fraction
+ * @return approximate quantile associated with the given rank
*/
using quantile_return_type = typename quantile_sketch_sorted_view<T, C, A>::quantile_return_type;
- template<bool inclusive = false>
- quantile_return_type get_quantile(double fraction) const;
+ quantile_return_type get_quantile(double rank, bool inclusive = true) const;
/**
- * This is a more efficient multiple-query version of get_quantile().
- * <p>
* This returns an array that could have been generated by using get_quantile() for each
- * fractional rank separately, but would be very inefficient.
- * This method incurs the internal set-up overhead once and obtains multiple quantile values in
- * a single query. It is strongly recommend that this method be used instead of multiple calls
- * to get_quantile().
+ * rank separately.
*
* <p>If the sketch is empty this returns an empty vector.
*
- * @param fractions given array of fractional positions in the hypothetical sorted stream.
- * These are also called normalized ranks or fractional ranks.
- * These fractions must be in the interval [0.0, 1.0], inclusive.
- * If the parameter inclusive=true, the given fractions are considered inclusive (include weights of items)
+ * @param ranks given array of ranks in the hypothetical sorted stream.
+ * These ranks must be in the interval [0.0, 1.0].
+ * @param inclusive if true, the given ranks are considered inclusive (include weights of items)
*
- * @return array of approximations to the given fractions in the same order as given fractions
- * in the input array.
+ * @return array of approximate quantiles corresponding to the given ranks in the same order.
*/
- template<bool inclusive = false>
- std::vector<T, A> get_quantiles(const double* fractions, uint32_t size) const;
+ std::vector<T, A> get_quantiles(const double* ranks, uint32_t size, bool inclusive = true) const;
/**
* This is a multiple-query version of get_quantile() that allows the caller to
- * specify the number of evenly-spaced fractional ranks.
+ * specify the number of evenly-spaced ranks.
*
* <p>If the sketch is empty this returns an empty vector.
*
- * @param num an integer that specifies the number of evenly-spaced fractional ranks.
- * This must be an integer greater than 0. A value of 1 will return the min value.
- * A value of 2 will return the min and the max value. A value of 3 will return the min,
- * the median and the max value, etc.
+ * @param num an integer that specifies the number of evenly-spaced ranks.
+ * This must be an integer greater than 0. A value of 1 will return the quantile of rank 0.
+ * A value of 2 will return quantiles of ranks 0 and 1. A value of 3 will return quantiles of ranks 0,
+ * 0.5 (median) and 1, etc.
+ * @param inclusive if true, the ranks are considered inclusive (include weights of items)
*
- * @return array of approximations to the given number of evenly-spaced fractional ranks.
+ * @return array of approximate quantiles corresponding to the given number of evenly-spaced ranks.
*/
- template<bool inclusive = false>
- std::vector<T, A> get_quantiles(uint32_t num) const;
+ std::vector<T, A> get_quantiles(uint32_t num, bool inclusive = true) const;
/**
- * Returns an approximation to the normalized (fractional) rank of the given value from 0 to 1,
- * inclusive.
- * With the template parameter inclusive=true the weight of the given value is included into the rank.
- * Otherwise the rank equals the sum of the weights of all values that are less than the given value
- * according to the comparator C.
+ * Returns an approximation to the normalized rank of the given item from 0 to 1, inclusive.
*
* <p>The resulting approximation has a probabilistic guarantee that can be obtained from the
* get_normalized_rank_error(false) function.
*
- * <p>If the sketch is empty this returns NaN.
+ * <p>If the sketch is empty the result is undefined (NaN).
*
- * @param value to be ranked
- * @return an approximate rank of the given value
+ * @param item to be ranked.
+ * @param inclusive if true the weight of the given item is included into the rank.
+ * Otherwise the rank equals the sum of the weights of all items that are less than the given item
+ * according to the comparator C.
+ *
+ * @return an approximate rank of the given item
*/
- template<bool inclusive = false>
- double get_rank(const T& value) const;
+ double get_rank(const T& item, bool inclusive = true) const;
/**
* Returns an approximation to the Probability Mass Function (PMF) of the input stream
- * given a set of split points (values).
+ * given a set of split points (items).
*
* <p>The resulting approximations have a probabilistic guarantee that can be obtained from the
* get_normalized_rank_error(true) function.
*
* <p>If the sketch is empty this returns an empty vector.
*
- * @param split_points an array of <i>m</i> unique, monotonically increasing values
+ * @param split_points an array of <i>m</i> unique, monotonically increasing items
* that divide the input domain into <i>m+1</i> consecutive disjoint intervals.
- * The definition of an "interval" is inclusive of the left split point (or minimum value) and
- * exclusive of the right split point, with the exception that the last interval will include
- * the maximum value.
- * It is not necessary to include either the min or max values in these split points.
+ * @param size the number of split points in the array
*
- * @return an array of m+1 doubles each of which is an approximation
- * to the fraction of the input stream values (the mass) that fall into one of those intervals.
- * If the template parameter inclusive=false, the definition of an "interval" is inclusive of the left split point and exclusive of the right
+ * @param inclusive if false, the definition of an "interval" is inclusive of the left split point and exclusive of the right
* split point, with the exception that the last interval will include the maximum value.
- * If the template parameter inclusive=true, the definition of an "interval" is exclusive of the left split point and inclusive of the right
+ * If true, the definition of an "interval" is exclusive of the left split point and inclusive of the right
* split point.
+ *
+ * @return an array of m+1 doubles each of which is an approximation
+ * to the fraction of the input stream items (the mass) that fall into one of those intervals.
*/
- template<bool inclusive = false>
- vector_d<A> get_PMF(const T* split_points, uint32_t size) const;
+ vector_d<A> get_PMF(const T* split_points, uint32_t size, bool inclusive = true) const;
/**
* Returns an approximation to the Cumulative Distribution Function (CDF), which is the
- * cumulative analog of the PMF, of the input stream given a set of split points (values).
+ * cumulative analog of the PMF, of the input stream given a set of split points (items).
*
* <p>The resulting approximations have a probabilistic guarantee that can be obtained from the
* get_normalized_rank_error(false) function.
*
* <p>If the sketch is empty this returns an empty vector.
*
- * @param split_points an array of <i>m</i> unique, monotonically increasing values
+ * @param split_points an array of <i>m</i> unique, monotonically increasing items
* that divide the input domain into <i>m+1</i> consecutive disjoint intervals.
- * The definition of an "interval" is inclusive of the left split point (or minimum value) and
- * exclusive of the right split point, with the exception that the last interval will include
- * the maximum value.
- * It is not necessary to include either the min or max values in these split points.
+ * @param size the number of split points in the array
*
- * @return an array of m+1 double values, which are a consecutive approximation to the CDF
+ * @param inclusive if false, the definition of an "interval" is inclusive of the left split point and exclusive of the right
+ * split point, with the exception that the last interval will include the maximum value.
+ * If true, the definition of an "interval" is exclusive of the left split point and inclusive of the right
+ * split point.
+ *
+ * @return an array of m+1 doubles, which are a consecutive approximation to the CDF
* of the input stream given the split_points. The value at array position j of the returned
* CDF array is the sum of the returned values in positions 0 through j of the returned PMF
* array.
*/
- template<bool inclusive = false>
- vector_d<A> get_CDF(const T* split_points, uint32_t size) const;
+ vector_d<A> get_CDF(const T* split_points, uint32_t size, bool inclusive = true) const;
/**
* Gets the approximate rank error of this sketch normalized as a fraction between zero and one.
@@ -401,7 +376,7 @@ class kll_sketch {
* @param serde instance of a SerDe
* @return size in bytes needed to serialize this sketch
*/
- template<typename TT = T, typename SerDe = S, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
+ template<typename TT = T, typename SerDe = serde<T>, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
/**
@@ -410,7 +385,7 @@ class kll_sketch {
* @param serde instance of a SerDe
* @return size in bytes needed to serialize this sketch
*/
- template<typename TT = T, typename SerDe = S, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
+ template<typename TT = T, typename SerDe = serde<T>, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
/**
@@ -445,7 +420,7 @@ class kll_sketch {
* @param os output stream
* @param instance of a SerDe
*/
- template<typename SerDe = S>
+ template<typename SerDe = serde<T>>
void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;
// This is a convenience alias for users
@@ -461,19 +436,9 @@ class kll_sketch {
* @param instance of a SerDe
* @return serialized sketch as a vector of bytes
*/
- template<typename SerDe = S>
+ template<typename SerDe = serde<T>>
vector_bytes serialize(unsigned header_size_bytes = 0, const SerDe& sd = SerDe()) const;
- /**
- * This method deserializes a sketch from a given stream.
- * @param is input stream
- * @param allocator instance of an Allocator
- * @return an instance of a sketch
- *
- * Deprecated, to be removed in the next major version
- */
- static kll_sketch deserialize(std::istream& is, const A& allocator = A());
-
/**
* This method deserializes a sketch from a given stream.
* @param is input stream
@@ -481,20 +446,9 @@ class kll_sketch {
* @param allocator instance of an Allocator
* @return an instance of a sketch
*/
- template<typename SerDe = S>
+ template<typename SerDe = serde<T>>
static kll_sketch deserialize(std::istream& is, const SerDe& sd = SerDe(), const A& allocator = A());
- /**
- * This method deserializes a sketch from a given array of bytes.
- * @param bytes pointer to the array of bytes
- * @param size the size of the array
- * @param allocator instance of an Allocator
- * @return an instance of a sketch
- *
- * Deprecated, to be removed in the next major version
- */
- static kll_sketch deserialize(const void* bytes, size_t size, const A& allocator = A());
-
/**
* This method deserializes a sketch from a given array of bytes.
* @param bytes pointer to the array of bytes
@@ -503,7 +457,7 @@ class kll_sketch {
* @param allocator instance of an Allocator
* @return an instance of a sketch
*/
- template<typename SerDe = S>
+ template<typename SerDe = serde<T>>
static kll_sketch deserialize(const void* bytes, size_t size, const SerDe& sd = SerDe(), const A& allocator = A());
/*
@@ -526,14 +480,7 @@ class kll_sketch {
const_iterator begin() const;
const_iterator end() const;
- template<bool inclusive = false>
- quantile_sketch_sorted_view<T, C, A> get_sorted_view(bool cumulative) const;
-
- #ifdef KLL_VALIDATION
- uint8_t get_num_levels() { return num_levels_; }
- uint32_t* get_levels() { return levels_; }
- T* get_items() { return items_; }
- #endif
+ quantile_sketch_sorted_view<T, C, A> get_sorted_view() const;
private:
/* Serialized sketch layout:
@@ -563,14 +510,15 @@ class kll_sketch {
uint16_t k_;
uint8_t m_; // minimum buffer "width"
uint16_t min_k_; // for error estimation after merging with different k
- uint64_t n_;
uint8_t num_levels_;
+ bool is_level_zero_sorted_;
+ uint64_t n_;
vector_u32<A> levels_;
T* items_;
uint32_t items_size_;
T* min_value_;
T* max_value_;
- bool is_level_zero_sorted_;
+ mutable quantile_sketch_sorted_view<T, C, A>* sorted_view_;
// for deserialization
class item_deleter;
@@ -591,15 +539,6 @@ class kll_sketch {
void add_empty_top_level_to_completely_full_sketch();
void sort_level_zero();
- template<bool inclusive>
- vector_d<A> get_PMF_or_CDF(const T* split_points, uint32_t size, bool is_CDF) const;
- template<bool inclusive>
- void increment_buckets_unsorted_level(uint32_t from_index, uint32_t to_index, uint64_t weight,
- const T* split_points, uint32_t size, double* buckets) const;
- template<bool inclusive>
- void increment_buckets_sorted_level(uint32_t from_index, uint32_t to_index, uint64_t weight,
- const T* split_points, uint32_t size, double* buckets) const;
-
template<typename O> void merge_higher_levels(O&& other, uint64_t final_n);
template<typename FwdSk>
@@ -640,14 +579,16 @@ class kll_sketch {
}
// for type converting constructor
- template<typename TT, typename CC, typename SS, typename AA>
- friend class kll_sketch;
+ template<typename TT, typename CC, typename AA> friend class kll_sketch;
+
+ void setup_sorted_view() const; // modifies mutable state
+ void reset_sorted_view();
};
-template<typename T, typename C, typename S, typename A>
-class kll_sketch<T, C, S, A>::const_iterator: public std::iterator<std::input_iterator_tag, T> {
+template<typename T, typename C, typename A>
+class kll_sketch<T, C, A>::const_iterator: public std::iterator<std::input_iterator_tag, T> {
public:
- friend class kll_sketch<T, C, S, A>;
+ friend class kll_sketch<T, C, A>;
const_iterator& operator++();
const_iterator& operator++(int);
bool operator==(const const_iterator& other) const;
diff --git a/kll/include/kll_sketch_impl.hpp b/kll/include/kll_sketch_impl.hpp
index 8b620ad..2488fd7 100644
--- a/kll/include/kll_sketch_impl.hpp
+++ b/kll/include/kll_sketch_impl.hpp
@@ -32,20 +32,21 @@
namespace datasketches {
-template<typename T, typename C, typename S, typename A>
-kll_sketch<T, C, S, A>::kll_sketch(uint16_t k, const A& allocator):
+template<typename T, typename C, typename A>
+kll_sketch<T, C, A>::kll_sketch(uint16_t k, const A& allocator):
allocator_(allocator),
k_(k),
m_(DEFAULT_M),
min_k_(k),
-n_(0),
num_levels_(1),
+is_level_zero_sorted_(false),
+n_(0),
levels_(2, 0, allocator),
items_(nullptr),
items_size_(k_),
min_value_(nullptr),
max_value_(nullptr),
-is_level_zero_sorted_(false)
+sorted_view_(nullptr)
{
if (k < MIN_K || k > MAX_K) {
throw std::invalid_argument("K must be >= " + std::to_string(MIN_K) + " and <= " + std::to_string(MAX_K) + ": " + std::to_string(k));
@@ -54,20 +55,21 @@ is_level_zero_sorted_(false)
items_ = allocator_.allocate(items_size_);
}
-template<typename T, typename C, typename S, typename A>
-kll_sketch<T, C, S, A>::kll_sketch(const kll_sketch& other):
+template<typename T, typename C, typename A>
+kll_sketch<T, C, A>::kll_sketch(const kll_sketch& other):
allocator_(other.allocator_),
k_(other.k_),
m_(other.m_),
min_k_(other.min_k_),
-n_(other.n_),
num_levels_(other.num_levels_),
+is_level_zero_sorted_(other.is_level_zero_sorted_),
+n_(other.n_),
levels_(other.levels_),
items_(nullptr),
items_size_(other.items_size_),
min_value_(nullptr),
max_value_(nullptr),
-is_level_zero_sorted_(other.is_level_zero_sorted_)
+sorted_view_(nullptr)
{
items_ = allocator_.allocate(items_size_);
for (auto i = levels_[0]; i < levels_[num_levels_]; ++i) new (&items_[i]) T(other.items_[i]);
@@ -75,63 +77,66 @@ is_level_zero_sorted_(other.is_level_zero_sorted_)
if (other.max_value_ != nullptr) max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
}
-template<typename T, typename C, typename S, typename A>
-kll_sketch<T, C, S, A>::kll_sketch(kll_sketch&& other) noexcept:
+template<typename T, typename C, typename A>
+kll_sketch<T, C, A>::kll_sketch(kll_sketch&& other) noexcept:
allocator_(std::move(other.allocator_)),
k_(other.k_),
m_(other.m_),
min_k_(other.min_k_),
-n_(other.n_),
num_levels_(other.num_levels_),
+is_level_zero_sorted_(other.is_level_zero_sorted_),
+n_(other.n_),
levels_(std::move(other.levels_)),
items_(other.items_),
items_size_(other.items_size_),
min_value_(other.min_value_),
max_value_(other.max_value_),
-is_level_zero_sorted_(other.is_level_zero_sorted_)
+sorted_view_(nullptr)
{
other.items_ = nullptr;
other.min_value_ = nullptr;
other.max_value_ = nullptr;
}
-template<typename T, typename C, typename S, typename A>
-kll_sketch<T, C, S, A>& kll_sketch<T, C, S, A>::operator=(const kll_sketch& other) {
- kll_sketch<T, C, S, A> copy(other);
+template<typename T, typename C, typename A>
+kll_sketch<T, C, A>& kll_sketch<T, C, A>::operator=(const kll_sketch& other) {
+ kll_sketch copy(other);
std::swap(allocator_, copy.allocator_);
std::swap(k_, copy.k_);
std::swap(m_, copy.m_);
std::swap(min_k_, copy.min_k_);
- std::swap(n_, copy.n_);
std::swap(num_levels_, copy.num_levels_);
+ std::swap(is_level_zero_sorted_, copy.is_level_zero_sorted_);
+ std::swap(n_, copy.n_);
std::swap(levels_, copy.levels_);
std::swap(items_, copy.items_);
std::swap(items_size_, copy.items_size_);
std::swap(min_value_, copy.min_value_);
std::swap(max_value_, copy.max_value_);
- std::swap(is_level_zero_sorted_, copy.is_level_zero_sorted_);
+ reset_sorted_view();
return *this;
}
-template<typename T, typename C, typename S, typename A>
-kll_sketch<T, C, S, A>& kll_sketch<T, C, S, A>::operator=(kll_sketch&& other) {
+template<typename T, typename C, typename A>
+kll_sketch<T, C, A>& kll_sketch<T, C, A>::operator=(kll_sketch&& other) {
std::swap(allocator_, other.allocator_);
std::swap(k_, other.k_);
std::swap(m_, other.m_);
std::swap(min_k_, other.min_k_);
- std::swap(n_, other.n_);
std::swap(num_levels_, other.num_levels_);
+ std::swap(is_level_zero_sorted_, other.is_level_zero_sorted_);
+ std::swap(n_, other.n_);
std::swap(levels_, other.levels_);
std::swap(items_, other.items_);
std::swap(items_size_, other.items_size_);
std::swap(min_value_, other.min_value_);
std::swap(max_value_, other.max_value_);
- std::swap(is_level_zero_sorted_, other.is_level_zero_sorted_);
+ reset_sorted_view();
return *this;
}
-template<typename T, typename C, typename S, typename A>
-kll_sketch<T, C, S, A>::~kll_sketch() {
+template<typename T, typename C, typename A>
+kll_sketch<T, C, A>::~kll_sketch() {
if (items_ != nullptr) {
const uint32_t begin = levels_[0];
const uint32_t end = levels_[num_levels_];
@@ -146,23 +151,25 @@ kll_sketch<T, C, S, A>::~kll_sketch() {
max_value_->~T();
allocator_.deallocate(max_value_, 1);
}
+ reset_sorted_view();
}
-template<typename T, typename C, typename S, typename A>
-template<typename TT, typename CC, typename SS, typename AA>
-kll_sketch<T, C, S, A>::kll_sketch(const kll_sketch<TT, CC, SS, AA>& other, const A& allocator):
+template<typename T, typename C, typename A>
+template<typename TT, typename CC, typename AA>
+kll_sketch<T, C, A>::kll_sketch(const kll_sketch<TT, CC, AA>& other, const A& allocator):
allocator_(allocator),
k_(other.k_),
m_(other.m_),
min_k_(other.min_k_),
-n_(other.n_),
num_levels_(other.num_levels_),
+is_level_zero_sorted_(other.is_level_zero_sorted_),
+n_(other.n_),
levels_(other.levels_, allocator_),
items_(nullptr),
items_size_(other.items_size_),
min_value_(nullptr),
max_value_(nullptr),
-is_level_zero_sorted_(other.is_level_zero_sorted_)
+sorted_view_(nullptr)
{
static_assert(
std::is_constructible<T, TT>::value,
@@ -175,17 +182,18 @@ is_level_zero_sorted_(other.is_level_zero_sorted_)
check_sorting();
}
-template<typename T, typename C, typename S, typename A>
+template<typename T, typename C, typename A>
template<typename FwdT>
-void kll_sketch<T, C, S, A>::update(FwdT&& value) {
+void kll_sketch<T, C, A>::update(FwdT&& value) {
if (!check_update_value(value)) { return; }
update_min_max(value);
const uint32_t index = internal_update();
new (&items_[index]) T(std::forward<FwdT>(value));
+ reset_sorted_view();
}
-template<typename T, typename C, typename S, typename A>
-void kll_sketch<T, C, S, A>::update_min_max(const T& value) {
+template<typename T, typename C, typename A>
+void kll_sketch<T, C, A>::update_min_max(const T& value) {
if (is_empty()) {
min_value_ = new (allocator_.allocate(1)) T(value);
max_value_ = new (allocator_.allocate(1)) T(value);
@@ -195,17 +203,17 @@ void kll_sketch<T, C, S, A>::update_min_max(const T& value) {
}
}
-template<typename T, typename C, typename S, typename A>
-uint32_t kll_sketch<T, C, S, A>::internal_update() {
+template<typename T, typename C, typename A>
+uint32_t kll_sketch<T, C, A>::internal_update() {
if (levels_[0] == 0) compress_while_updating();
n_++;
is_level_zero_sorted_ = false;
return --levels_[0];
}
-template<typename T, typename C, typename S, typename A>
+template<typename T, typename C, typename A>
template<typename FwdSk>
-void kll_sketch<T, C, S, A>::merge(FwdSk&& other) {
+void kll_sketch<T, C, A>::merge(FwdSk&& other) {
if (other.is_empty()) return;
if (m_ != other.m_) {
throw std::invalid_argument("incompatible M: " + std::to_string(m_) + " and " + std::to_string(other.m_));
@@ -226,149 +234,137 @@ void kll_sketch<T, C, S, A>::merge(FwdSk&& other) {
n_ = final_n;
if (other.is_estimation_mode()) min_k_ = std::min(min_k_, other.min_k_);
assert_correct_total_weight();
+ reset_sorted_view();
}
-template<typename T, typename C, typename S, typename A>
-bool kll_sketch<T, C, S, A>::is_empty() const {
+template<typename T, typename C, typename A>
+bool kll_sketch<T, C, A>::is_empty() const {
return n_ == 0;
}
-template<typename T, typename C, typename S, typename A>
-uint16_t kll_sketch<T, C, S, A>::get_k() const {
+template<typename T, typename C, typename A>
+uint16_t kll_sketch<T, C, A>::get_k() const {
return k_;
}
-template<typename T, typename C, typename S, typename A>
-uint64_t kll_sketch<T, C, S, A>::get_n() const {
+template<typename T, typename C, typename A>
+uint64_t kll_sketch<T, C, A>::get_n() const {
return n_;
}
-template<typename T, typename C, typename S, typename A>
-uint32_t kll_sketch<T, C, S, A>::get_num_retained() const {
+template<typename T, typename C, typename A>
+uint32_t kll_sketch<T, C, A>::get_num_retained() const {
return levels_[num_levels_] - levels_[0];
}
-template<typename T, typename C, typename S, typename A>
-bool kll_sketch<T, C, S, A>::is_estimation_mode() const {
+template<typename T, typename C, typename A>
+bool kll_sketch<T, C, A>::is_estimation_mode() const {
return num_levels_ > 1;
}
-template<typename T, typename C, typename S, typename A>
-T kll_sketch<T, C, S, A>::get_min_value() const {
+template<typename T, typename C, typename A>
+T kll_sketch<T, C, A>::get_min_item() const {
if (is_empty()) return get_invalid_value();
return *min_value_;
}
-template<typename T, typename C, typename S, typename A>
-T kll_sketch<T, C, S, A>::get_max_value() const {
+template<typename T, typename C, typename A>
+T kll_sketch<T, C, A>::get_max_item() const {
if (is_empty()) return get_invalid_value();
return *max_value_;
}
-template<typename T, typename C, typename S, typename A>
-C kll_sketch<T, C, S, A>::get_comparator() const {
+template<typename T, typename C, typename A>
+C kll_sketch<T, C, A>::get_comparator() const {
return C();
}
-template<typename T, typename C, typename S, typename A>
-template<bool inclusive>
-auto kll_sketch<T, C, S, A>::get_quantile(double rank) const -> quantile_return_type {
+template<typename T, typename C, typename A>
+auto kll_sketch<T, C, A>::get_quantile(double rank, bool inclusive) const -> quantile_return_type {
if (is_empty()) return get_invalid_value();
- if (rank == 0.0) return *min_value_;
- if (rank == 1.0) return *max_value_;
if ((rank < 0.0) || (rank > 1.0)) {
- throw std::invalid_argument("Fraction cannot be less than zero or greater than 1.0");
+ throw std::invalid_argument("normalized rank cannot be less than zero or greater than 1.0");
}
// may have a side effect of sorting level zero if needed
- return get_sorted_view<inclusive>(true).get_quantile(rank);
+ setup_sorted_view();
+ return sorted_view_->get_quantile(rank, inclusive);
}
-template<typename T, typename C, typename S, typename A>
-template<bool inclusive>
-std::vector<T, A> kll_sketch<T, C, S, A>::get_quantiles(const double* ranks, uint32_t size) const {
+template<typename T, typename C, typename A>
+std::vector<T, A> kll_sketch<T, C, A>::get_quantiles(const double* ranks, uint32_t size, bool inclusive) const {
std::vector<T, A> quantiles(allocator_);
if (is_empty()) return quantiles;
quantiles.reserve(size);
// may have a side effect of sorting level zero if needed
- auto view = get_sorted_view<inclusive>(true);
+ setup_sorted_view();
for (uint32_t i = 0; i < size; i++) {
const double rank = ranks[i];
if ((rank < 0.0) || (rank > 1.0)) {
- throw std::invalid_argument("Fraction cannot be less than zero or greater than 1.0");
- }
- else if (rank == 0.0) quantiles.push_back(*min_value_);
- else if (rank == 1.0) quantiles.push_back(*max_value_);
- else {
- quantiles.push_back(view.get_quantile(rank));
+ throw std::invalid_argument("normalized rank cannot be less than 0 or greater than 1");
}
+ quantiles.push_back(sorted_view_->get_quantile(rank, inclusive));
}
return quantiles;
}
-template<typename T, typename C, typename S, typename A>
-template<bool inclusive>
-std::vector<T, A> kll_sketch<T, C, S, A>::get_quantiles(uint32_t num) const {
+template<typename T, typename C, typename A>
+std::vector<T, A> kll_sketch<T, C, A>::get_quantiles(uint32_t num, bool inclusive) const {
if (is_empty()) return std::vector<T, A>(allocator_);
if (num == 0) {
throw std::invalid_argument("num must be > 0");
}
- vector_d<A> fractions(num, 0, allocator_);
- fractions[0] = 0.0;
+ vector_d<A> ranks(num, 0, allocator_);
+ ranks[0] = 0.0;
for (size_t i = 1; i < num; i++) {
- fractions[i] = static_cast<double>(i) / (num - 1);
+ ranks[i] = static_cast<double>(i) / (num - 1);
}
if (num > 1) {
- fractions[num - 1] = 1.0;
+ ranks[num - 1] = 1.0;
}
- return get_quantiles<inclusive>(fractions.data(), num);
+ return get_quantiles(ranks.data(), num, inclusive);
}
-template<typename T, typename C, typename S, typename A>
-template<bool inclusive>
-double kll_sketch<T, C, S, A>::get_rank(const T& value) const {
+template<typename T, typename C, typename A>
+double kll_sketch<T, C, A>::get_rank(const T& item, bool inclusive) const {
if (is_empty()) return std::numeric_limits<double>::quiet_NaN();
- uint8_t level = 0;
- uint64_t weight = 1;
- uint64_t total = 0;
- while (level < num_levels_) {
- const auto from_index = levels_[level];
- const auto to_index = levels_[level + 1]; // exclusive
- for (uint32_t i = from_index; i < to_index; i++) {
- if (inclusive ? !C()(value, items_[i]) : C()(items_[i], value)) {
- total += weight;
- } else if ((level > 0) || is_level_zero_sorted_) {
- break; // levels above 0 are sorted, no point comparing further
- }
- }
- level++;
- weight *= 2;
- }
- return (double) total / n_;
+ setup_sorted_view();
+ return sorted_view_->get_rank(item, inclusive);
}
-template<typename T, typename C, typename S, typename A>
-template<bool inclusive>
-vector_d<A> kll_sketch<T, C, S, A>::get_PMF(const T* split_points, uint32_t size) const {
- return get_PMF_or_CDF<inclusive>(split_points, size, false);
+template<typename T, typename C, typename A>
+vector_d<A> kll_sketch<T, C, A>::get_PMF(const T* split_points, uint32_t size, bool inclusive) const {
+ auto buckets = get_CDF(split_points, size, inclusive);
+ if (buckets.size() > 0) {
+ for (uint32_t i = size; i > 0; --i) {
+ buckets[i] -= buckets[i - 1];
+ }
+ }
+ return buckets;
}
-template<typename T, typename C, typename S, typename A>
-template<bool inclusive>
-vector_d<A> kll_sketch<T, C, S, A>::get_CDF(const T* split_points, uint32_t size) const {
- return get_PMF_or_CDF<inclusive>(split_points, size, true);
+template<typename T, typename C, typename A>
+vector_d<A> kll_sketch<T, C, A>::get_CDF(const T* split_points, uint32_t size, bool inclusive) const {
+ if (is_empty()) return vector_d<A>(allocator_);
+ kll_helper::validate_values<T, C>(split_points, size);
+ vector_d<A> buckets(size + 1, 0, allocator_);
+ for (uint32_t i = 0; i < size; ++i) {
+ buckets[i] = get_rank(split_points[i], inclusive);
+ }
+ buckets[size] = 1;
+ return buckets;
}
-template<typename T, typename C, typename S, typename A>
-double kll_sketch<T, C, S, A>::get_normalized_rank_error(bool pmf) const {
+template<typename T, typename C, typename A>
+double kll_sketch<T, C, A>::get_normalized_rank_error(bool pmf) const {
return get_normalized_rank_error(min_k_, pmf);
}
// implementation for fixed-size arithmetic types (integral and floating point)
-template<typename T, typename C, typename S, typename A>
+template<typename T, typename C, typename A>
template<typename TT, typename SerDe, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
-size_t kll_sketch<T, C, S, A>::get_serialized_size_bytes(const SerDe&) const {
+size_t kll_sketch<T, C, A>::get_serialized_size_bytes(const SerDe&) const {
if (is_empty()) { return EMPTY_SIZE_BYTES; }
if (num_levels_ == 1 && get_num_retained() == 1) {
return DATA_START_SINGLE_ITEM + sizeof(TT);
@@ -378,9 +374,9 @@ size_t kll_sketch<T, C, S, A>::get_serialized_size_bytes(const SerDe&) const {
}
// implementation for all other types
-template<typename T, typename C, typename S, typename A>
+template<typename T, typename C, typename A>
template<typename TT, typename SerDe, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type>
-size_t kll_sketch<T, C, S, A>::get_serialized_size_bytes(const SerDe& sd) const {
+size_t kll_sketch<T, C, A>::get_serialized_size_bytes(const SerDe& sd) const {
if (is_empty()) { return EMPTY_SIZE_BYTES; }
if (num_levels_ == 1 && get_num_retained() == 1) {
return DATA_START_SINGLE_ITEM + sd.size_of_item(items_[levels_[0]]);
@@ -394,9 +390,9 @@ size_t kll_sketch<T, C, S, A>::get_serialized_size_bytes(const SerDe& sd) const
}
// implementation for fixed-size arithmetic types (integral and floating point)
-template<typename T, typename C, typename S, typename A>
+template<typename T, typename C, typename A>
template<typename TT, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
-size_t kll_sketch<T, C, S, A>::get_max_serialized_size_bytes(uint16_t k, uint64_t n) {
+size_t kll_sketch<T, C, A>::get_max_serialized_size_bytes(uint16_t k, uint64_t n) {
const uint8_t num_levels = kll_helper::ub_on_num_levels(n);
const uint32_t max_num_retained = kll_helper::compute_total_capacity(k, DEFAULT_M, num_levels);
// the last integer in the levels_ array is not serialized because it can be derived
@@ -404,18 +400,18 @@ size_t kll_sketch<T, C, S, A>::get_max_serialized_size_bytes(uint16_t k, uint64_
}
// implementation for all other types
-template<typename T, typename C, typename S, typename A>
+template<typename T, typename C, typename A>
template<typename TT, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type>
-size_t kll_sketch<T, C, S, A>::get_max_serialized_size_bytes(uint16_t k, uint64_t n, size_t max_item_size_bytes) {
+size_t kll_sketch<T, C, A>::get_max_serialized_size_bytes(uint16_t k, uint64_t n, size_t max_item_size_bytes) {
const uint8_t num_levels = kll_helper::ub_on_num_levels(n);
const uint32_t max_num_retained = kll_helper::compute_total_capacity(k, DEFAULT_M, num_levels);
// the last integer in the levels_ array is not serialized because it can be derived
return DATA_START + num_levels * sizeof(uint32_t) + (max_num_retained + 2) * max_item_size_bytes;
}
-template<typename T, typename C, typename S, typename A>
+template<typename T, typename C, typename A>
template<typename SerDe>
-void kll_sketch<T, C, S, A>::serialize(std::ostream& os, const SerDe& sd) const {
+void kll_sketch<T, C, A>::serialize(std::ostream& os, const SerDe& sd) const {
const bool is_single_item = n_ == 1;
const uint8_t preamble_ints(is_empty() || is_single_item ? PREAMBLE_INTS_SHORT : PREAMBLE_INTS_FULL);
write(os, preamble_ints);
@@ -446,9 +442,9 @@ void kll_sketch<T, C, S, A>::serialize(std::ostream& os, const SerDe& sd) const
sd.serialize(os, &items_[levels_[0]], get_num_retained());
}
-template<typename T, typename C, typename S, typename A>
+template<typename T, typename C, typename A>
template<typename SerDe>
-vector_u8<A> kll_sketch<T, C, S, A>::serialize(unsigned header_size_bytes, const SerDe& sd) const {
+vector_u8<A> kll_sketch<T, C, A>::serialize(unsigned header_size_bytes, const SerDe& sd) const {
const bool is_single_item = n_ == 1;
const size_t size = header_size_bytes + get_serialized_size_bytes(sd);
vector_u8<A> bytes(size, 0, allocator_);
@@ -487,14 +483,9 @@ vector_u8<A> kll_sketch<T, C, S, A>::serialize(unsigned header_size_bytes, const
return bytes;
}
-template<typename T, typename C, typename S, typename A>
-kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is, const A& allocator) {
- return deserialize(is, S(), allocator);
-}
-
-template<typename T, typename C, typename S, typename A>
+template<typename T, typename C, typename A>
template<typename SerDe>
-kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is, const SerDe& sd, const A& allocator) {
+kll_sketch<T, C, A> kll_sketch<T, C, A>::deserialize(std::istream& is, const SerDe& sd, const A& allocator) {
const auto preamble_ints = read<uint8_t>(is);
const auto serial_version = read<uint8_t>(is);
const auto family_id = read<uint8_t>(is);
@@ -570,14 +561,9 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is, con
std::move(min_value), std::move(max_value), is_level_zero_sorted);
}
-template<typename T, typename C, typename S, typename A>
-kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, size_t size, const A& allocator) {
- return deserialize(bytes, size, S(), allocator);
-}
-
-template<typename T, typename C, typename S, typename A>
+template<typename T, typename C, typename A>
template<typename SerDe>
-kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, size_t size, const SerDe& sd, const A& allocator) {
+kll_sketch<T, C, A> kll_sketch<T, C, A>::deserialize(const void* bytes, size_t size, const SerDe& sd, const A& allocator) {
ensure_minimum_memory(size, 8);
const char* ptr = static_cast<const char*>(bytes);
uint8_t preamble_ints;
@@ -601,7 +587,7 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, si
ensure_minimum_memory(size, preamble_ints * sizeof(uint32_t));
const bool is_empty(flags_byte & (1 << flags::IS_EMPTY));
- if (is_empty) return kll_sketch<T, C, S, A>(k, allocator);
+ if (is_empty) return kll_sketch(k, allocator);
uint64_t n;
uint16_t min_k;
@@ -669,36 +655,37 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, si
* Otherwise, it is the "single-sided" normalized rank error for all the other queries.
* Constants were derived as the best fit to 99 percentile empirically measured max error in thousands of trials
*/
-template<typename T, typename C, typename S, typename A>
-double kll_sketch<T, C, S, A>::get_normalized_rank_error(uint16_t k, bool pmf) {
+template<typename T, typename C, typename A>
+double kll_sketch<T, C, A>::get_normalized_rank_error(uint16_t k, bool pmf) {
return pmf
? 2.446 / pow(k, 0.9433)
: 2.296 / pow(k, 0.9723);
}
// for deserialization
-template<typename T, typename C, typename S, typename A>
-kll_sketch<T, C, S, A>::kll_sketch(uint16_t k, uint16_t min_k, uint64_t n, uint8_t num_levels, vector_u32<A>&& levels,
+template<typename T, typename C, typename A>
+kll_sketch<T, C, A>::kll_sketch(uint16_t k, uint16_t min_k, uint64_t n, uint8_t num_levels, vector_u32<A>&& levels,
std::unique_ptr<T, items_deleter> items, uint32_t items_size, std::unique_ptr<T, item_deleter> min_value,
std::unique_ptr<T, item_deleter> max_value, bool is_level_zero_sorted):
allocator_(levels.get_allocator()),
k_(k),
m_(DEFAULT_M),
min_k_(min_k),
-n_(n),
num_levels_(num_levels),
+is_level_zero_sorted_(is_level_zero_sorted),
+n_(n),
levels_(std::move(levels)),
items_(items.release()),
items_size_(items_size),
min_value_(min_value.release()),
max_value_(max_value.release()),
-is_level_zero_sorted_(is_level_zero_sorted)
+sorted_view_(nullptr)
{}
// The following code is only valid in the special case of exactly reaching capacity while updating.
// It cannot be used while merging, while reducing k, or anything else.
-template<typename T, typename C, typename S, typename A>
-void kll_sketch<T, C, S, A>::compress_while_updating(void) {
+template<typename T, typename C, typename A>
+void kll_sketch<T, C, A>::compress_while_updating(void) {
const uint8_t level = find_level_to_compact();
// It is important to add the new top level right here. Be aware that this operation
@@ -751,8 +738,8 @@ void kll_sketch<T, C, S, A>::compress_while_updating(void) {
for (uint32_t i = 0; i < half_adj_pop; i++) items_[i + destroy_beg].~T();
}
-template<typename T, typename C, typename S, typename A>
-uint8_t kll_sketch<T, C, S, A>::find_level_to_compact() const {
+template<typename T, typename C, typename A>
+uint8_t kll_sketch<T, C, A>::find_level_to_compact() const {
uint8_t level = 0;
while (true) {
if (level >= num_levels_) throw std::logic_error("capacity calculation error");
@@ -765,8 +752,8 @@ uint8_t kll_sketch<T, C, S, A>::find_level_to_compact() const {
}
}
-template<typename T, typename C, typename S, typename A>
-void kll_sketch<T, C, S, A>::add_empty_top_level_to_completely_full_sketch() {
+template<typename T, typename C, typename A>
+void kll_sketch<T, C, A>::add_empty_top_level_to_completely_full_sketch() {
const uint32_t cur_total_cap = levels_[num_levels_];
// make sure that we are following a certain growth scheme
@@ -800,16 +787,16 @@ void kll_sketch<T, C, S, A>::add_empty_top_level_to_completely_full_sketch() {
levels_[num_levels_] = new_total_cap; // initialize the new "extra" index at the top
}
-template<typename T, typename C, typename S, typename A>
-void kll_sketch<T, C, S, A>::sort_level_zero() {
+template<typename T, typename C, typename A>
+void kll_sketch<T, C, A>::sort_level_zero() {
if (!is_level_zero_sorted_) {
std::sort(items_ + levels_[0], items_ + levels_[1], C());
is_level_zero_sorted_ = true;
}
}
-template<typename T, typename C, typename S, typename A>
-void kll_sketch<T, C, S, A>::check_sorting() const {
+template<typename T, typename C, typename A>
+void kll_sketch<T, C, A>::check_sorting() const {
// not checking level 0
for (uint8_t level = 1; level < num_levels_; ++level) {
const auto from = items_ + levels_[level];
@@ -820,9 +807,8 @@ void kll_sketch<T, C, S, A>::check_sorting() const {
}
}
-template<typename T, typename C, typename S, typename A>
-template<bool inclusive>
-quantile_sketch_sorted_view<T, C, A> kll_sketch<T, C, S, A>::get_sorted_view(bool cumulative) const {
+template<typename T, typename C, typename A>
+quantile_sketch_sorted_view<T, C, A> kll_sketch<T, C, A>::get_sorted_view() const {
const_cast<kll_sketch*>(this)->sort_level_zero(); // allow this side effect
quantile_sketch_sorted_view<T, C, A> view(get_num_retained(), allocator_);
for (uint8_t level = 0; level < num_levels_; ++level) {
@@ -830,86 +816,13 @@ quantile_sketch_sorted_view<T, C, A> kll_sketch<T, C, S, A>::get_sorted_view(boo
const auto to = items_ + levels_[level + 1]; // exclusive
view.add(from, to, 1 << level);
}
- if (cumulative) view.template convert_to_cummulative<inclusive>();
+ view.convert_to_cummulative();
return view;
}
-template<typename T, typename C, typename S, typename A>
-template<bool inclusive>
-vector_d<A> kll_sketch<T, C, S, A>::get_PMF_or_CDF(const T* split_points, uint32_t size, bool is_CDF) const {
- if (is_empty()) return vector_d<A>(allocator_);
- kll_helper::validate_values<T, C>(split_points, size);
- vector_d<A> buckets(size + 1, 0, allocator_);
- uint8_t level = 0;
- uint64_t weight = 1;
- while (level < num_levels_) {
- const auto from_index = levels_[level];
- const auto to_index = levels_[level + 1]; // exclusive
- if ((level == 0) && !is_level_zero_sorted_) {
- increment_buckets_unsorted_level<inclusive>(from_index, to_index, weight, split_points, size, buckets.data());
- } else {
- increment_buckets_sorted_level<inclusive>(from_index, to_index, weight, split_points, size, buckets.data());
- }
- level++;
- weight *= 2;
- }
- // normalize and, if CDF, convert to cumulative
- if (is_CDF) {
- double subtotal = 0;
- for (uint32_t i = 0; i <= size; i++) {
- subtotal += buckets[i];
- buckets[i] = subtotal / n_;
- }
- } else {
- for (uint32_t i = 0; i <= size; i++) {
- buckets[i] /= n_;
- }
- }
- return buckets;
-}
-
-template<typename T, typename C, typename S, typename A>
-template<bool inclusive>
-void kll_sketch<T, C, S, A>::increment_buckets_unsorted_level(uint32_t from_index, uint32_t to_index, uint64_t weight,
- const T* split_points, uint32_t size, double* buckets) const
-{
- for (uint32_t i = from_index; i < to_index; i++) {
- uint32_t j;
- for (j = 0; j < size; j++) {
- if (inclusive ? !C()(split_points[j], items_[i]) : C()(items_[i], split_points[j])) {
- break;
- }
- }
- buckets[j] += weight;
- }
-}
-
-template<typename T, typename C, typename S, typename A>
-template<bool inclusive>
-void kll_sketch<T, C, S, A>::increment_buckets_sorted_level(uint32_t from_index, uint32_t to_index, uint64_t weight,
- const T* split_points, uint32_t size, double* buckets) const
-{
- uint32_t i = from_index;
- uint32_t j = 0;
- while ((i < to_index) && (j < size)) {
- if (inclusive ? !C()(split_points[j], items_[i]) : C()(items_[i], split_points[j])) {
- buckets[j] += weight; // this sample goes into this bucket
- i++; // move on to next sample and see whether it also goes into this bucket
- } else {
- j++; // no more samples for this bucket
- }
- }
- // now either i == to_index (we are out of samples), or
- // j == size (we are out of buckets, but there are more samples remaining)
- // we only need to do something in the latter case
- if (j == size) {
- buckets[j] += weight * (to_index - i);
- }
-}
-
-template<typename T, typename C, typename S, typename A>
+template<typename T, typename C, typename A>
template<typename O>
-void kll_sketch<T, C, S, A>::merge_higher_levels(O&& other, uint64_t final_n) {
+void kll_sketch<T, C, A>::merge_higher_levels(O&& other, uint64_t final_n) {
const uint32_t tmp_num_items = get_num_retained() + other.get_num_retained_above_level_zero();
A alloc(allocator_);
auto tmp_items_deleter = [tmp_num_items, &alloc](T* ptr) { alloc.deallocate(ptr, tmp_num_items); }; // no destructor needed
@@ -950,9 +863,9 @@ void kll_sketch<T, C, S, A>::merge_higher_levels(O&& other, uint64_t final_n) {
}
// this leaves items_ uninitialized (all objects moved out and destroyed)
-template<typename T, typename C, typename S, typename A>
+template<typename T, typename C, typename A>
template<typename FwdSk>
-void kll_sketch<T, C, S, A>::populate_work_arrays(FwdSk&& other, T* workbuf, uint32_t* worklevels, uint8_t provisional_num_levels) {
+void kll_sketch<T, C, A>::populate_work_arrays(FwdSk&& other, T* workbuf, uint32_t* worklevels, uint8_t provisional_num_levels) {
worklevels[0] = 0;
// the level zero data from "other" was already inserted into "this"
@@ -976,36 +889,36 @@ void kll_sketch<T, C, S, A>::populate_work_arrays(FwdSk&& other, T* workbuf, uin
}
}
-template<typename T, typename C, typename S, typename A>
-void kll_sketch<T, C, S, A>::assert_correct_total_weight() const {
+template<typename T, typename C, typename A>
+void kll_sketch<T, C, A>::assert_correct_total_weight() const {
const uint64_t total(kll_helper::sum_the_sample_weights(num_levels_, levels_.data()));
if (total != n_) {
throw std::logic_error("Total weight does not match N");
}
}
-template<typename T, typename C, typename S, typename A>
-uint32_t kll_sketch<T, C, S, A>::safe_level_size(uint8_t level) const {
+template<typename T, typename C, typename A>
+uint32_t kll_sketch<T, C, A>::safe_level_size(uint8_t level) const {
if (level >= num_levels_) return 0;
return levels_[level + 1] - levels_[level];
}
-template<typename T, typename C, typename S, typename A>
-uint32_t kll_sketch<T, C, S, A>::get_num_retained_above_level_zero() const {
+template<typename T, typename C, typename A>
+uint32_t kll_sketch<T, C, A>::get_num_retained_above_level_zero() const {
if (num_levels_ == 1) return 0;
return levels_[num_levels_] - levels_[1];
}
-template<typename T, typename C, typename S, typename A>
-void kll_sketch<T, C, S, A>::check_m(uint8_t m) {
+template<typename T, typename C, typename A>
+void kll_sketch<T, C, A>::check_m(uint8_t m) {
if (m != DEFAULT_M) {
throw std::invalid_argument("Possible corruption: M must be " + std::to_string(DEFAULT_M)
+ ": " + std::to_string(m));
}
}
-template<typename T, typename C, typename S, typename A>
-void kll_sketch<T, C, S, A>::check_preamble_ints(uint8_t preamble_ints, uint8_t flags_byte) {
+template<typename T, typename C, typename A>
+void kll_sketch<T, C, A>::check_preamble_ints(uint8_t preamble_ints, uint8_t flags_byte) {
const bool is_empty(flags_byte & (1 << flags::IS_EMPTY));
const bool is_single_item(flags_byte & (1 << flags::IS_SINGLE_ITEM));
if (is_empty || is_single_item) {
@@ -1021,8 +934,8 @@ void kll_sketch<T, C, S, A>::check_preamble_ints(uint8_t preamble_ints, uint8_t
}
}
-template<typename T, typename C, typename S, typename A>
-void kll_sketch<T, C, S, A>::check_serial_version(uint8_t serial_version) {
+template<typename T, typename C, typename A>
+void kll_sketch<T, C, A>::check_serial_version(uint8_t serial_version) {
if (serial_version != SERIAL_VERSION_1 && serial_version != SERIAL_VERSION_2) {
throw std::invalid_argument("Possible corruption: serial version mismatch: expected "
+ std::to_string(SERIAL_VERSION_1) + " or " + std::to_string(SERIAL_VERSION_2)
@@ -1030,16 +943,16 @@ void kll_sketch<T, C, S, A>::check_serial_version(uint8_t serial_version) {
}
}
-template<typename T, typename C, typename S, typename A>
-void kll_sketch<T, C, S, A>::check_family_id(uint8_t family_id) {
+template<typename T, typename C, typename A>
+void kll_sketch<T, C, A>::check_family_id(uint8_t family_id) {
if (family_id != FAMILY) {
throw std::invalid_argument("Possible corruption: family mismatch: expected "
+ std::to_string(FAMILY) + ", got " + std::to_string(family_id));
}
}
-template <typename T, typename C, typename S, typename A>
-string<A> kll_sketch<T, C, S, A>::to_string(bool print_levels, bool print_items) const {
+template <typename T, typename C, typename A>
+string<A> kll_sketch<T, C, A>::to_string(bool print_levels, bool print_items) const {
// Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
// The stream does not support passing an allocator instance, and alternatives are complicated.
std::ostringstream os;
@@ -1090,25 +1003,25 @@ string<A> kll_sketch<T, C, S, A>::to_string(bool print_levels, bool print_items)
return string<A>(os.str().c_str(), allocator_);
}
-template <typename T, typename C, typename S, typename A>
-typename kll_sketch<T, C, S, A>::const_iterator kll_sketch<T, C, S, A>::begin() const {
- return kll_sketch<T, C, S, A>::const_iterator(items_, levels_.data(), num_levels_);
+template <typename T, typename C, typename A>
+typename kll_sketch<T, C, A>::const_iterator kll_sketch<T, C, A>::begin() const {
+ return kll_sketch<T, C, A>::const_iterator(items_, levels_.data(), num_levels_);
}
-template <typename T, typename C, typename S, typename A>
-typename kll_sketch<T, C, S, A>::const_iterator kll_sketch<T, C, S, A>::end() const {
- return kll_sketch<T, C, S, A>::const_iterator(nullptr, levels_.data(), num_levels_);
+template <typename T, typename C, typename A>
+typename kll_sketch<T, C, A>::const_iterator kll_sketch<T, C, A>::end() const {
+ return kll_sketch<T, C, A>::const_iterator(nullptr, levels_.data(), num_levels_);
}
// kll_sketch::const_iterator implementation
-template<typename T, typename C, typename S, typename A>
-kll_sketch<T, C, S, A>::const_iterator::const_iterator(const T* items, const uint32_t* levels, const uint8_t num_levels):
+template<typename T, typename C, typename A>
+kll_sketch<T, C, A>::const_iterator::const_iterator(const T* items, const uint32_t* levels, const uint8_t num_levels):
items(items), levels(levels), num_levels(num_levels), index(items == nullptr ? levels[num_levels] : levels[0]), level(items == nullptr ? num_levels : 0), weight(1)
{}
-template<typename T, typename C, typename S, typename A>
-typename kll_sketch<T, C, S, A>::const_iterator& kll_sketch<T, C, S, A>::const_iterator::operator++() {
+template<typename T, typename C, typename A>
+typename kll_sketch<T, C, A>::const_iterator& kll_sketch<T, C, A>::const_iterator::operator++() {
++index;
if (index == levels[level + 1]) { // go to the next non-empty level
do {
@@ -1119,30 +1032,30 @@ typename kll_sketch<T, C, S, A>::const_iterator& kll_sketch<T, C, S, A>::const_i
return *this;
}
-template<typename T, typename C, typename S, typename A>
-typename kll_sketch<T, C, S, A>::const_iterator& kll_sketch<T, C, S, A>::const_iterator::operator++(int) {
+template<typename T, typename C, typename A>
+typename kll_sketch<T, C, A>::const_iterator& kll_sketch<T, C, A>::const_iterator::operator++(int) {
const_iterator tmp(*this);
operator++();
return tmp;
}
-template<typename T, typename C, typename S, typename A>
-bool kll_sketch<T, C, S, A>::const_iterator::operator==(const const_iterator& other) const {
+template<typename T, typename C, typename A>
+bool kll_sketch<T, C, A>::const_iterator::operator==(const const_iterator& other) const {
return index == other.index;
}
-template<typename T, typename C, typename S, typename A>
-bool kll_sketch<T, C, S, A>::const_iterator::operator!=(const const_iterator& other) const {
+template<typename T, typename C, typename A>
+bool kll_sketch<T, C, A>::const_iterator::operator!=(const const_iterator& other) const {
return !operator==(other);
}
-template<typename T, typename C, typename S, typename A>
-const std::pair<const T&, const uint64_t> kll_sketch<T, C, S, A>::const_iterator::operator*() const {
+template<typename T, typename C, typename A>
+const std::pair<const T&, const uint64_t> kll_sketch<T, C, A>::const_iterator::operator*() const {
return std::pair<const T&, const uint64_t>(items[index], weight);
}
-template<typename T, typename C, typename S, typename A>
-class kll_sketch<T, C, S, A>::item_deleter {
+template<typename T, typename C, typename A>
+class kll_sketch<T, C, A>::item_deleter {
public:
item_deleter(const A& allocator): allocator_(allocator) {}
void operator() (T* ptr) {
@@ -1155,8 +1068,8 @@ class kll_sketch<T, C, S, A>::item_deleter {
A allocator_;
};
-template<typename T, typename C, typename S, typename A>
-class kll_sketch<T, C, S, A>::items_deleter {
+template<typename T, typename C, typename A>
+class kll_sketch<T, C, A>::items_deleter {
public:
items_deleter(uint32_t start, uint32_t num, const A& allocator):
allocator_(allocator), start_(start), num_(num) {}
@@ -1172,6 +1085,24 @@ class kll_sketch<T, C, S, A>::items_deleter {
uint32_t num_;
};
+template<typename T, typename C, typename A>
+void kll_sketch<T, C, A>::setup_sorted_view() const {
+ if (sorted_view_ == nullptr) {
+ using AllocSortedView = typename std::allocator_traits<A>::template rebind_alloc<quantile_sketch_sorted_view<T, C, A>>;
+ sorted_view_ = new (AllocSortedView(allocator_).allocate(1)) quantile_sketch_sorted_view<T, C, A>(get_sorted_view());
+ }
+}
+
+template<typename T, typename C, typename A>
+void kll_sketch<T, C, A>::reset_sorted_view() {
+ if (sorted_view_ != nullptr) {
+ sorted_view_->~quantile_sketch_sorted_view();
+ using AllocSortedView = typename std::allocator_traits<A>::template rebind_alloc<quantile_sketch_sorted_view<T, C, A>>;
+ AllocSortedView(allocator_).deallocate(sorted_view_, 1);
+ sorted_view_ = nullptr;
+ }
+}
+
} /* namespace datasketches */
#endif
diff --git a/kll/test/CMakeLists.txt b/kll/test/CMakeLists.txt
index e5f8325..2c554e8 100644
--- a/kll/test/CMakeLists.txt
+++ b/kll/test/CMakeLists.txt
@@ -17,7 +17,7 @@
add_executable(kll_test)
-target_link_libraries(kll_test kll common_test)
+target_link_libraries(kll_test kll common_test_lib)
set_target_properties(kll_test PROPERTIES
CXX_STANDARD 11
diff --git a/kll/test/kll_sketch_custom_type_test.cpp b/kll/test/kll_sketch_custom_type_test.cpp
index 0b68d56..409bd63 100644
--- a/kll/test/kll_sketch_custom_type_test.cpp
+++ b/kll/test/kll_sketch_custom_type_test.cpp
@@ -26,7 +26,7 @@
namespace datasketches {
-using kll_test_type_sketch = kll_sketch<test_type, test_type_less, test_type_serde, test_allocator<test_type>>;
+using kll_test_type_sketch = kll_sketch<test_type, test_type_less, test_allocator<test_type>>;
using alloc = test_allocator<test_type>;
TEST_CASE("kll sketch custom type", "[kll_sketch]") {
@@ -37,9 +37,9 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
SECTION("compact level zero") {
kll_test_type_sketch sketch(8, 0);
REQUIRE_THROWS_AS(sketch.get_quantile(0), std::runtime_error);
- REQUIRE_THROWS_AS(sketch.get_min_value(), std::runtime_error);
- REQUIRE_THROWS_AS(sketch.get_max_value(), std::runtime_error);
- REQUIRE(sketch.get_serialized_size_bytes() == 8);
+ REQUIRE_THROWS_AS(sketch.get_min_item(), std::runtime_error);
+ REQUIRE_THROWS_AS(sketch.get_max_item(), std::runtime_error);
+ REQUIRE(sketch.get_serialized_size_bytes(test_type_serde()) == 8);
sketch.update(1);
sketch.update(2);
@@ -55,8 +55,8 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
REQUIRE(sketch.is_estimation_mode());
REQUIRE(sketch.get_n() > sketch.get_num_retained());
- REQUIRE(sketch.get_min_value().get_value() == 1);
- REQUIRE(sketch.get_max_value().get_value() == 9);
+ REQUIRE(sketch.get_min_item().get_value() == 1);
+ REQUIRE(sketch.get_max_item().get_value() == 9);
}
SECTION("merge small") {
@@ -72,8 +72,8 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
REQUIRE_FALSE(sketch2.is_estimation_mode());
REQUIRE(sketch2.get_num_retained() == sketch2.get_n());
- REQUIRE(sketch2.get_min_value().get_value() == 1);
- REQUIRE(sketch2.get_max_value().get_value() == 2);
+ REQUIRE(sketch2.get_min_item().get_value() == 1);
+ REQUIRE(sketch2.get_max_item().get_value() == 2);
}
SECTION("merge higher levels") {
@@ -105,8 +105,8 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
REQUIRE(sketch2.is_estimation_mode());
REQUIRE(sketch2.get_n() > sketch2.get_num_retained());
- REQUIRE(sketch2.get_min_value().get_value() == 1);
- REQUIRE(sketch2.get_max_value().get_value() == 18);
+ REQUIRE(sketch2.get_min_item().get_value() == 1);
+ REQUIRE(sketch2.get_max_item().get_value() == 18);
}
SECTION("serialize deserialize") {
@@ -116,17 +116,17 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
for (int i = 0; i < n; i++) sketch1.update(i);
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
- sketch1.serialize(s);
- REQUIRE((size_t) s.tellp() == sketch1.get_serialized_size_bytes());
- auto sketch2 = kll_test_type_sketch::deserialize(s, alloc(0));
- REQUIRE((size_t) s.tellg() == sketch2.get_serialized_size_bytes());
+ sketch1.serialize(s, test_type_serde());
+ REQUIRE((size_t) s.tellp() == sketch1.get_serialized_size_bytes(test_type_serde()));
+ auto sketch2 = kll_test_type_sketch::deserialize(s, test_type_serde(), alloc(0));
+ REQUIRE((size_t) s.tellg() == sketch2.get_serialized_size_bytes(test_type_serde()));
REQUIRE(s.tellg() == s.tellp());
REQUIRE(sketch2.is_empty() == sketch1.is_empty());
REQUIRE(sketch2.is_estimation_mode() == sketch1.is_estimation_mode());
REQUIRE(sketch2.get_n() == sketch1.get_n());
REQUIRE(sketch2.get_num_retained() == sketch1.get_num_retained());
- REQUIRE(sketch2.get_min_value().get_value() == sketch1.get_min_value().get_value());
- REQUIRE(sketch2.get_max_value().get_value() == sketch1.get_max_value().get_value());
+ REQUIRE(sketch2.get_min_item().get_value() == sketch1.get_min_item().get_value());
+ REQUIRE(sketch2.get_max_item().get_value() == sketch1.get_max_item().get_value());
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch1.get_normalized_rank_error(false));
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch1.get_normalized_rank_error(true));
REQUIRE(sketch2.get_quantile(0.5).get_value() == sketch1.get_quantile(0.5).get_value());
@@ -141,8 +141,8 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
kll_test_type_sketch sketch2(8, 0);
sketch2.update(10);
sketch2.merge(std::move(sketch1));
- REQUIRE(sketch2.get_min_value().get_value() == 0);
- REQUIRE(sketch2.get_max_value().get_value() == 10);
+ REQUIRE(sketch2.get_min_item().get_value() == 0);
+ REQUIRE(sketch2.get_max_item().get_value() == 10);
REQUIRE(sketch2.get_n() == 11);
}
diff --git a/kll/test/kll_sketch_test.cpp b/kll/test/kll_sketch_test.cpp
index d938b1a..0d9d5a6 100644
--- a/kll/test/kll_sketch_test.cpp
+++ b/kll/test/kll_sketch_test.cpp
@@ -39,9 +39,9 @@ static std::string testBinaryInputPath = "test/";
#endif
// typical usage would be just kll_sketch<float> or kll_sketch<std::string>, but here we use test_allocator
-using kll_float_sketch = kll_sketch<float, std::less<float>, serde<float>, test_allocator<float>>;
+using kll_float_sketch = kll_sketch<float, std::less<float>, test_allocator<float>>;
// let std::string use the default allocator for simplicity, otherwise we need to define "less" and "serde"
-using kll_string_sketch = kll_sketch<std::string, std::less<std::string>, serde<std::string>, test_allocator<std::string>>;
+using kll_string_sketch = kll_sketch<std::string, std::less<std::string>, test_allocator<std::string>>;
TEST_CASE("kll sketch", "[kll_sketch]") {
@@ -53,6 +53,8 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
kll_float_sketch sketch2(kll_float_sketch::MAX_K, 0); // this should work
REQUIRE_THROWS_AS(new kll_float_sketch(kll_float_sketch::MIN_K - 1, 0), std::invalid_argument);
// MAX_K + 1 makes no sense because k is uint16_t
+ //std::cout << "sizeof(kll_sketch<float>)=" << sizeof(kll_sketch<float>) << "\n";
+ //std::cout << "sizeof(kll_sketch<double>)=" << sizeof(kll_sketch<double>) << "\n";
}
SECTION("empty") {
@@ -62,8 +64,8 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
REQUIRE(sketch.get_n() == 0);
REQUIRE(sketch.get_num_retained() == 0);
REQUIRE(std::isnan(sketch.get_rank(0)));
- REQUIRE(std::isnan(sketch.get_min_value()));
- REQUIRE(std::isnan(sketch.get_max_value()));
+ REQUIRE(std::isnan(sketch.get_min_item()));
+ REQUIRE(std::isnan(sketch.get_max_item()));
REQUIRE(std::isnan(sketch.get_quantile(0.5)));
const double fractions[3] {0, 0.5, 1};
REQUIRE(sketch.get_quantiles(fractions, 3).size() == 0);
@@ -90,12 +92,12 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
REQUIRE_FALSE(sketch.is_estimation_mode());
REQUIRE(sketch.get_n() == 1);
REQUIRE(sketch.get_num_retained() == 1);
- REQUIRE(sketch.get_rank(1.0f) == 0.0);
- REQUIRE(sketch.get_rank<true>(1.0f) == 1.0);
- REQUIRE(sketch.get_rank(2.0f) == 1.0);
+ REQUIRE(sketch.get_rank(1.0f, false) == 0.0);
+ REQUIRE(sketch.get_rank(1.0f) == 1.0);
+ REQUIRE(sketch.get_rank(2.0f, false) == 1.0);
REQUIRE(sketch.get_rank(std::numeric_limits<float>::infinity()) == 1.0);
- REQUIRE(sketch.get_min_value() == 1.0);
- REQUIRE(sketch.get_max_value() == 1.0);
+ REQUIRE(sketch.get_min_item() == 1.0);
+ REQUIRE(sketch.get_max_item() == 1.0);
REQUIRE(sketch.get_quantile(0.5) == 1.0);
const double fractions[3] {0, 0.5, 1};
auto quantiles = sketch.get_quantiles(fractions, 3);
@@ -132,13 +134,13 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
REQUIRE_FALSE(sketch.is_empty());
REQUIRE_FALSE(sketch.is_estimation_mode());
REQUIRE(sketch.get_num_retained() == n);
- REQUIRE(sketch.get_min_value() == 0.0);
+ REQUIRE(sketch.get_min_item() == 0.0);
REQUIRE(sketch.get_quantile(0) == 0.0);
- REQUIRE(sketch.get_max_value() == n - 1);
+ REQUIRE(sketch.get_max_item() == n - 1);
REQUIRE(sketch.get_quantile(1) == n - 1);
- const double fractions[3] {0, 0.5, 1};
- auto quantiles = sketch.get_quantiles(fractions, 3);
+ const double ranks[3] {0, 0.5, 1};
+ auto quantiles = sketch.get_quantiles(ranks, 3, false);
REQUIRE(quantiles.size() == 3);
REQUIRE(quantiles[0] == 0.0);
REQUIRE(quantiles[1] == n / 2);
@@ -146,13 +148,13 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
for (uint32_t i = 0; i < n; i++) {
const double true_rank = (double) i / n;
- REQUIRE(sketch.get_rank(static_cast<float>(i)) == true_rank);
+ REQUIRE(sketch.get_rank(static_cast<float>(i), false) == true_rank);
const double true_rank_inclusive = (double) (i + 1) / n;
- REQUIRE(sketch.get_rank<true>(static_cast<float>(i)) == true_rank_inclusive);
+ REQUIRE(sketch.get_rank(static_cast<float>(i)) == true_rank_inclusive);
}
// the alternative method must produce the same result
- auto quantiles2 = sketch.get_quantiles(3);
+ auto quantiles2 = sketch.get_quantiles(3, false);
REQUIRE(quantiles2.size() == 3);
REQUIRE(quantiles[0] == quantiles2[0]);
REQUIRE(quantiles[1] == quantiles2[1]);
@@ -172,7 +174,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
sketch.update(9.0f);
sketch.update(10.0f);
REQUIRE(sketch.get_quantile(0) == 1.0);
- REQUIRE(sketch.get_quantile(0.5) == 6.0);
+ REQUIRE(sketch.get_quantile(0.5) == 5.0);
REQUIRE(sketch.get_quantile(0.99) == 10.0);
REQUIRE(sketch.get_quantile(1) == 10.0);
}
@@ -181,9 +183,9 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
kll_float_sketch sketch(200, 0);
for (int i = 0; i < 100; ++i) sketch.update(static_cast<float>(i));
REQUIRE(sketch.get_quantile(0) == 0);
- REQUIRE(sketch.get_quantile(0.01) == 1);
- REQUIRE(sketch.get_quantile(0.5) == 50);
- REQUIRE(sketch.get_quantile(0.99) == 99.0);
+ REQUIRE(sketch.get_quantile(0.01) == 0);
+ REQUIRE(sketch.get_quantile(0.5) == 49);
+ REQUIRE(sketch.get_quantile(0.99) == 98.0);
REQUIRE(sketch.get_quantile(1) == 99.0);
}
@@ -196,30 +198,28 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
}
REQUIRE_FALSE(sketch.is_empty());
REQUIRE(sketch.is_estimation_mode());
- REQUIRE(sketch.get_min_value() == 0.0); // min value is exact
- REQUIRE(sketch.get_quantile(0) == 0.0); // min value is exact
- REQUIRE(sketch.get_max_value() == n - 1); // max value is exact
- REQUIRE(sketch.get_quantile(1) == n - 1); // max value is exact
+ REQUIRE(sketch.get_min_item() == 0.0); // min value is exact
+ REQUIRE(sketch.get_max_item() == n - 1); // max value is exact
// test rank
for (int i = 0; i < n; i++) {
const double trueRank = (double) i / n;
- REQUIRE(sketch.get_rank(static_cast<float>(i)) == Approx(trueRank).margin(RANK_EPS_FOR_K_200));
+ REQUIRE(sketch.get_rank(static_cast<float>(i), false) == Approx(trueRank).margin(RANK_EPS_FOR_K_200));
}
// test quantiles at every 0.1 percentage point
- double fractions[1001];
- double reverse_fractions[1001]; // check that ordering does not matter
+ double ranks[1001];
+ double reverse_ranks[1001]; // check that ordering does not matter
for (int i = 0; i < 1001; i++) {
- fractions[i] = (double) i / 1000;
- reverse_fractions[1000 - i] = fractions[i];
+ ranks[i] = (double) i / 1000;
+ reverse_ranks[1000 - i] = ranks[i];
}
- auto quantiles = sketch.get_quantiles(fractions, 1001);
- auto reverse_quantiles = sketch.get_quantiles(reverse_fractions, 1001);
- float previous_quantile(0);
+ auto quantiles = sketch.get_quantiles(ranks, 1001);
+ auto reverse_quantiles = sketch.get_quantiles(reverse_ranks, 1001);
+ float previous_quantile = 0;
for (int i = 0; i < 1001; i++) {
// expensive in a loop, just to check the equivalence here, not advised for real code
- const float quantile = sketch.get_quantile(fractions[i]);
+ const float quantile = sketch.get_quantile(ranks[i]);
REQUIRE(quantiles[i] == quantile);
REQUIRE(reverse_quantiles[1000 - i] == quantile);
REQUIRE(previous_quantile <= quantile);
@@ -238,45 +238,41 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
REQUIRE(total_weight == sketch.get_n());
}
- SECTION("consistency between get_rank adn get_PMF/CDF") {
+ SECTION("consistency between get_rank and get_PMF/CDF") {
kll_float_sketch sketch(200, 0);
- const int n = 1000;
+ const int n = 200;
float values[n];
for (int i = 0; i < n; i++) {
sketch.update(static_cast<float>(i));
values[i] = static_cast<float>(i);
}
- { // inclusive=false (default)
- const auto ranks(sketch.get_CDF(values, n));
- const auto pmf(sketch.get_PMF(values, n));
+ { // inclusive=false
+ const auto ranks(sketch.get_CDF(values, n, false));
+ const auto pmf(sketch.get_PMF(values, n, false));
double subtotal_pmf = 0;
for (int i = 0; i < n; i++) {
- if (sketch.get_rank(values[i]) != ranks[i]) {
- std::cerr << "checking rank vs CDF for value " << i << std::endl;
- REQUIRE(sketch.get_rank(values[i]) == ranks[i]);
+ if (sketch.get_rank(values[i], false) != ranks[i]) {
+ FAIL("checking rank vs CDF for value " + std::to_string(i));
}
subtotal_pmf += pmf[i];
if (abs(ranks[i] - subtotal_pmf) > NUMERIC_NOISE_TOLERANCE) {
- std::cerr << "CDF vs PMF for value " << i << std::endl;
- REQUIRE(ranks[i] == Approx(subtotal_pmf).margin(NUMERIC_NOISE_TOLERANCE));
+ FAIL("CDF vs PMF for value " + std::to_string(i));
}
}
}
- { // inclusive=true
- const auto ranks(sketch.get_CDF<true>(values, n));
- const auto pmf(sketch.get_PMF<true>(values, n));
+ { // inclusive=true (default)
+ const auto ranks(sketch.get_CDF(values, n));
+ const auto pmf(sketch.get_PMF(values, n));
double subtotal_pmf = 0;
for (int i = 0; i < n; i++) {
- if (sketch.get_rank<true>(values[i]) != ranks[i]) {
- std::cerr << "checking rank vs CDF for value " << i << std::endl;
- REQUIRE(sketch.get_rank(values[i]) == ranks[i]);
+ if (sketch.get_rank(values[i]) != ranks[i]) {
+ FAIL("checking rank vs CDF for value " + std::to_string(i));
}
subtotal_pmf += pmf[i];
if (abs(ranks[i] - subtotal_pmf) > NUMERIC_NOISE_TOLERANCE) {
- std::cerr << "CDF vs PMF for value " << i << std::endl;
- REQUIRE(ranks[i] == Approx(subtotal_pmf).margin(NUMERIC_NOISE_TOLERANCE));
+ FAIL("CDF vs PMF for value " + std::to_string(i));
}
}
}
@@ -286,13 +282,13 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
std::ifstream is;
is.exceptions(std::ios::failbit | std::ios::badbit);
is.open(testBinaryInputPath + "kll_sketch_from_java.sk", std::ios::binary);
- auto sketch = kll_float_sketch::deserialize(is, test_allocator<float>(0));
+ auto sketch = kll_float_sketch::deserialize(is, serde<float>(), 0);
REQUIRE_FALSE(sketch.is_empty());
REQUIRE(sketch.is_estimation_mode());
REQUIRE(sketch.get_n() == 1000000);
REQUIRE(sketch.get_num_retained() == 614);
- REQUIRE(sketch.get_min_value() == 0.0);
- REQUIRE(sketch.get_max_value() == 999999.0);
+ REQUIRE(sketch.get_min_item() == 0.0);
+ REQUIRE(sketch.get_max_item() == 999999.0);
}
SECTION("stream serialize deserialize empty") {
@@ -300,15 +296,15 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
sketch.serialize(s);
REQUIRE(static_cast<size_t>(s.tellp()) == sketch.get_serialized_size_bytes());
- auto sketch2 = kll_float_sketch::deserialize(s, test_allocator<float>(0));
+ auto sketch2 = kll_float_sketch::deserialize(s, serde<float>(), 0);
REQUIRE(static_cast<size_t>(s.tellp()) == sketch2.get_serialized_size_bytes());
REQUIRE(s.tellg() == s.tellp());
REQUIRE(sketch2.is_empty() == sketch.is_empty());
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
REQUIRE(sketch2.get_n() == sketch.get_n());
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
- REQUIRE(std::isnan(sketch2.get_min_value()));
- REQUIRE(std::isnan(sketch2.get_max_value()));
+ REQUIRE(std::isnan(sketch2.get_min_item()));
+ REQUIRE(std::isnan(sketch2.get_max_item()));
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
}
@@ -322,8 +318,8 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
REQUIRE(sketch2.get_n() == sketch.get_n());
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
- REQUIRE(std::isnan(sketch2.get_min_value()));
- REQUIRE(std::isnan(sketch2.get_max_value()));
+ REQUIRE(std::isnan(sketch2.get_min_item()));
+ REQUIRE(std::isnan(sketch2.get_max_item()));
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
}
@@ -341,11 +337,11 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
REQUIRE_FALSE(sketch2.is_estimation_mode());
REQUIRE(sketch2.get_n() == 1);
REQUIRE(sketch2.get_num_retained() == 1);
- REQUIRE(sketch2.get_min_value() == 1.0);
- REQUIRE(sketch2.get_max_value() == 1.0);
+ REQUIRE(sketch2.get_min_item() == 1.0);
+ REQUIRE(sketch2.get_max_item() == 1.0);
REQUIRE(sketch2.get_quantile(0.5) == 1.0);
- REQUIRE(sketch2.get_rank(1) == 0.0);
- REQUIRE(sketch2.get_rank(2) == 1.0);
+ REQUIRE(sketch2.get_rank(1, false) == 0.0);
+ REQUIRE(sketch2.get_rank(2, false) == 1.0);
}
SECTION("bytes serialize deserialize one item") {
@@ -359,11 +355,11 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
REQUIRE_FALSE(sketch2.is_estimation_mode());
REQUIRE(sketch2.get_n() == 1);
REQUIRE(sketch2.get_num_retained() == 1);
- REQUIRE(sketch2.get_min_value() == 1.0);
- REQUIRE(sketch2.get_max_value() == 1.0);
+ REQUIRE(sketch2.get_min_item() == 1.0);
+ REQUIRE(sketch2.get_max_item() == 1.0);
REQUIRE(sketch2.get_quantile(0.5) == 1.0);
- REQUIRE(sketch2.get_rank(1) == 0.0);
- REQUIRE(sketch2.get_rank(2) == 1.0);
+ REQUIRE(sketch2.get_rank(1, false) == 0.0);
+ REQUIRE(sketch2.get_rank(2, false) == 1.0);
}
SECTION("deserialize one item v1") {
@@ -375,8 +371,8 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
REQUIRE_FALSE(sketch.is_estimation_mode());
REQUIRE(sketch.get_n() == 1);
REQUIRE(sketch.get_num_retained() == 1);
- REQUIRE(sketch.get_min_value() == 1.0);
- REQUIRE(sketch.get_max_value() == 1.0);
+ REQUIRE(sketch.get_min_item() == 1.0);
+ REQUIRE(sketch.get_max_item() == 1.0);
}
SECTION("stream serialize deserialize three items") {
@@ -394,8 +390,8 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
REQUIRE_FALSE(sketch2.is_estimation_mode());
REQUIRE(sketch2.get_n() == 3);
REQUIRE(sketch2.get_num_retained() == 3);
- REQUIRE(sketch2.get_min_value() == 1.0);
- REQUIRE(sketch2.get_max_value() == 3.0);
+ REQUIRE(sketch2.get_min_item() == 1.0);
+ REQUIRE(sketch2.get_max_item() == 3.0);
}
SECTION("bytes serialize deserialize three items") {
@@ -411,8 +407,8 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
REQUIRE_FALSE(sketch2.is_estimation_mode());
REQUIRE(sketch2.get_n() == 3);
REQUIRE(sketch2.get_num_retained() == 3);
- REQUIRE(sketch2.get_min_value() == 1.0);
- REQUIRE(sketch2.get_max_value() == 3.0);
+ REQUIRE(sketch2.get_min_item() == 1.0);
+ REQUIRE(sketch2.get_max_item() == 3.0);
}
SECTION("stream serialize deserialize many floats") {
@@ -429,8 +425,8 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
REQUIRE(sketch2.get_n() == sketch.get_n());
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
- REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
- REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
+ REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
+ REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
REQUIRE(sketch2.get_quantile(0.5) == sketch.get_quantile(0.5));
@@ -450,16 +446,16 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
REQUIRE(sketch2.get_n() == sketch.get_n());
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
- REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
- REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
+ REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
+ REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
REQUIRE(sketch2.get_quantile(0.5) == sketch.get_quantile(0.5));
REQUIRE(sketch2.get_rank(0) == sketch.get_rank(0));
REQUIRE(sketch2.get_rank(static_cast<float>(n)) == sketch.get_rank(static_cast<float>(n)));
- REQUIRE_THROWS_AS(kll_sketch<int>::deserialize(bytes.data(), 7), std::out_of_range);
- REQUIRE_THROWS_AS(kll_sketch<int>::deserialize(bytes.data(), 15), std::out_of_range);
- REQUIRE_THROWS_AS(kll_sketch<int>::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range);
+ REQUIRE_THROWS_AS(kll_float_sketch::deserialize(bytes.data(), 7, serde<float>(), 0), std::out_of_range);
+ REQUIRE_THROWS_AS(kll_float_sketch::deserialize(bytes.data(), 15, serde<float>(), 0), std::out_of_range);
+ REQUIRE_THROWS_AS(kll_float_sketch::deserialize(bytes.data(), bytes.size() - 1, serde<float>(), 0), std::out_of_range);
}
SECTION("bytes serialize deserialize many ints") {
@@ -474,8 +470,8 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
REQUIRE(sketch2.get_n() == sketch.get_n());
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
- REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
- REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
+ REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
+ REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
REQUIRE(sketch2.get_quantile(0.5) == sketch.get_quantile(0.5));
@@ -528,17 +524,17 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
sketch2.update(static_cast<float>((2 * n) - i - 1));
}
- REQUIRE(sketch1.get_min_value() == 0.0f);
- REQUIRE(sketch1.get_max_value() == n - 1);
- REQUIRE(sketch2.get_min_value() == n);
- REQUIRE(sketch2.get_max_value() == 2.0f * n - 1);
+ REQUIRE(sketch1.get_min_item() == 0.0f);
+ REQUIRE(sketch1.get_max_item() == n - 1);
+ REQUIRE(sketch2.get_min_item() == n);
+ REQUIRE(sketch2.get_max_item() == 2.0f * n - 1);
sketch1.merge(sketch2);
REQUIRE_FALSE(sketch1.is_empty());
REQUIRE(sketch1.get_n() == 2 * n);
- REQUIRE(sketch1.get_min_value() == 0.0f);
- REQUIRE(sketch1.get_max_value() == 2.0f * n - 1);
+ REQUIRE(sketch1.get_min_item() == 0.0f);
+ REQUIRE(sketch1.get_max_item() == 2.0f * n - 1);
REQUIRE(sketch1.get_quantile(0.5) == Approx(n).margin(n * RANK_EPS_FOR_K_200));
}
@@ -551,10 +547,10 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
sketch2.update(static_cast<float>((2 * n) - i - 1));
}
- REQUIRE(sketch1.get_min_value() == 0.0f);
- REQUIRE(sketch1.get_max_value() == n - 1);
- REQUIRE(sketch2.get_min_value() == n);
- REQUIRE(sketch2.get_max_value() == 2.0f * n - 1);
+ REQUIRE(sketch1.get_min_item() == 0.0f);
+ REQUIRE(sketch1.get_max_item() == n - 1);
+ REQUIRE(sketch2.get_min_item() == n);
+ REQUIRE(sketch2.get_max_item() == 2.0f * n - 1);
REQUIRE(sketch1.get_k() == 256);
REQUIRE(sketch2.get_k() == 128);
@@ -570,8 +566,8 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
REQUIRE_FALSE(sketch1.is_empty());
REQUIRE(sketch1.get_n() == 2 * n);
- REQUIRE(sketch1.get_min_value() == 0.0f);
- REQUIRE(sketch1.get_max_value() == 2.0f * n - 1);
+ REQUIRE(sketch1.get_min_item() == 0.0f);
+ REQUIRE(sketch1.get_max_item() == 2.0f * n - 1);
REQUIRE(sketch1.get_quantile(0.5) == Approx(n).margin(n * RANK_EPS_FOR_K_200));
}
@@ -590,8 +586,8 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
REQUIRE_FALSE(sketch1.is_empty());
REQUIRE(sketch1.get_n() == n);
- REQUIRE(sketch1.get_min_value() == 0.0f);
- REQUIRE(sketch1.get_max_value() == n - 1);
+ REQUIRE(sketch1.get_min_item() == 0.0f);
+ REQUIRE(sketch1.get_max_item() == n - 1);
REQUIRE(sketch1.get_quantile(0.5) == Approx(n / 2).margin(n / 2 * RANK_EPS_FOR_K_200));
sketch2.update(0);
@@ -606,8 +602,8 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
sketch1.update(1.0f);
sketch2.update(2.0f);
sketch2.merge(sketch1);
- REQUIRE(sketch2.get_min_value() == 1.0f);
- REQUIRE(sketch2.get_max_value() == 2.0f);
+ REQUIRE(sketch2.get_min_item() == 1.0f);
+ REQUIRE(sketch2.get_max_item() == 2.0f);
}
SECTION("merge min and max values from other") {
@@ -615,15 +611,15 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
for (int i = 0; i < 1000000; i++) sketch1.update(static_cast<float>(i));
kll_float_sketch sketch2(200, 0);
sketch2.merge(sketch1);
- REQUIRE(sketch2.get_min_value() == 0.0f);
- REQUIRE(sketch2.get_max_value() == 999999.0f);
+ REQUIRE(sketch2.get_min_item() == 0.0f);
+ REQUIRE(sketch2.get_max_item() == 999999.0f);
}
SECTION("sketch of ints") {
kll_sketch<int> sketch;
REQUIRE_THROWS_AS(sketch.get_quantile(0), std::runtime_error);
- REQUIRE_THROWS_AS(sketch.get_min_value(), std::runtime_error);
- REQUIRE_THROWS_AS(sketch.get_max_value(), std::runtime_error);
+ REQUIRE_THROWS_AS(sketch.get_min_item(), std::runtime_error);
+ REQUIRE_THROWS_AS(sketch.get_max_item(), std::runtime_error);
const int n = 1000;
for (int i = 0; i < n; i++) sketch.update(i);
@@ -638,8 +634,8 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
REQUIRE(sketch2.get_n() == sketch.get_n());
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
- REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
- REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
+ REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
+ REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
REQUIRE(sketch2.get_quantile(0.5) == sketch.get_quantile(0.5));
@@ -650,28 +646,28 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
SECTION("sketch of strings stream") {
kll_string_sketch sketch1(200, 0);
REQUIRE_THROWS_AS(sketch1.get_quantile(0), std::runtime_error);
- REQUIRE_THROWS_AS(sketch1.get_min_value(), std::runtime_error);
- REQUIRE_THROWS_AS(sketch1.get_max_value(), std::runtime_error);
+ REQUIRE_THROWS_AS(sketch1.get_min_item(), std::runtime_error);
+ REQUIRE_THROWS_AS(sketch1.get_max_item(), std::runtime_error);
REQUIRE(sketch1.get_serialized_size_bytes() == 8);
const int n = 1000;
for (int i = 0; i < n; i++) sketch1.update(std::to_string(i));
- REQUIRE(sketch1.get_min_value() == std::string("0"));
- REQUIRE(sketch1.get_max_value() == std::string("999"));
+ REQUIRE(sketch1.get_min_item() == std::string("0"));
+ REQUIRE(sketch1.get_max_item() == std::string("999"));
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
sketch1.serialize(s);
REQUIRE(static_cast<size_t>(s.tellp()) == sketch1.get_serialized_size_bytes());
- auto sketch2 = kll_string_sketch::deserialize(s, test_allocator<std::string>(0));
+ auto sketch2 = kll_string_sketch::deserialize(s, serde<std::string>(), 0);
REQUIRE(static_cast<size_t>(s.tellp()) == sketch2.get_serialized_size_bytes());
REQUIRE(s.tellg() == s.tellp());
REQUIRE(sketch2.is_empty() == sketch1.is_empty());
REQUIRE(sketch2.is_estimation_mode() == sketch1.is_estimation_mode());
REQUIRE(sketch2.get_n() == sketch1.get_n());
REQUIRE(sketch2.get_num_retained() == sketch1.get_num_retained());
- REQUIRE(sketch2.get_min_value() == sketch1.get_min_value());
- REQUIRE(sketch2.get_max_value() == sketch1.get_max_value());
+ REQUIRE(sketch2.get_min_item() == sketch1.get_min_item());
+ REQUIRE(sketch2.get_max_item() == sketch1.get_max_item());
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch1.get_normalized_rank_error(false));
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch1.get_normalized_rank_error(true));
REQUIRE(sketch2.get_quantile(0.5) == sketch1.get_quantile(0.5));
@@ -689,15 +685,15 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
SECTION("sketch of strings bytes") {
kll_string_sketch sketch1(200, 0);
REQUIRE_THROWS_AS(sketch1.get_quantile(0), std::runtime_error);
- REQUIRE_THROWS_AS(sketch1.get_min_value(), std::runtime_error);
- REQUIRE_THROWS_AS(sketch1.get_max_value(), std::runtime_error);
+ REQUIRE_THROWS_AS(sketch1.get_min_item(), std::runtime_error);
+ REQUIRE_THROWS_AS(sketch1.get_max_item(), std::runtime_error);
REQUIRE(sketch1.get_serialized_size_bytes() == 8);
const int n = 1000;
for (int i = 0; i < n; i++) sketch1.update(std::to_string(i));
- REQUIRE(sketch1.get_min_value() == std::string("0"));
- REQUIRE(sketch1.get_max_value() == std::string("999"));
+ REQUIRE(sketch1.get_min_item() == std::string("0"));
+ REQUIRE(sketch1.get_max_item() == std::string("999"));
auto bytes = sketch1.serialize();
REQUIRE(bytes.size() == sketch1.get_serialized_size_bytes());
@@ -707,8 +703,8 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
REQUIRE(sketch2.is_estimation_mode() == sketch1.is_estimation_mode());
REQUIRE(sketch2.get_n() == sketch1.get_n());
REQUIRE(sketch2.get_num_retained() == sketch1.get_num_retained());
- REQUIRE(sketch2.get_min_value() == sketch1.get_min_value());
- REQUIRE(sketch2.get_max_value() == sketch1.get_max_value());
+ REQUIRE(sketch2.get_min_item() == sketch1.get_min_item());
+ REQUIRE(sketch2.get_max_item() == sketch1.get_max_item());
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch1.get_normalized_rank_error(false));
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch1.get_normalized_rank_error(true));
REQUIRE(sketch2.get_quantile(0.5) == sketch1.get_quantile(0.5));
@@ -753,14 +749,14 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
// move constructor
kll_sketch<int> sketch2(std::move(sketch1));
for (int i = 0; i < n; i++) {
- REQUIRE(sketch2.get_rank(i) == (double) i / n);
+ REQUIRE(sketch2.get_rank(i, false) == (double) i / n);
}
// move assignment
kll_sketch<int> sketch3;
sketch3 = std::move(sketch2);
for (int i = 0; i < n; i++) {
- REQUIRE(sketch3.get_rank(i) == (double) i / n);
+ REQUIRE(sketch3.get_rank(i, false) == (double) i / n);
}
}
@@ -795,44 +791,24 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
kll.update(3);
kll.update(1);
- { // non-cumulative, using operator->
- auto view = kll.get_sorted_view(false);
+ {
+ auto view = kll.get_sorted_view();
REQUIRE(view.size() == 3);
auto it = view.begin();
- REQUIRE(it->first == 1);
- REQUIRE(it->second == 1);
- ++it;
- REQUIRE(it->first == 2);
- REQUIRE(it->second == 1);
- ++it;
- REQUIRE(it->first == 3);
+ REQUIRE(it->first == 1); // operator->
+ REQUIRE((*it).first == 1); // operator*
REQUIRE(it->second == 1);
- }
- { // cumulative, non-inclusive, using operator->
- auto view = kll.get_sorted_view(true);
- REQUIRE(view.size() == 3);
- auto it = view.begin();
- REQUIRE(it->first == 1);
- REQUIRE(it->second == 0);
+ REQUIRE(it.get_weight() == 1);
++it;
REQUIRE(it->first == 2);
- REQUIRE(it->second == 1);
- ++it;
- REQUIRE(it->first == 3);
REQUIRE(it->second == 2);
- }
- { // cumulative, inclusive, using operator*
- auto view = kll.get_sorted_view<true>(true);
- REQUIRE(view.size() == 3);
- auto it = view.begin();
- REQUIRE((*it).first == 1);
- REQUIRE((*it).second == 1);
+ REQUIRE(it.get_weight() == 1);
++it;
- REQUIRE((*it).first == 2);
- REQUIRE((*it).second == 2);
+ REQUIRE(it->first == 3);
+ REQUIRE(it->second == 3);
+ REQUIRE(it.get_weight() == 1);
++it;
- REQUIRE((*it).first == 3);
- REQUIRE((*it).second == 3);
+ REQUIRE(it == view.end());
}
}
@@ -854,8 +830,8 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
REQUIRE(kll_float.get_n() == kll_double.get_n());
REQUIRE(kll_float.get_num_retained() == kll_double.get_num_retained());
- auto sv_float = kll_float.get_sorted_view(false);
- auto sv_double = kll_double.get_sorted_view(false);
+ auto sv_float = kll_float.get_sorted_view();
+ auto sv_double = kll_double.get_sorted_view();
auto sv_float_it = sv_float.begin();
auto sv_double_it = sv_double.begin();
while (sv_float_it != sv_float.end()) {
diff --git a/kll/test/kll_sketch_validation.cpp b/kll/test/kll_sketch_validation.cpp
index 9ce8ae5..31ab2c1 100644
--- a/kll/test/kll_sketch_validation.cpp
+++ b/kll/test/kll_sketch_validation.cpp
@@ -22,14 +22,11 @@
#include <kll_sketch.hpp>
#include <kll_helper.hpp>
-#include <assert.h>
-
#ifdef KLL_VALIDATION
// This is to make sure the implementation matches exactly the reference implementation in OCaml.
-// Conditional compilation is used because the implementation needs a few modifications:
-// - switch from random choice to deterministic
-// - a few methods to expose internals of the sketch
+// Conditional compilation is used because the implementation needs
+// to switch from random choice to deterministic
namespace datasketches {
@@ -155,9 +152,9 @@ const int64_t correct_results[num_tests * 7] = {
};
static std::unique_ptr<int[]> make_input_array(unsigned n, unsigned stride) {
- assert (kll_helper::is_odd(stride));
- unsigned mask((1 << 23) - 1); // because library items are single-precision floats at the moment
- unsigned cur(0);
+ if (!kll_helper::is_odd(stride)) throw std::logic_error("stride must be odd");
+ unsigned mask = (1 << 23) - 1; // because items are single-precision floats at the moment
+ unsigned cur = 0;
std::unique_ptr<int[]> arr(new int[n]);
for (unsigned i = 0; i < n; i++) {
cur += stride;
@@ -167,50 +164,63 @@ static std::unique_ptr<int[]> make_input_array(unsigned n, unsigned stride) {
return arr;
}
-static int64_t simple_hash_of_sub_array(const float* arr, unsigned start, unsigned length) {
- int64_t multiplier(738219921); // an arbitrary odd 30-bit number
- int64_t mask60((1ULL << 60) - 1ULL);
- int64_t accum(0);
- for (unsigned i = start; i < start + length; i++) {
- accum += (int64_t) arr[i];
+template<typename It>
+std::pair<int64_t, uint8_t> hash_samples_and_count_levels(It from, It to) {
+ int64_t multiplier = 738219921; // an arbitrary odd 30-bit number
+ int64_t mask60 = (1ULL << 60) - 1ULL;
+ int64_t accum = 0;
+ uint8_t num_levels = 1;
+ for (auto it = from; it != to; ++it) {
+ accum += static_cast<int64_t>((*it).first);
accum *= multiplier;
accum &= mask60;
accum ^= accum >> 30;
+ const uint8_t level = count_trailing_zeros_in_u64((*it).second);
+ if (num_levels <= level) num_levels = level + 1;
}
- return accum;
+ return std::pair<uint64_t, uint8_t>(accum, num_levels);
}
TEST_CASE("kll validation", "[kll_sketch][validation]") {
for (unsigned i = 0; i < num_tests; i++) {
- assert (correct_results[7 * i] == i);
- unsigned k(correct_results[7 * i + 1]);
- unsigned n(correct_results[7 * i + 2]);
- unsigned stride(correct_results[7 * i + 3]);
+ if (correct_results[7 * i] != i) throw std::logic_error("test number mismatch");
+ unsigned k = correct_results[7 * i + 1];
+ unsigned n = correct_results[7 * i + 2];
+ unsigned stride = correct_results[7 * i + 3];
std::unique_ptr<int[]> input_array = make_input_array(n, stride);
kll_sketch<float> sketch(k);
kll_next_offset = 0;
for (unsigned j = 0; j < n; j++) {
sketch.update(input_array[j]);
}
- unsigned num_levels = sketch.get_num_levels();
unsigned num_samples = sketch.get_num_retained();
- int64_t hashed_samples = simple_hash_of_sub_array(sketch.get_items(), sketch.get_levels()[0], num_samples);
+ auto p = hash_samples_and_count_levels(sketch.begin(), sketch.end());
std::cout << i;
- REQUIRE(correct_results[7 * i + 4] == num_levels);
+ REQUIRE(correct_results[7 * i + 4] == p.second);
REQUIRE(correct_results[7 * i + 5] == num_samples);
- if (correct_results[7 * i + 6] == hashed_samples) {
+ if (correct_results[7 * i + 6] == p.first) {
std::cout << " pass" << std::endl;
} else {
- std::cout << " " << (correct_results[7 * i + 6]) << " != " << hashed_samples;
- sketch.to_stream(std::cout);
+ std::cout << " " << (correct_results[7 * i + 6]) << " != " << p.first;
+ std::cout << sketch.to_string();
FAIL();
}
}
}
-TEST_CASE("kll validation: test hash", "[kll_sketch][validaiton]") {
- float array[] = { 907500, 944104, 807020, 219921, 678370, 955217, 426885 };
- REQUIRE(simple_hash_of_sub_array(array, 1, 5) == 1141543353991880193LL);
+TEST_CASE("kll validation: test hash and num levels", "[kll_sketch][validaiton]") {
+ std::pair<float, uint64_t> array[] = {
+ std::make_pair(907500, 1),
+ std::make_pair(944104, 1),
+ std::make_pair(807020, 2),
+ std::make_pair(219921, 2),
+ std::make_pair(678370, 2),
+ std::make_pair(955217, 4),
+ std::make_pair(426885, 8)
+ };
+ auto hash_and_num_levels = hash_samples_and_count_levels(array + 1, array + 6);
+ REQUIRE(hash_and_num_levels.first == 1141543353991880193LL);
+ REQUIRE(hash_and_num_levels.second == 3);
}
TEST_CASE("kll validation: make input array", "[kll_sketch][validaiton]") {
diff --git a/python/src/kll_wrapper.cpp b/python/src/kll_wrapper.cpp
index 3d48c1f..b99340b 100644
--- a/python/src/kll_wrapper.cpp
+++ b/python/src/kll_wrapper.cpp
@@ -51,39 +51,17 @@ double kll_sketch_generic_normalized_rank_error(uint16_t k, bool pmf) {
return kll_sketch<T>::get_normalized_rank_error(k, pmf);
}
-template<typename T>
-double kll_sketch_get_rank(const kll_sketch<T>& sk, const T& item, bool inclusive) {
- if (inclusive)
- return sk.template get_rank<true>(item);
- else
- return sk.template get_rank<false>(item);
-}
-
-template<typename T>
-T kll_sketch_get_quantile(const kll_sketch<T>& sk,
- double rank,
- bool inclusive) {
- if (inclusive)
- return T(sk.template get_quantile<true>(rank));
- else
- return T(sk.template get_quantile<false>(rank));
-}
-
template<typename T>
py::list kll_sketch_get_quantiles(const kll_sketch<T>& sk,
- std::vector<double>& fractions,
+ std::vector<double>& ranks,
bool inclusive) {
- size_t nQuantiles = fractions.size();
- auto result = inclusive ?
- sk.template get_quantiles<true>(fractions.data(), nQuantiles)
- : sk.template get_quantiles<false>(fractions.data(), nQuantiles);
-
+ size_t nQuantiles = ranks.size();
+ auto result = sk.get_quantiles(ranks.data(), nQuantiles, inclusive);
// returning as std::vector<> would copy values to a list anyway
py::list list(nQuantiles);
for (size_t i = 0; i < nQuantiles; ++i) {
list[i] = result[i];
}
-
return list;
}
@@ -92,15 +70,11 @@ py::list kll_sketch_get_pmf(const kll_sketch<T>& sk,
std::vector<T>& split_points,
bool inclusive) {
size_t nPoints = split_points.size();
- auto result = inclusive ?
- sk.template get_PMF<true>(split_points.data(), nPoints)
- : sk.template get_PMF<false>(split_points.data(), nPoints);
-
+ auto result = sk.get_PMF(split_points.data(), nPoints, inclusive);
py::list list(nPoints + 1);
for (size_t i = 0; i <= nPoints; ++i) {
list[i] = result[i];
}
-
return list;
}
@@ -109,15 +83,11 @@ py::list kll_sketch_get_cdf(const kll_sketch<T>& sk,
std::vector<T>& split_points,
bool inclusive) {
size_t nPoints = split_points.size();
- auto result = inclusive ?
- sk.template get_CDF<true>(split_points.data(), nPoints)
- : sk.template get_CDF<false>(split_points.data(), nPoints);
-
+ auto result = sk.get_CDF(split_points.data(), nPoints, inclusive);
py::list list(nPoints + 1);
for (size_t i = 0; i <= nPoints; ++i) {
list[i] = result[i];
}
-
return list;
}
@@ -166,29 +136,22 @@ void bind_kll_sketch(py::module &m, const char* name) {
"Returns the number of retained items (samples) in the sketch")
.def("is_estimation_mode", &kll_sketch<T>::is_estimation_mode,
"Returns True if the sketch is in estimation mode, otherwise False")
- .def("get_min_value", &kll_sketch<T>::get_min_value,
- "Returns the minimum value from the stream. If empty, kll_floats_sketch retursn nan; kll_ints_sketch throws a RuntimeError")
- .def("get_max_value", &kll_sketch<T>::get_max_value,
- "Returns the maximum value from the stream. If empty, kll_floats_sketch retursn nan; kll_ints_sketch throws a RuntimeError")
- .def("get_quantile", &dspy::kll_sketch_get_quantile<T>, py::arg("fraction"), py::arg("inclusive")=false,
- "Returns an approximation to the value of the data item "
- "that would be preceded by the given fraction of a hypothetical sorted "
+ .def("get_min_value", &kll_sketch<T>::get_min_item,
+ "Returns the minimum value from the stream. If empty, kll_floats_sketch returns nan; kll_ints_sketch throws a RuntimeError")
+ .def("get_max_value", &kll_sketch<T>::get_max_item,
+ "Returns the maximum value from the stream. If empty, kll_floats_sketch returns nan; kll_ints_sketch throws a RuntimeError")
+ .def("get_quantile", &kll_sketch<T>::get_quantile, py::arg("rank"), py::arg("inclusive")=false,
+ "Returns an approximation to the data value "
+ "associated with the given normalized rank in a hypothetical sorted "
"version of the input stream so far.\n"
- "Note that this method has a fairly large overhead (microseconds instead of nanoseconds) "
- "so it should not be called multiple times to get different quantiles from the same "
- "sketch. Instead use get_quantiles(), which pays the overhead only once.\n"
"For kll_floats_sketch: if the sketch is empty this returns nan. "
"For kll_ints_sketch: if the sketch is empty this throws a RuntimeError.")
- .def("get_quantiles", &dspy::kll_sketch_get_quantiles<T>, py::arg("fractions"), py::arg("inclusive")=false,
- "This is a more efficient multiple-query version of get_quantile().\n"
+ .def("get_quantiles", &dspy::kll_sketch_get_quantiles<T>, py::arg("ranks"), py::arg("inclusive")=false,
"This returns an array that could have been generated by using get_quantile() for each "
- "fractional rank separately, but would be very inefficient. "
- "This method incurs the internal set-up overhead once and obtains multiple quantile values in "
- "a single query. It is strongly recommend that this method be used instead of multiple calls "
- "to get_quantile().\n"
+ "normalized rank separately.\n"
"If the sketch is empty this returns an empty vector.")
- .def("get_rank", &dspy::kll_sketch_get_rank<T>, py::arg("value"), py::arg("inclusive")=false,
- "Returns an approximation to the normalized (fractional) rank of the given value from 0 to 1, inclusive.\n"
+ .def("get_rank", &kll_sketch<T>::get_rank, py::arg("value"), py::arg("inclusive")=false,
+ "Returns an approximation to the normalized rank of the given value from 0 to 1, inclusive.\n"
"The resulting approximation has a probabilistic guarantee that can be obtained from the "
"get_normalized_rank_error(False) function.\n"
"With the parameter inclusive=true the weight of the given value is included into the rank."
diff --git a/python/src/quantiles_wrapper.cpp b/python/src/quantiles_wrapper.cpp
index 7633ea2..e493cc6 100644
--- a/python/src/quantiles_wrapper.cpp
+++ b/python/src/quantiles_wrapper.cpp
@@ -49,41 +49,17 @@ double quantiles_sketch_generic_normalized_rank_error(uint16_t k, bool pmf) {
return quantiles_sketch<T>::get_normalized_rank_error(k, pmf);
}
-template<typename T>
-double quantiles_sketch_get_rank(const quantiles_sketch<T>& sk,
- const T& item,
- bool inclusive) {
- if (inclusive)
- return sk.template get_rank<true>(item);
- else
- return sk.template get_rank<false>(item);
-}
-
-template<typename T>
-T quantiles_sketch_get_quantile(const quantiles_sketch<T>& sk,
- double rank,
- bool inclusive) {
- if (inclusive)
- return T(sk.template get_quantile<true>(rank));
- else
- return T(sk.template get_quantile<false>(rank));
-}
-
template<typename T>
py::list quantiles_sketch_get_quantiles(const quantiles_sketch<T>& sk,
- std::vector<double>& fractions,
+ std::vector<double>& ranks,
bool inclusive) {
- size_t n_quantiles = fractions.size();
- auto result = inclusive
- ? sk.template get_quantiles<true>(&fractions[0], static_cast<uint32_t>(n_quantiles))
- : sk.template get_quantiles<false>(&fractions[0], static_cast<uint32_t>(n_quantiles));
-
+ size_t n_quantiles = ranks.size();
+ auto result = sk.get_quantiles(ranks.data(), static_cast<uint32_t>(n_quantiles), inclusive);
// returning as std::vector<> would copy values to a list anyway
py::list list(n_quantiles);
for (size_t i = 0; i < n_quantiles; ++i) {
list[i] = result[i];
}
-
return list;
}
@@ -92,15 +68,11 @@ py::list quantiles_sketch_get_pmf(const quantiles_sketch<T>& sk,
std::vector<T>& split_points,
bool inclusive) {
size_t n_points = split_points.size();
- auto result = inclusive
- ? sk.template get_PMF<true>(&split_points[0], n_points)
- : sk.template get_PMF<false>(&split_points[0], n_points);
-
+ auto result = sk.get_PMF(split_points.data(), n_points, inclusive);
py::list list(n_points + 1);
for (size_t i = 0; i <= n_points; ++i) {
list[i] = result[i];
}
-
return list;
}
@@ -109,15 +81,11 @@ py::list quantiles_sketch_get_cdf(const quantiles_sketch<T>& sk,
std::vector<T>& split_points,
bool inclusive) {
size_t n_points = split_points.size();
- auto result = inclusive
- ? sk.template get_CDF<true>(&split_points[0], n_points)
- : sk.template get_CDF<false>(&split_points[0], n_points);
-
+ auto result = sk.get_CDF(split_points.data(), n_points, inclusive);
py::list list(n_points + 1);
for (size_t i = 0; i <= n_points; ++i) {
list[i] = result[i];
}
-
return list;
}
@@ -166,31 +134,26 @@ void bind_quantiles_sketch(py::module &m, const char* name) {
"Returns the number of retained items (samples) in the sketch")
.def("is_estimation_mode", &quantiles_sketch<T>::is_estimation_mode,
"Returns True if the sketch is in estimation mode, otherwise False")
- .def("get_min_value", &quantiles_sketch<T>::get_min_value,
+ .def("get_min_value", &quantiles_sketch<T>::get_min_item,
"Returns the minimum value from the stream. If empty, quantiles_floats_sketch returns nan; quantiles_ints_sketch throws a RuntimeError")
- .def("get_max_value", &quantiles_sketch<T>::get_max_value,
+ .def("get_max_value", &quantiles_sketch<T>::get_max_item,
"Returns the maximum value from the stream. If empty, quantiles_floats_sketch returns nan; quantiles_ints_sketch throws a RuntimeError")
- .def("get_quantile", &dspy::quantiles_sketch_get_quantile<T>, py::arg("rank"), py::arg("inclusive")=false,
- "Returns an approximation to the value of the data item "
- "that would be preceded by the given fraction of a hypothetical sorted "
+ .def("get_quantile", &quantiles_sketch<T>::get_quantile, py::arg("rank"), py::arg("inclusive")=false,
+ "Returns an approximation to the data value "
+ "associated with the given normalized rank in a hypothetical sorted "
"version of the input stream so far.\n"
- "Note that this method has a fairly large overhead (microseconds instead of nanoseconds) "
- "so it should not be called multiple times to get different quantiles from the same "
- "sketch. Instead use get_quantiles(), which pays the overhead only once.\n"
"For quantiles_floats_sketch: if the sketch is empty this returns nan. "
"For quantiles_ints_sketch: if the sketch is empty this throws a RuntimeError.")
.def("get_quantiles", &dspy::quantiles_sketch_get_quantiles<T>, py::arg("ranks"), py::arg("inclusive")=false,
- "This is a more efficient multiple-query version of get_quantile().\n"
"This returns an array that could have been generated by using get_quantile() for each "
- "fractional rank separately, but would be very inefficient. "
- "This method incurs the internal set-up overhead once and obtains multiple quantile values in "
- "a single query. It is strongly recommend that this method be used instead of multiple calls "
- "to get_quantile().\n"
+ "normalized rank separately.\n"
"If the sketch is empty this returns an empty vector.")
- .def("get_rank", &dspy::quantiles_sketch_get_rank<T>, py::arg("item"), py::arg("inclusive")=false,
- "Returns an approximation to the normalized (fractional) rank of the given value from 0 to 1, inclusive.\n"
+ .def("get_rank", &quantiles_sketch<T>::get_rank, py::arg("value"), py::arg("inclusive")=false,
+ "Returns an approximation to the normalized rank of the given value from 0 to 1, inclusive.\n"
"The resulting approximation has a probabilistic guarantee that can be obtained from the "
"get_normalized_rank_error(False) function.\n"
+ "With the parameter inclusive=true the weight of the given value is included into the rank. "
+ "Otherwise the rank equals the sum of the weights of values less than the given value.\n"
"If the sketch is empty this returns nan.")
.def("get_pmf", &dspy::quantiles_sketch_get_pmf<T>, py::arg("split_points"), py::arg("inclusive")=false,
"Returns an approximation to the Probability Mass Function (PMF) of the input stream "
diff --git a/python/src/req_wrapper.cpp b/python/src/req_wrapper.cpp
index eeb085a..4d1efad 100644
--- a/python/src/req_wrapper.cpp
+++ b/python/src/req_wrapper.cpp
@@ -51,41 +51,17 @@ double req_sketch_generic_normalized_rank_error(uint16_t k, bool pmf) {
return req_sketch<T>::get_normalized_rank_error(k, pmf);
}
-template<typename T>
-double req_sketch_get_rank(const req_sketch<T>& sk,
- const T& item,
- bool inclusive) {
- if (inclusive)
- return sk.template get_rank<true>(item);
- else
- return sk.template get_rank<false>(item);
-}
-
-template<typename T>
-T req_sketch_get_quantile(const req_sketch<T>& sk,
- double rank,
- bool inclusive) {
- if (inclusive)
- return T(sk.template get_quantile<true>(rank));
- else
- return T(sk.template get_quantile<false>(rank));
-}
-
template<typename T>
py::list req_sketch_get_quantiles(const req_sketch<T>& sk,
- std::vector<double>& fractions,
+ std::vector<double>& ranks,
bool inclusive) {
- size_t n_quantiles = fractions.size();
- auto result = inclusive
- ? sk.template get_quantiles<true>(&fractions[0], n_quantiles)
- : sk.template get_quantiles<false>(&fractions[0], n_quantiles);
-
+ size_t n_quantiles = ranks.size();
+ auto result = sk.get_quantiles(ranks.data(), n_quantiles, inclusive);
// returning as std::vector<> would copy values to a list anyway
py::list list(n_quantiles);
for (size_t i = 0; i < n_quantiles; ++i) {
list[i] = result[i];
}
-
return list;
}
@@ -94,15 +70,11 @@ py::list req_sketch_get_pmf(const req_sketch<T>& sk,
std::vector<T>& split_points,
bool inclusive) {
size_t n_points = split_points.size();
- auto result = inclusive
- ? sk.template get_PMF<true>(&split_points[0], n_points)
- : sk.template get_PMF<false>(&split_points[0], n_points);
-
+ auto result = sk.get_PMF(split_points.data(), n_points, inclusive);
py::list list(n_points + 1);
for (size_t i = 0; i <= n_points; ++i) {
list[i] = result[i];
}
-
return list;
}
@@ -111,15 +83,11 @@ py::list req_sketch_get_cdf(const req_sketch<T>& sk,
std::vector<T>& split_points,
bool inclusive) {
size_t n_points = split_points.size();
- auto result = inclusive
- ? sk.template get_CDF<true>(&split_points[0], n_points)
- : sk.template get_CDF<false>(&split_points[0], n_points);
-
+ auto result = sk.get_CDF(split_points.data(), n_points, inclusive);
py::list list(n_points + 1);
for (size_t i = 0; i <= n_points; ++i) {
list[i] = result[i];
}
-
return list;
}
@@ -170,33 +138,26 @@ void bind_req_sketch(py::module &m, const char* name) {
"Returns the number of retained items (samples) in the sketch")
.def("is_estimation_mode", &req_sketch<T>::is_estimation_mode,
"Returns True if the sketch is in estimation mode, otherwise False")
- .def("get_min_value", &req_sketch<T>::get_min_value,
+ .def("get_min_value", &req_sketch<T>::get_min_item,
"Returns the minimum value from the stream. If empty, req_floats_sketch returns nan; req_ints_sketch throws a RuntimeError")
- .def("get_max_value", &req_sketch<T>::get_max_value,
+ .def("get_max_value", &req_sketch<T>::get_max_item,
"Returns the maximum value from the stream. If empty, req_floats_sketch returns nan; req_ints_sketch throws a RuntimeError")
- .def("get_quantile", &dspy::req_sketch_get_quantile<T>, py::arg("rank"), py::arg("inclusive")=false,
- "Returns an approximation to the value of the data item "
- "that would be preceded by the given fraction of a hypothetical sorted "
+ .def("get_quantile", &req_sketch<T>::get_quantile, py::arg("rank"), py::arg("inclusive")=false,
+ "Returns an approximation to the data value "
+ "associated with the given normalized rank in a hypothetical sorted "
"version of the input stream so far.\n"
- "Note that this method has a fairly large overhead (microseconds instead of nanoseconds) "
- "so it should not be called multiple times to get different quantiles from the same "
- "sketch. Instead use get_quantiles(), which pays the overhead only once.\n"
"For req_floats_sketch: if the sketch is empty this returns nan. "
"For req_ints_sketch: if the sketch is empty this throws a RuntimeError.")
.def("get_quantiles", &dspy::req_sketch_get_quantiles<T>, py::arg("ranks"), py::arg("inclusive")=false,
- "This is a more efficient multiple-query version of get_quantile().\n"
"This returns an array that could have been generated by using get_quantile() for each "
- "fractional rank separately, but would be very inefficient. "
- "This method incurs the internal set-up overhead once and obtains multiple quantile values in "
- "a single query. It is strongly recommend that this method be used instead of multiple calls "
- "to get_quantile().\n"
+ "normalized rank separately.\n"
"If the sketch is empty this returns an empty vector.")
- .def("get_rank", &dspy::req_sketch_get_rank<T>, py::arg("item"), py::arg("inclusive")=false,
- "Returns an approximation to the normalized (fractional) rank of the given value from 0 to 1, inclusive.\n"
+ .def("get_rank", &req_sketch<T>::get_rank, py::arg("value"), py::arg("inclusive")=false,
+ "Returns an approximation to the normalized rank of the given value from 0 to 1, inclusive.\n"
"The resulting approximation has a probabilistic guarantee that can be obtained from the "
"get_normalized_rank_error(False) function.\n"
- "With the parameter inclusive=true the weight of the given item is included into the rank."
- "Otherwise the rank equals the sum of the weights of items less than the given item.\n"
+ "With the parameter inclusive=true the weight of the given value is included into the rank. "
+ "Otherwise the rank equals the sum of the weights of values less than the given value.\n"
"If the sketch is empty this returns nan.")
.def("get_pmf", &dspy::req_sketch_get_pmf<T>, py::arg("split_points"), py::arg("inclusive")=false,
"Returns an approximation to the Probability Mass Function (PMF) of the input stream "
diff --git a/python/src/vector_of_kll.cpp b/python/src/vector_of_kll.cpp
index 020e6ee..46b1a5e 100644
--- a/python/src/vector_of_kll.cpp
+++ b/python/src/vector_of_kll.cpp
@@ -36,18 +36,14 @@ namespace vector_of_kll_constants {
}
// Wrapper class for Numpy compatibility
-template <typename T, typename C = std::less<T>, typename S = serde<T>>
+template <typename T, typename C = std::less<T>>
class vector_of_kll_sketches {
public:
- // TODO: Redundant and deprecated. Will be removed in next major version release.
- static const uint32_t DEFAULT_K = vector_of_kll_constants::DEFAULT_K;
- static const uint32_t DEFAULT_D = vector_of_kll_constants::DEFAULT_D;
-
explicit vector_of_kll_sketches(uint32_t k = vector_of_kll_constants::DEFAULT_K, uint32_t d = vector_of_kll_constants::DEFAULT_D);
vector_of_kll_sketches(const vector_of_kll_sketches& other);
vector_of_kll_sketches(vector_of_kll_sketches&& other) noexcept;
- vector_of_kll_sketches<T,C,S>& operator=(const vector_of_kll_sketches& other);
- vector_of_kll_sketches<T,C,S>& operator=(vector_of_kll_sketches&& other);
+ vector_of_kll_sketches<T, C>& operator=(const vector_of_kll_sketches& other);
+ vector_of_kll_sketches<T, C>& operator=(vector_of_kll_sketches&& other);
// container parameters
inline uint32_t get_k() const;
@@ -58,7 +54,7 @@ class vector_of_kll_sketches {
void merge(const vector_of_kll_sketches<T>& other);
// returns a single sketch combining all data in the array
- kll_sketch<T,C,S> collapse(const py::array_t<int>& isk) const;
+ kll_sketch<T, C> collapse(const py::array_t<int>& isk) const;
// sketch queries returning an array of results
py::array is_empty() const;
@@ -67,7 +63,7 @@ class vector_of_kll_sketches {
py::array get_min_values() const;
py::array get_max_values() const;
py::array get_num_retained() const;
- py::array get_quantiles(const py::array_t<double>& fractions, const py::array_t<int>& isk) const;
+ py::array get_quantiles(const py::array_t<double>& ranks, const py::array_t<int>& isk) const;
py::array get_ranks(const py::array_t<T>& values, const py::array_t<int>& isk) const;
py::array get_pmf(const py::array_t<T>& split_points, const py::array_t<int>& isk) const;
py::array get_cdf(const py::array_t<T>& split_points, const py::array_t<int>& isk) const;
@@ -86,11 +82,11 @@ class vector_of_kll_sketches {
const uint32_t k_; // kll sketch k parameter
const uint32_t d_; // number of dimensions (here: sketches) to hold
- std::vector<kll_sketch<T,C,S>> sketches_;
+ std::vector<kll_sketch<T, C>> sketches_;
};
-template<typename T, typename C, typename S>
-vector_of_kll_sketches<T,C,S>::vector_of_kll_sketches(uint32_t k, uint32_t d):
+template<typename T, typename C>
+vector_of_kll_sketches<T, C>::vector_of_kll_sketches(uint32_t k, uint32_t d):
k_(k),
d_(d)
{
@@ -106,49 +102,49 @@ d_(d)
}
}
-template<typename T, typename C, typename S>
-vector_of_kll_sketches<T,C,S>::vector_of_kll_sketches(const vector_of_kll_sketches& other) :
+template<typename T, typename C>
+vector_of_kll_sketches<T, C>::vector_of_kll_sketches(const vector_of_kll_sketches& other) :
k_(other.k_),
d_(other.d_),
sketches_(other.sketches_)
{}
-template<typename T, typename C, typename S>
-vector_of_kll_sketches<T,C,S>::vector_of_kll_sketches(vector_of_kll_sketches&& other) noexcept :
+template<typename T, typename C>
+vector_of_kll_sketches<T, C>::vector_of_kll_sketches(vector_of_kll_sketches&& other) noexcept :
k_(other.k_),
d_(other.d_),
sketches_(std::move(other.sketches_))
{}
-template<typename T, typename C, typename S>
-vector_of_kll_sketches<T,C,S>& vector_of_kll_sketches<T,C,S>::operator=(const vector_of_kll_sketches& other) {
- vector_of_kll_sketches<T,C,S> copy(other);
+template<typename T, typename C>
+vector_of_kll_sketches<T, C>& vector_of_kll_sketches<T, C>::operator=(const vector_of_kll_sketches& other) {
+ vector_of_kll_sketches<T, C> copy(other);
k_ = copy.k_;
d_ = copy.d_;
std::swap(sketches_, copy.sketches_);
return *this;
}
-template<typename T, typename C, typename S>
-vector_of_kll_sketches<T,C,S>& vector_of_kll_sketches<T,C,S>::operator=(vector_of_kll_sketches&& other) {
+template<typename T, typename C>
+vector_of_kll_sketches<T, C>& vector_of_kll_sketches<T, C>::operator=(vector_of_kll_sketches&& other) {
k_ = other.k_;
d_ = other.d_;
std::swap(sketches_, other.sketches_);
return *this;
}
-template<typename T, typename C, typename S>
-uint32_t vector_of_kll_sketches<T,C,S>::get_k() const {
+template<typename T, typename C>
+uint32_t vector_of_kll_sketches<T, C>::get_k() const {
return k_;
}
-template<typename T, typename C, typename S>
-uint32_t vector_of_kll_sketches<T,C,S>::get_d() const {
+template<typename T, typename C>
+uint32_t vector_of_kll_sketches<T, C>::get_d() const {
return d_;
}
-template<typename T, typename C, typename S>
-std::vector<uint32_t> vector_of_kll_sketches<T,C,S>::get_indices(const py::array_t<int>& isk) const {
+template<typename T, typename C>
+std::vector<uint32_t> vector_of_kll_sketches<T, C>::get_indices(const py::array_t<int>& isk) const {
std::vector<uint32_t> indices;
if (isk.size() == 1) {
auto data = isk.unchecked();
@@ -177,8 +173,8 @@ std::vector<uint32_t> vector_of_kll_sketches<T,C,S>::get_indices(const py::array
}
// Checks if each sketch is empty or not
-template<typename T, typename C, typename S>
-py::array vector_of_kll_sketches<T,C,S>::is_empty() const {
+template<typename T, typename C>
+py::array vector_of_kll_sketches<T, C>::is_empty() const {
std::vector<bool> vals(d_);
for (uint32_t i = 0; i < d_; ++i) {
vals[i] = sketches_[i].is_empty();
@@ -190,8 +186,8 @@ py::array vector_of_kll_sketches<T,C,S>::is_empty() const {
// Updates each sketch with values
// Currently: all values must be present
// TODO: allow subsets of sketches to be updated
-template<typename T, typename C, typename S>
-void vector_of_kll_sketches<T,C,S>::update(const py::array_t<T>& items) {
+template<typename T, typename C>
+void vector_of_kll_sketches<T, C>::update(const py::array_t<T>& items) {
size_t ndim = items.ndim();
@@ -231,8 +227,8 @@ void vector_of_kll_sketches<T,C,S>::update(const py::array_t<T>& items) {
// Merges two arrays of sketches
// Currently: all values must be present
-template<typename T, typename C, typename S>
-void vector_of_kll_sketches<T,C,S>::merge(const vector_of_kll_sketches<T>& other) {
+template<typename T, typename C>
+void vector_of_kll_sketches<T, C>::merge(const vector_of_kll_sketches<T>& other) {
if (d_ != other.get_d()) {
throw std::invalid_argument("Must have same number of dimensions to merge: " + std::to_string(d_)
+ " vs " + std::to_string(other.d_));
@@ -243,11 +239,11 @@ void vector_of_kll_sketches<T,C,S>::merge(const vector_of_kll_sketches<T>& other
}
}
-template<typename T, typename C, typename S>
-kll_sketch<T,C,S> vector_of_kll_sketches<T,C,S>::collapse(const py::array_t<int>& isk) const {
+template<typename T, typename C>
+kll_sketch<T, C> vector_of_kll_sketches<T, C>::collapse(const py::array_t<int>& isk) const {
std::vector<uint32_t> inds = get_indices(isk);
- kll_sketch<T,C,S> result(k_);
+ kll_sketch<T, C> result(k_);
for (auto& idx : inds) {
result.merge(sketches_[idx]);
}
@@ -255,8 +251,8 @@ kll_sketch<T,C,S> vector_of_kll_sketches<T,C,S>::collapse(const py::array_t<int>
}
// Number of updates for each sketch
-template<typename T, typename C, typename S>
-py::array vector_of_kll_sketches<T,C,S>::get_n() const {
+template<typename T, typename C>
+py::array vector_of_kll_sketches<T, C>::get_n() const {
std::vector<uint64_t> vals(d_);
for (uint32_t i = 0; i < d_; ++i) {
vals[i] = sketches_[i].get_n();
@@ -265,8 +261,8 @@ py::array vector_of_kll_sketches<T,C,S>::get_n() const {
}
// Number of retained values for each sketch
-template<typename T, typename C, typename S>
-py::array vector_of_kll_sketches<T,C,S>::get_num_retained() const {
+template<typename T, typename C>
+py::array vector_of_kll_sketches<T, C>::get_num_retained() const {
std::vector<uint32_t> vals(d_);
for (uint32_t i = 0; i < d_; ++i) {
vals[i] = sketches_[i].get_num_retained();
@@ -276,22 +272,22 @@ py::array vector_of_kll_sketches<T,C,S>::get_num_retained() const {
// Gets the minimum value of each sketch
// TODO: allow subsets of sketches
-template<typename T, typename C, typename S>
-py::array vector_of_kll_sketches<T,C,S>::get_min_values() const {
+template<typename T, typename C>
+py::array vector_of_kll_sketches<T, C>::get_min_values() const {
std::vector<T> vals(d_);
for (uint32_t i = 0; i < d_; ++i) {
- vals[i] = sketches_[i].get_min_value();
+ vals[i] = sketches_[i].get_min_item();
}
return py::cast(vals);
}
// Gets the maximum value of each sketch
// TODO: allow subsets of sketches
-template<typename T, typename C, typename S>
-py::array vector_of_kll_sketches<T,C,S>::get_max_values() const {
+template<typename T, typename C>
+py::array vector_of_kll_sketches<T, C>::get_max_values() const {
std::vector<T> vals(d_);
for (uint32_t i = 0; i < d_; ++i) {
- vals[i] = sketches_[i].get_max_value();
+ vals[i] = sketches_[i].get_max_item();
}
return py::cast(vals);
}
@@ -299,8 +295,8 @@ py::array vector_of_kll_sketches<T,C,S>::get_max_values() const {
// Summary of each sketch as one long string
// Users should use .split('\n\n') when calling it to build a list of each
// sketch's summary
-template<typename T, typename C, typename S>
-std::string vector_of_kll_sketches<T,C,S>::to_string(bool print_levels, bool print_items) const {
+template<typename T, typename C>
+std::string vector_of_kll_sketches<T, C>::to_string(bool print_levels, bool print_items) const {
std::ostringstream ss;
for (uint32_t i = 0; i < d_; ++i) {
// all streams into 1 string, for compatibility with Python's str() behavior
@@ -311,8 +307,8 @@ std::string vector_of_kll_sketches<T,C,S>::to_string(bool print_levels, bool pri
return ss.str();
}
-template<typename T, typename C, typename S>
-py::array vector_of_kll_sketches<T,C,S>::is_estimation_mode() const {
+template<typename T, typename C>
+py::array vector_of_kll_sketches<T, C>::is_estimation_mode() const {
std::vector<bool> vals(d_);
for (uint32_t i = 0; i < d_; ++i) {
vals[i] = sketches_[i].is_estimation_mode();
@@ -321,16 +317,16 @@ py::array vector_of_kll_sketches<T,C,S>::is_estimation_mode() const {
}
// Value of sketch(es) corresponding to some quantile(s)
-template<typename T, typename C, typename S>
-py::array vector_of_kll_sketches<T,C,S>::get_quantiles(const py::array_t<double>& fractions,
+template<typename T, typename C>
+py::array vector_of_kll_sketches<T, C>::get_quantiles(const py::array_t<double>& ranks,
const py::array_t<int>& isk) const {
std::vector<uint32_t> inds = get_indices(isk);
size_t num_sketches = inds.size();
- size_t num_quantiles = fractions.size();
+ size_t num_quantiles = ranks.size();
std::vector<std::vector<T>> quants(num_sketches, std::vector<T>(num_quantiles));
for (uint32_t i = 0; i < num_sketches; ++i) {
- auto quant = sketches_[inds[i]].get_quantiles(fractions.data(), num_quantiles);
+ auto quant = sketches_[inds[i]].get_quantiles(ranks.data(), num_quantiles);
for (size_t j = 0; j < num_quantiles; ++j) {
quants[i][j] = quant[j];
}
@@ -340,8 +336,8 @@ py::array vector_of_kll_sketches<T,C,S>::get_quantiles(const py::array_t<double>
}
// Value of sketch(es) corresponding to some rank(s)
-template<typename T, typename C, typename S>
-py::array vector_of_kll_sketches<T,C,S>::get_ranks(const py::array_t<T>& values,
+template<typename T, typename C>
+py::array vector_of_kll_sketches<T, C>::get_ranks(const py::array_t<T>& values,
const py::array_t<int>& isk) const {
std::vector<uint32_t> inds = get_indices(isk);
size_t num_sketches = inds.size();
@@ -359,8 +355,8 @@ py::array vector_of_kll_sketches<T,C,S>::get_ranks(const py::array_t<T>& values,
}
// PMF(s) of sketch(es)
-template<typename T, typename C, typename S>
-py::array vector_of_kll_sketches<T,C,S>::get_pmf(const py::array_t<T>& split_points,
+template<typename T, typename C>
+py::array vector_of_kll_sketches<T, C>::get_pmf(const py::array_t<T>& split_points,
const py::array_t<int>& isk) const {
std::vector<uint32_t> inds = get_indices(isk);
size_t num_sketches = inds.size();
@@ -378,8 +374,8 @@ py::array vector_of_kll_sketches<T,C,S>::get_pmf(const py::array_t<T>& split_poi
}
// CDF(s) of sketch(es)
-template<typename T, typename C, typename S>
-py::array vector_of_kll_sketches<T,C,S>::get_cdf(const py::array_t<T>& split_points,
+template<typename T, typename C>
+py::array vector_of_kll_sketches<T, C>::get_cdf(const py::array_t<T>& split_points,
const py::array_t<int>& isk) const {
std::vector<uint32_t> inds = get_indices(isk);
size_t num_sketches = inds.size();
@@ -396,8 +392,8 @@ py::array vector_of_kll_sketches<T,C,S>::get_cdf(const py::array_t<T>& split_poi
return py::cast(cdfs);
}
-template<typename T, typename C, typename S>
-void vector_of_kll_sketches<T,C,S>::deserialize(const py::bytes& sk_bytes,
+template<typename T, typename C>
+void vector_of_kll_sketches<T, C>::deserialize(const py::bytes& sk_bytes,
uint32_t idx) {
if (idx >= d_) {
throw std::invalid_argument("request for invalid dimenions >= d ("
@@ -408,8 +404,8 @@ void vector_of_kll_sketches<T,C,S>::deserialize(const py::bytes& sk_bytes,
sketches_[idx] = std::move(kll_sketch<T>::deserialize(skStr.c_str(), skStr.length()));
}
-template<typename T, typename C, typename S>
-py::list vector_of_kll_sketches<T,C,S>::serialize(py::array_t<uint32_t>& isk) {
+template<typename T, typename C>
+py::list vector_of_kll_sketches<T, C>::serialize(py::array_t<uint32_t>& isk) {
std::vector<uint32_t> inds = get_indices(isk);
const size_t num_sketches = inds.size();
@@ -466,9 +462,9 @@ void bind_vector_of_kll_sketches(py::module &m, const char* name) {
"Returns the minimum value(s) of the sketch(es)")
.def("get_max_values", &vector_of_kll_sketches<T>::get_max_values,
"Returns the maximum value(s) of the sketch(es)")
- .def("get_quantiles", &vector_of_kll_sketches<T>::get_quantiles, py::arg("fractions"),
+ .def("get_quantiles", &vector_of_kll_sketches<T>::get_quantiles, py::arg("ranks"),
py::arg("isk")=-1,
- "Returns the value(s) associated with the specified quantile(s) for the specified sketch(es). `fractions` can be a float between 0 and 1 (inclusive), or a list/array of values. `isk` specifies which sketch(es) to return the value(s) for (default: all sketches)")
+ "Returns the value(s) associated with the specified quantile(s) for the specified sketch(es). `ranks` can be a float between 0 and 1 (inclusive), or a list/array of values. `isk` specifies which sketch(es) to return the value(s) for (default: all sketches)")
.def("get_ranks", &vector_of_kll_sketches<T>::get_ranks, py::arg("values"),
py::arg("isk")=-1,
"Returns the value(s) associated with the specified ranks(s) for the specified sketch(es). `values` can be an int between 0 and the number of values retained, or a list/array of values. `isk` specifies which sketch(es) to return the value(s) for (default: all sketches)")
diff --git a/quantiles/include/quantiles_sketch.hpp b/quantiles/include/quantiles_sketch.hpp
index b09c552..94211ad 100644
--- a/quantiles/include/quantiles_sketch.hpp
+++ b/quantiles/include/quantiles_sketch.hpp
@@ -32,22 +32,21 @@ namespace datasketches {
/**
* This is a stochastic streaming sketch that enables near-real time analysis of the
- * approximate distribution of real values from a very large stream in a single pass.
- * The analysis is obtained using a getQuantiles(*) function or its inverse functions the
- * Probability Mass Function from getPMF(*) and the Cumulative Distribution Function from getCDF(*).
+ * approximate distribution from a very large stream in a single pass.
+ * The analysis is obtained using get_rank() and get_quantile() functions,
+ * the Probability Mass Function from get_PMF() and the Cumulative Distribution Function from get_CDF().
*
* <p>Consider a large stream of one million values such as packet sizes coming into a network node.
- * The absolute rank of any specific size value is simply its index in the hypothetical sorted
+ * The natural rank of any specific size value is simply its index in the hypothetical sorted
* array of values.
- * The normalized rank (or fractional rank) is the absolute rank divided by the stream size,
+ * The normalized rank is the natural rank divided by the stream size,
* in this case one million.
* The value corresponding to the normalized rank of 0.5 represents the 50th percentile or median
- * value of the distribution, or getQuantile(0.5). Similarly, the 95th percentile is obtained from
- * getQuantile(0.95). Using the getQuantiles(0.0, 1.0) will return the min and max values seen by
- * the sketch.</p>
+ * value of the distribution, or get_quantile(0.5). Similarly, the 95th percentile is obtained from
+ * get_quantile(0.95).</p>
*
* <p>From the min and max values, for example, 1 and 1000 bytes,
- * you can obtain the PMF from getPMF(100, 500, 900) that will result in an array of
+ * you can obtain the PMF from get_PMF(100, 500, 900) that will result in an array of
* 4 fractional values such as {.4, .3, .2, .1}, which means that
* <ul>
* <li>40% of the values were < 100,</li>
@@ -55,18 +54,17 @@ namespace datasketches {
* <li>20% of the values were ≥ 500 and < 900, and</li>
* <li>10% of the values were ≥ 900.</li>
* </ul>
- * A frequency histogram can be obtained by simply multiplying these fractions by getN(),
+ * A frequency histogram can be obtained by simply multiplying these fractions by get_n(),
* which is the total count of values received.
- * The getCDF(*) works similarly, but produces the cumulative distribution instead.
+ * The get_CDF() works similarly, but produces the cumulative distribution instead.
*
* <p>As of November 2021, this implementation produces serialized sketches which are binary-compatible
* with the equivalent Java implementation only when template parameter T = double
* (64-bit double precision values).
-
*
* <p>The accuracy of this sketch is a function of the configured value <i>k</i>, which also affects
* the overall size of the sketch. Accuracy of this quantile sketch is always with respect to
- * the normalized rank. A <i>k</i> of 128 produces a normalized, rank error of about 1.7%.
+ * the normalized rank. A <i>k</i> of 128 produces a normalized rank error of about 1.7%.
* For example, the median value returned from getQuantile(0.5) will be between the actual values
* from the hypothetically sorted array of input values at normalized ranks of 0.483 and 0.517, with
* a confidence of about 99%.</p>
@@ -122,16 +120,16 @@ Table Guide for DoublesSketch Size in Bytes and Approximate Error:
* <a href="http://dblp.org/rec/html/journals/tods/AgarwalCHPWY13"></a></p>
*
* <p>This algorithm is independent of the distribution of values and
- * requires only that the values be comparable.</p
+ * requires only that the values be comparable.</p>
*
* <p>This algorithm intentionally inserts randomness into the sampling process for values that
* ultimately get retained in the sketch. The results produced by this algorithm are not
* deterministic. For example, if the same stream is inserted into two different instances of this
* sketch, the answers obtained from the two sketches may not be identical.</p>
*
- * <p>Similarly, there may be directional inconsistencies. For example, the resulting array of
- * values obtained from getQuantiles(fractions[]) input into the reverse directional query
- * getPMF(splitPoints[]) may not result in the original fractional values.</p>
+ * <p>Similarly, there may be directional inconsistencies. For example, the item
+ * obtained from get_quantile(rank), when passed to the reverse directional query
+ * get_rank(item), may not yield the original rank.</p>
*
* @author Kevin Lang
* @author Lee Rhodes
@@ -172,10 +170,10 @@ public:
/**
* Updates this sketch with the given data item.
- * @param value an item from a stream of items
+ * @param item an item from a stream of items
*/
template<typename FwdT>
- void update(FwdT&& value);
+ void update(FwdT&& item);
/**
* Merges another sketch into this one.
@@ -215,20 +213,20 @@ public:
bool is_estimation_mode() const;
/**
- * Returns the min value of the stream.
+ * Returns the min item of the stream.
* For floating point types: if the sketch is empty this returns NaN.
* For other types: if the sketch is empty this throws runtime_error.
- * @return the min value of the stream
+ * @return the min item of the stream
*/
- const T& get_min_value() const;
+ const T& get_min_item() const;
/**
- * Returns the max value of the stream.
+ * Returns the max item of the stream.
* For floating point types: if the sketch is empty this returns NaN.
* For other types: if the sketch is empty this throws runtime_error.
- * @return the max value of the stream
+ * @return the max item of the stream
*/
- const T& get_max_value() const;
+ const T& get_max_item() const;
/**
* Returns an instance of the comparator for this sketch.
@@ -243,140 +241,115 @@ public:
allocator_type get_allocator() const;
/**
- * Returns an approximation to the value of the data item
- * that would be preceded by the given fraction of a hypothetical sorted
- * version of the input stream so far.
- * <p>
- * Note that this method has a fairly large overhead (microseconds instead of nanoseconds)
- * so it should not be called multiple times to get different quantiles from the same
- * sketch. Instead use get_quantiles(), which pays the overhead only once.
+ * Returns an approximation to the data item associated with the given rank
+ * of a hypothetical sorted version of the input stream so far.
* <p>
* For floating point types: if the sketch is empty this returns NaN.
* For other types: if the sketch is empty this throws runtime_error.
*
- * @param rank the specified fractional position in the hypothetical sorted stream.
- * These are also called normalized ranks or fractional ranks.
- * If rank = 0.0, the true minimum value of the stream is returned.
- * If rank = 1.0, the true maximum value of the stream is returned.
+ * @param rank the specified normalized rank in the hypothetical sorted stream.
*
- * @return the approximation to the value at the given rank
+ * @return the approximation to the item at the given rank
*/
using quantile_return_type = typename quantile_sketch_sorted_view<T, Comparator, Allocator>::quantile_return_type;
- template<bool inclusive = false>
- quantile_return_type get_quantile(double rank) const;
+ quantile_return_type get_quantile(double rank, bool inclusive = true) const;
/**
- * This is a more efficient multiple-query version of get_quantile().
+ * This is a multiple-query version of get_quantile().
* <p>
* This returns an array that could have been generated by using get_quantile() for each
- * fractional rank separately, but would be very inefficient.
- * This method incurs the internal set-up overhead once and obtains multiple quantile values in
- * a single query. It is strongly recommend that this method be used instead of multiple calls
- * to get_quantile().
+ * fractional rank separately.
*
* <p>If the sketch is empty this returns an empty vector.
*
- * @param fractions given array of fractional positions in the hypothetical sorted stream.
- * These are also called normalized ranks or fractional ranks.
- * These fractions must be in the interval [0.0, 1.0], inclusive.
+ * @param ranks given array of normalized ranks in the hypothetical sorted stream.
+ * These ranks must be in the interval [0.0, 1.0], inclusive.
*
- * @return array of approximations to the given fractions in the same order as given fractions
+ * @return array of approximations to items associated with given ranks in the same order as given ranks
* in the input array.
*/
- template<bool inclusive = false>
- std::vector<T, Allocator> get_quantiles(const double* fractions, uint32_t size) const;
+ std::vector<T, Allocator> get_quantiles(const double* ranks, uint32_t size, bool inclusive = true) const;
/**
* This is a multiple-query version of get_quantile() that allows the caller to
- * specify the number of evenly-spaced fractional ranks.
+ * specify the number of evenly-spaced normalized ranks.
*
* <p>If the sketch is empty this returns an empty vector.
*
- * @param num an integer that specifies the number of evenly-spaced fractional ranks.
- * This must be an integer greater than 0. A value of 1 will return the min value.
- * A value of 2 will return the min and the max value. A value of 3 will return the min,
- * the median and the max value, etc.
+ * @param num an integer that specifies the number of evenly-spaced ranks.
+ * This must be an integer greater than 0. A value of 1 is equivalent to get_quantiles([0]).
+ * A value of 2 is equivalent to get_quantiles([0, 1]). A value of 3 is equivalent to
+ * get_quantiles([0, 0.5, 1]), etc.
*
- * @return array of approximations to the given number of evenly-spaced fractional ranks.
+ * @return array of approximations to items associated with the given number of evenly-spaced normalized ranks.
*/
- template<bool inclusive = false>
- std::vector<T, Allocator> get_quantiles(uint32_t num) const;
+ std::vector<T, Allocator> get_quantiles(uint32_t num, bool inclusive = true) const;
/**
- * Returns an approximation to the normalized (fractional) rank of the given value from 0 to 1,
- * inclusive. When template parameter <em>inclusive=false</em> (the default), only elements strictly
- * less than the provided value are included in the rank estimate. With <em>inclusive=true</em>,
- * the rank estimate includes elements less than or equal to the provided value.
+ * Returns an approximation to the normalized rank of the given item from 0 to 1, inclusive.
*
* <p>The resulting approximation has a probabilistic guarantee that can be obtained from the
* get_normalized_rank_error(false) function.
*
* <p>If the sketch is empty this returns NaN.
*
- * @param value to be ranked
- * @return an approximate rank of the given value
+ * @param item to be ranked
+ * @param inclusive if true, the weight of the given item is included in the rank.
+ * Otherwise the rank accounts only for the weights of the items that are less than the given item
+ * according to the sketch's Comparator.
+ * @return an approximate normalized rank of the given item
*/
- template<bool inclusive = false>
- double get_rank(const T& value) const;
+ double get_rank(const T& item, bool inclusive = true) const;
/**
* Returns an approximation to the Probability Mass Function (PMF) of the input stream
- * given a set of split points (values).
+ * given a set of split points (items).
*
* <p>The resulting approximations have a probabilistic guarantee that can be obtained from the
* get_normalized_rank_error(true) function.
*
* <p>If the sketch is empty this returns an empty vector.
*
- * @param split_points an array of <i>m</i> unique, monotonically increasing values
+ * @param split_points an array of <i>m</i> unique, monotonically increasing items
* that divide the input domain into <i>m+1</i> consecutive disjoint intervals.
- * If the template parameter <em>inclusive=false</em> (the default), the definition of an "interval"
- * is inclusive of the left split point and exclusive of the right
- * split point, with the exception that the last interval will include the maximum value.
- * If the template parameter <em>inclusive=true</em>, the definition of an "interval" is exclusive of
- * the left split point and inclusive of the right split point.
- * It is not necessary to include either the min or max values in these split points.
+ *
+ * @param size the number of split points in the array.
+ *
+ * @param inclusive if false, the definition of an "interval" is inclusive of the left split point and exclusive of the right
+ * split point, with the exception that the last interval will include the maximum item.
+ * If true, the definition of an "interval" is exclusive of the left split point and inclusive of the right
+ * split point.
*
* @return an array of m+1 doubles each of which is an approximation
* to the fraction of the input stream values (the mass) that fall into one of those intervals.
- * When <em>inclusive=false</em> (the default), the definition of an "interval" is inclusive
- * of the left split point and exclusive of the right split point, with the exception that the last
- * interval will include the maximum value. When <em>inclusive=true</em>,
- * an "interval" is exclusive of the left split point and inclusive of the right.
*/
- template<bool inclusive = false>
- vector_double get_PMF(const T* split_points, uint32_t size) const;
+ vector_double get_PMF(const T* split_points, uint32_t size, bool inclusive = true) const;
/**
* Returns an approximation to the Cumulative Distribution Function (CDF), which is the
- * cumulative analog of the PMF, of the input stream given a set of split points (values).
+ * cumulative analog of the PMF, of the input stream given a set of split points (items).
*
* <p>The resulting approximations have a probabilistic guarantee that can be obtained from the
* get_normalized_rank_error(false) function.
*
* <p>If the sketch is empty this returns an empty vector.
*
- * @param split_points an array of <i>m</i> unique, monotonically increasing values
+ * @param split_points an array of <i>m</i> unique, monotonically increasing items
* that divide the input domain into <i>m+1</i> consecutive disjoint intervals.
- * If the template parameter <em>inclusive=false</em> (the default), the definition of an "interval" is
- * inclusive of the left split point and exclusive of the right
- * split point, with the exception that the last interval will include the maximum value.
- * If the template parameter <em>inclusive=true</em>, the definition of an "interval" is exclusive of
- * the left split point and inclusive of the right split point.
- * It is not necessary to include either the min or max values in these split points.
+ *
+ * @param size the number of split points in the array.
+ *
+ * @param inclusive if false, the definition of an "interval" is inclusive of the left split point and exclusive of the right
+ * split point, with the exception that the last interval will include the maximum item.
+ * If true, the definition of an "interval" is exclusive of the left split point and inclusive of the right
+ * split point.
*
* @return an array of m+1 double values, which are a consecutive approximation to the CDF
* of the input stream given the split_points. The value at array position j of the returned
* CDF array is the sum of the returned values in positions 0 through j of the returned PMF
* array.
- * When <em>inclusive=false</em> (the default), the definition of an "interval" is inclusive
- * of the left split point and exclusive of the right split point, with the exception that the last
- * interval will include the maximum value. When <em>inclusive=true</em>,
- * an "interval" is exclusive of the left split point and inclusive of the right.
-
*/
- template<bool inclusive = false>
- vector_double get_CDF(const T* split_points, uint32_t size) const;
+ vector_double get_CDF(const T* split_points, uint32_t size, bool inclusive = true) const;
/**
* Computes size needed to serialize the current state of the sketch.
@@ -471,8 +444,7 @@ public:
const_iterator begin() const;
const_iterator end() const;
- template<bool inclusive = false>
- quantile_sketch_sorted_view<T, Comparator, Allocator> get_sorted_view(bool cumulative) const;
+ quantile_sketch_sorted_view<T, Comparator, Allocator> get_sorted_view() const;
private:
using Level = std::vector<T, Allocator>;
@@ -487,7 +459,7 @@ private:
* || 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 |
* 1 ||---------------------------Items Seen Count (N)--------------------------------|
*
- * Long 3 is the start of data, beginning with serialized min and max values, followed by
+ * Long 3 is the start of data, beginning with serialized min and max items, followed by
* the sketch data buffers.
*/
@@ -504,21 +476,25 @@ private:
static const size_t DATA_START = 16;
Allocator allocator_;
+ bool is_level_zero_sorted_;
uint16_t k_;
uint64_t n_;
uint64_t bit_pattern_;
Level base_buffer_;
VectorLevels levels_;
- T* min_value_;
- T* max_value_;
- bool is_sorted_;
+ T* min_item_;
+ T* max_item_;
+ mutable quantile_sketch_sorted_view<T, Comparator, Allocator>* sorted_view_;
+
+ void setup_sorted_view() const; // modifies mutable state
+ void reset_sorted_view();
// for deserialization
class item_deleter;
class items_deleter;
quantiles_sketch(uint16_t k, uint64_t n, uint64_t bit_pattern,
Level&& base_buffer, VectorLevels&& levels,
- std::unique_ptr<T, item_deleter> min_value, std::unique_ptr<T, item_deleter> max_value,
+ std::unique_ptr<T, item_deleter> min_item, std::unique_ptr<T, item_deleter> max_item,
bool is_sorted, const Allocator& allocator = Allocator());
void grow_base_buffer();
@@ -549,7 +525,7 @@ private:
static uint32_t compute_retained_items(uint16_t k, uint64_t n);
static uint32_t compute_base_buffer_items(uint16_t k, uint64_t n);
static uint64_t compute_bit_pattern(uint16_t k, uint64_t n);
- static uint32_t compute_valid_levels(uint64_t bit_pattern);
+ static uint32_t count_valid_levels(uint64_t bit_pattern);
static uint8_t compute_levels_needed(uint16_t k, uint64_t n);
/**
@@ -588,8 +564,8 @@ private:
}
template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
- static inline bool check_update_value(TT value) {
- return !std::isnan(value);
+ static inline bool check_update_item(TT item) {
+ return !std::isnan(item);
}
template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
@@ -611,15 +587,15 @@ private:
}
template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
- static inline bool check_update_value(TT) {
+ static inline bool check_update_item(TT) {
return true;
}
template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
- static inline void check_split_points(const T* values, uint32_t size) {
+ static inline void check_split_points(const T* items, uint32_t size) {
for (uint32_t i = 0; i < size ; i++) {
- if ((i < (size - 1)) && !(Comparator()(values[i], values[i + 1]))) {
- throw std::invalid_argument("Values must be unique and monotonically increasing");
+ if ((i < (size - 1)) && !(Comparator()(items[i], items[i + 1]))) {
+ throw std::invalid_argument("Items must be unique and monotonically increasing");
}
}
}
diff --git a/quantiles/include/quantiles_sketch_impl.hpp b/quantiles/include/quantiles_sketch_impl.hpp
index f1df570..15cc022 100644
--- a/quantiles/include/quantiles_sketch_impl.hpp
+++ b/quantiles/include/quantiles_sketch_impl.hpp
@@ -36,14 +36,15 @@ namespace datasketches {
template<typename T, typename C, typename A>
quantiles_sketch<T, C, A>::quantiles_sketch(uint16_t k, const A& allocator):
allocator_(allocator),
+is_level_zero_sorted_(true),
k_(k),
n_(0),
bit_pattern_(0),
base_buffer_(allocator_),
levels_(allocator_),
-min_value_(nullptr),
-max_value_(nullptr),
-is_sorted_(true)
+min_item_(nullptr),
+max_item_(nullptr),
+sorted_view_(nullptr)
{
check_k(k_);
base_buffer_.reserve(2 * std::min(quantiles_constants::MIN_K, k));
@@ -52,17 +53,18 @@ is_sorted_(true)
template<typename T, typename C, typename A>
quantiles_sketch<T, C, A>::quantiles_sketch(const quantiles_sketch& other):
allocator_(other.allocator_),
+is_level_zero_sorted_(other.is_level_zero_sorted_),
k_(other.k_),
n_(other.n_),
bit_pattern_(other.bit_pattern_),
base_buffer_(other.base_buffer_),
levels_(other.levels_),
-min_value_(nullptr),
-max_value_(nullptr),
-is_sorted_(other.is_sorted_)
+min_item_(nullptr),
+max_item_(nullptr),
+sorted_view_(nullptr)
{
- if (other.min_value_ != nullptr) min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
- if (other.max_value_ != nullptr) max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
+ if (other.min_item_ != nullptr) min_item_ = new (allocator_.allocate(1)) T(*other.min_item_);
+ if (other.max_item_ != nullptr) max_item_ = new (allocator_.allocate(1)) T(*other.max_item_);
for (size_t i = 0; i < levels_.size(); ++i) {
if (levels_[i].capacity() != other.levels_[i].capacity()) {
levels_[i].reserve(other.levels_[i].capacity());
@@ -73,62 +75,66 @@ is_sorted_(other.is_sorted_)
template<typename T, typename C, typename A>
quantiles_sketch<T, C, A>::quantiles_sketch(quantiles_sketch&& other) noexcept:
allocator_(other.allocator_),
+is_level_zero_sorted_(other.is_level_zero_sorted_),
k_(other.k_),
n_(other.n_),
bit_pattern_(other.bit_pattern_),
base_buffer_(std::move(other.base_buffer_)),
levels_(std::move(other.levels_)),
-min_value_(other.min_value_),
-max_value_(other.max_value_),
-is_sorted_(other.is_sorted_)
+min_item_(other.min_item_),
+max_item_(other.max_item_),
+sorted_view_(nullptr)
{
- other.min_value_ = nullptr;
- other.max_value_ = nullptr;
+ other.min_item_ = nullptr;
+ other.max_item_ = nullptr;
}
template<typename T, typename C, typename A>
quantiles_sketch<T, C, A>& quantiles_sketch<T, C, A>::operator=(const quantiles_sketch& other) {
quantiles_sketch<T, C, A> copy(other);
std::swap(allocator_, copy.allocator_);
+ std::swap(is_level_zero_sorted_, copy.is_level_zero_sorted_);
std::swap(k_, copy.k_);
std::swap(n_, copy.n_);
std::swap(bit_pattern_, copy.bit_pattern_);
std::swap(base_buffer_, copy.base_buffer_);
std::swap(levels_, copy.levels_);
- std::swap(min_value_, copy.min_value_);
- std::swap(max_value_, copy.max_value_);
- std::swap(is_sorted_, copy.is_sorted_);
+ std::swap(min_item_, copy.min_item_);
+ std::swap(max_item_, copy.max_item_);
+ reset_sorted_view();
return *this;
}
template<typename T, typename C, typename A>
quantiles_sketch<T, C, A>& quantiles_sketch<T, C, A>::operator=(quantiles_sketch&& other) noexcept {
std::swap(allocator_, other.allocator_);
+ std::swap(is_level_zero_sorted_, other.is_level_zero_sorted_);
std::swap(k_, other.k_);
std::swap(n_, other.n_);
std::swap(bit_pattern_, other.bit_pattern_);
std::swap(base_buffer_, other.base_buffer_);
std::swap(levels_, other.levels_);
- std::swap(min_value_, other.min_value_);
- std::swap(max_value_, other.max_value_);
- std::swap(is_sorted_, other.is_sorted_);
+ std::swap(min_item_, other.min_item_);
+ std::swap(max_item_, other.max_item_);
+ reset_sorted_view();
return *this;
}
template<typename T, typename C, typename A>
quantiles_sketch<T, C, A>::quantiles_sketch(uint16_t k, uint64_t n, uint64_t bit_pattern,
Level&& base_buffer, VectorLevels&& levels,
- std::unique_ptr<T, item_deleter> min_value, std::unique_ptr<T, item_deleter> max_value,
+ std::unique_ptr<T, item_deleter> min_item, std::unique_ptr<T, item_deleter> max_item,
bool is_sorted, const A& allocator) :
allocator_(allocator),
+is_level_zero_sorted_(is_sorted),
k_(k),
n_(n),
bit_pattern_(bit_pattern),
base_buffer_(std::move(base_buffer)),
levels_(std::move(levels)),
-min_value_(min_value.release()),
-max_value_(max_value.release()),
-is_sorted_(is_sorted)
+min_item_(min_item.release()),
+max_item_(max_item.release()),
+sorted_view_(nullptr)
{
uint32_t item_count = base_buffer_.size();
for (Level& lvl : levels_) {
@@ -142,14 +148,15 @@ template<typename T, typename C, typename A>
template<typename From, typename FC, typename FA>
quantiles_sketch<T, C, A>::quantiles_sketch(const quantiles_sketch<From, FC, FA>& other, const A& allocator) :
allocator_(allocator),
+is_level_zero_sorted_(false),
k_(other.get_k()),
n_(other.get_n()),
bit_pattern_(compute_bit_pattern(other.get_k(), other.get_n())),
base_buffer_(allocator),
levels_(allocator),
-min_value_(nullptr),
-max_value_(nullptr),
-is_sorted_(false)
+min_item_(nullptr),
+max_item_(nullptr),
+sorted_view_(nullptr)
{
static_assert(std::is_constructible<T, From>::value,
"Type converting constructor requires new type to be constructible from existing type");
@@ -157,8 +164,8 @@ is_sorted_(false)
base_buffer_.reserve(2 * std::min(quantiles_constants::MIN_K, k_));
if (!other.is_empty()) {
- min_value_ = new (allocator_.allocate(1)) T(other.get_min_value());
- max_value_ = new (allocator_.allocate(1)) T(other.get_max_value());
+ min_item_ = new (allocator_.allocate(1)) T(other.get_min_item());
+ max_item_ = new (allocator_.allocate(1)) T(other.get_max_item());
// reserve space in levels
const uint8_t num_levels = compute_levels_needed(k_, n_);
@@ -199,40 +206,38 @@ is_sorted_(false)
template<typename T, typename C, typename A>
quantiles_sketch<T, C, A>::~quantiles_sketch() {
- if (min_value_ != nullptr) {
- min_value_->~T();
- allocator_.deallocate(min_value_, 1);
+ if (min_item_ != nullptr) {
+ min_item_->~T();
+ allocator_.deallocate(min_item_, 1);
}
- if (max_value_ != nullptr) {
- max_value_->~T();
- allocator_.deallocate(max_value_, 1);
+ if (max_item_ != nullptr) {
+ max_item_->~T();
+ allocator_.deallocate(max_item_, 1);
}
+ reset_sorted_view();
}
template<typename T, typename C, typename A>
template<typename FwdT>
void quantiles_sketch<T, C, A>::update(FwdT&& item) {
- if (!check_update_value(item)) { return; }
+ if (!check_update_item(item)) { return; }
if (is_empty()) {
- min_value_ = new (allocator_.allocate(1)) T(item);
- max_value_ = new (allocator_.allocate(1)) T(item);
+ min_item_ = new (allocator_.allocate(1)) T(item);
+ max_item_ = new (allocator_.allocate(1)) T(item);
} else {
- if (C()(item, *min_value_)) *min_value_ = item;
- if (C()(*max_value_, item)) *max_value_ = item;
+ if (C()(item, *min_item_)) *min_item_ = item;
+ if (C()(*max_item_, item)) *max_item_ = item;
}
// if exceed capacity, grow until size 2k -- assumes eager processing
- if (base_buffer_.size() + 1 > base_buffer_.capacity())
- grow_base_buffer();
+ if (base_buffer_.size() + 1 > base_buffer_.capacity()) grow_base_buffer();
base_buffer_.push_back(std::forward<FwdT>(item));
++n_;
- if (base_buffer_.size() > 1)
- is_sorted_ = false;
-
- if (base_buffer_.size() == 2 * k_)
- process_full_base_buffer();
+ if (base_buffer_.size() > 1) is_level_zero_sorted_ = false;
+ if (base_buffer_.size() == 2 * k_) process_full_base_buffer();
+ reset_sorted_view();
}
template<typename T, typename C, typename A>
@@ -245,10 +250,11 @@ void quantiles_sketch<T, C, A>::merge(FwdSk&& other) {
for (auto item : other.base_buffer_) {
update(conditional_forward<FwdSk>(item));
}
- return; // we're done
+ reset_sorted_view();
+ return;
}
- // we know other has data and is in estimation mode
+ // other has data and is in estimation mode
if (is_estimation_mode()) {
if (k_ == other.get_k()) {
standard_merge(*this, other);
@@ -273,6 +279,7 @@ void quantiles_sketch<T, C, A>::merge(FwdSk&& other) {
}
*this = sk_copy;
}
+ reset_sorted_view();
}
template<typename T, typename C, typename A>
@@ -286,8 +293,8 @@ void quantiles_sketch<T, C, A>::serialize(std::ostream& os, const SerDe& serde)
write(os, family);
// side-effect: sort base buffer since always compact
- // can't set is_sorted_ since const method
std::sort(const_cast<Level&>(base_buffer_).begin(), const_cast<Level&>(base_buffer_).end(), C());
+ const_cast<quantiles_sketch*>(this)->is_level_zero_sorted_ = true;
// empty, ordered, compact are valid flags
const uint8_t flags_byte(
@@ -304,8 +311,8 @@ void quantiles_sketch<T, C, A>::serialize(std::ostream& os, const SerDe& serde)
write(os, n_);
// min and max
- serde.serialize(os, min_value_, 1);
- serde.serialize(os, max_value_, 1);
+ serde.serialize(os, min_item_, 1);
+ serde.serialize(os, max_item_, 1);
// base buffer items
serde.serialize(os, base_buffer_.data(), static_cast<unsigned>(base_buffer_.size()));
@@ -334,8 +341,8 @@ auto quantiles_sketch<T, C, A>::serialize(unsigned header_size_bytes, const SerD
ptr += copy_to_mem(family, ptr);
// side-effect: sort base buffer since always compact
- // can't set is_sorted_ since const method
std::sort(const_cast<Level&>(base_buffer_).begin(), const_cast<Level&>(base_buffer_).end(), C());
+ const_cast<quantiles_sketch*>(this)->is_level_zero_sorted_ = true;
// empty, ordered, compact are valid flags
const uint8_t flags_byte(
@@ -352,8 +359,8 @@ auto quantiles_sketch<T, C, A>::serialize(unsigned header_size_bytes, const SerD
ptr += copy_to_mem(n_, ptr);
// min and max
- ptr += serde.serialize(ptr, end_ptr - ptr, min_value_, 1);
- ptr += serde.serialize(ptr, end_ptr - ptr, max_value_, 1);
+ ptr += serde.serialize(ptr, end_ptr - ptr, min_item_, 1);
+ ptr += serde.serialize(ptr, end_ptr - ptr, max_item_, 1);
// base buffer items
if (base_buffer_.size() > 0)
@@ -397,17 +404,17 @@ auto quantiles_sketch<T, C, A>::deserialize(std::istream &is, const SerDe& serde
A alloc(allocator);
auto item_buffer_deleter = [&alloc](T* ptr) { alloc.deallocate(ptr, 1); };
- std::unique_ptr<T, decltype(item_buffer_deleter)> min_value_buffer(alloc.allocate(1), item_buffer_deleter);
- std::unique_ptr<T, decltype(item_buffer_deleter)> max_value_buffer(alloc.allocate(1), item_buffer_deleter);
- std::unique_ptr<T, item_deleter> min_value(nullptr, item_deleter(allocator));
- std::unique_ptr<T, item_deleter> max_value(nullptr, item_deleter(allocator));
+ std::unique_ptr<T, decltype(item_buffer_deleter)> min_item_buffer(alloc.allocate(1), item_buffer_deleter);
+ std::unique_ptr<T, decltype(item_buffer_deleter)> max_item_buffer(alloc.allocate(1), item_buffer_deleter);
+ std::unique_ptr<T, item_deleter> min_item(nullptr, item_deleter(allocator));
+ std::unique_ptr<T, item_deleter> max_item(nullptr, item_deleter(allocator));
- serde.deserialize(is, min_value_buffer.get(), 1);
+ serde.deserialize(is, min_item_buffer.get(), 1);
// serde call did not throw, repackage with destrtuctor
- min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
- serde.deserialize(is, max_value_buffer.get(), 1);
+ min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
+ serde.deserialize(is, max_item_buffer.get(), 1);
// serde call did not throw, repackage with destrtuctor
- max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
+ max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
if (serial_version == 1) {
read<uint64_t>(is); // no longer used
@@ -449,7 +456,7 @@ auto quantiles_sketch<T, C, A>::deserialize(std::istream &is, const SerDe& serde
}
return quantiles_sketch(k, items_seen, bit_pattern,
- std::move(base_buffer), std::move(levels), std::move(min_value), std::move(max_value), is_sorted, allocator);
+ std::move(base_buffer), std::move(levels), std::move(min_item), std::move(max_item), is_sorted, allocator);
}
template<typename T, typename C, typename A>
@@ -510,17 +517,17 @@ auto quantiles_sketch<T, C, A>::deserialize(const void* bytes, size_t size, cons
A alloc(allocator);
auto item_buffer_deleter = [&alloc](T* ptr) { alloc.deallocate(ptr, 1); };
- std::unique_ptr<T, decltype(item_buffer_deleter)> min_value_buffer(alloc.allocate(1), item_buffer_deleter);
- std::unique_ptr<T, decltype(item_buffer_deleter)> max_value_buffer(alloc.allocate(1), item_buffer_deleter);
- std::unique_ptr<T, item_deleter> min_value(nullptr, item_deleter(allocator));
- std::unique_ptr<T, item_deleter> max_value(nullptr, item_deleter(allocator));
+ std::unique_ptr<T, decltype(item_buffer_deleter)> min_item_buffer(alloc.allocate(1), item_buffer_deleter);
+ std::unique_ptr<T, decltype(item_buffer_deleter)> max_item_buffer(alloc.allocate(1), item_buffer_deleter);
+ std::unique_ptr<T, item_deleter> min_item(nullptr, item_deleter(allocator));
+ std::unique_ptr<T, item_deleter> max_item(nullptr, item_deleter(allocator));
- ptr += serde.deserialize(ptr, end_ptr - ptr, min_value_buffer.get(), 1);
+ ptr += serde.deserialize(ptr, end_ptr - ptr, min_item_buffer.get(), 1);
// serde call did not throw, repackage with destrtuctor
- min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
- ptr += serde.deserialize(ptr, end_ptr - ptr, max_value_buffer.get(), 1);
+ min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
+ ptr += serde.deserialize(ptr, end_ptr - ptr, max_item_buffer.get(), 1);
// serde call did not throw, repackage with destrtuctor
- max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
+ max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
if (serial_version == 1) {
uint64_t unused_long;
@@ -567,7 +574,7 @@ auto quantiles_sketch<T, C, A>::deserialize(const void* bytes, size_t size, cons
}
return quantiles_sketch(k, items_seen, bit_pattern,
- std::move(base_buffer_pair.first), std::move(levels), std::move(min_value), std::move(max_value), is_sorted, allocator);
+ std::move(base_buffer_pair.first), std::move(levels), std::move(min_item), std::move(max_item), is_sorted, allocator);
}
template<typename T, typename C, typename A>
@@ -605,11 +612,11 @@ string<A> quantiles_sketch<T, C, A>::to_string(bool print_levels, bool print_ite
os << " Empty : " << (is_empty() ? "true" : "false") << std::endl;
os << " Estimation mode: " << (is_estimation_mode() ? "true" : "false") << std::endl;
os << " Levels (w/o BB): " << levels_.size() << std::endl;
- os << " Used Levels : " << compute_valid_levels(bit_pattern_) << std::endl;
+ os << " Used Levels : " << count_valid_levels(bit_pattern_) << std::endl;
os << " Retained items : " << get_num_retained() << std::endl;
if (!is_empty()) {
- os << " Min value : " << *min_value_ << std::endl;
- os << " Max value : " << *max_value_ << std::endl;
+ os << " Min item : " << *min_item_ << std::endl;
+ os << " Max item : " << *max_item_ << std::endl;
}
os << "### End sketch summary" << std::endl;
@@ -667,15 +674,15 @@ uint32_t quantiles_sketch<T, C, A>::get_num_retained() const {
}
template<typename T, typename C, typename A>
-const T& quantiles_sketch<T, C, A>::get_min_value() const {
+const T& quantiles_sketch<T, C, A>::get_min_item() const {
if (is_empty()) return get_invalid_value();
- return *min_value_;
+ return *min_item_;
}
template<typename T, typename C, typename A>
-const T& quantiles_sketch<T, C, A>::get_max_value() const {
+const T& quantiles_sketch<T, C, A>::get_max_item() const {
if (is_empty()) return get_invalid_value();
- return *max_value_;
+ return *max_item_;
}
template<typename T, typename C, typename A>
@@ -702,8 +709,8 @@ template<typename SerDe, typename TT, typename std::enable_if<!std::is_arithmeti
size_t quantiles_sketch<T, C, A>::get_serialized_size_bytes(const SerDe& serde) const {
if (is_empty()) { return EMPTY_SIZE_BYTES; }
size_t size = DATA_START;
- size += serde.size_of_item(*min_value_);
- size += serde.size_of_item(*max_value_);
+ size += serde.size_of_item(*min_item_);
+ size += serde.size_of_item(*max_item_);
for (auto it: *this) size += serde.size_of_item(it.first);
return size;
}
@@ -721,111 +728,83 @@ double quantiles_sketch<T, C, A>::get_normalized_rank_error(uint16_t k, bool is_
}
template<typename T, typename C, typename A>
-template<bool inclusive>
-quantile_sketch_sorted_view<T, C, A> quantiles_sketch<T, C, A>::get_sorted_view(bool cumulative) const {
- // allow side-effect of sorting the base buffer; can't set the flag since
- // this is a const method
- if (!is_sorted_) {
+quantile_sketch_sorted_view<T, C, A> quantiles_sketch<T, C, A>::get_sorted_view() const {
+ // allow side-effect of sorting the base buffer
+ if (!is_level_zero_sorted_) {
std::sort(const_cast<Level&>(base_buffer_).begin(), const_cast<Level&>(base_buffer_).end(), C());
+ const_cast<quantiles_sketch*>(this)->is_level_zero_sorted_ = true;
}
quantile_sketch_sorted_view<T, C, A> view(get_num_retained(), allocator_);
uint64_t weight = 1;
view.add(base_buffer_.begin(), base_buffer_.end(), weight);
- for (auto& level : levels_) {
+ for (const auto& level: levels_) {
weight <<= 1;
if (level.empty()) { continue; }
view.add(level.begin(), level.end(), weight);
}
- if (cumulative) view.template convert_to_cummulative<inclusive>();
+ view.convert_to_cummulative();
return view;
}
template<typename T, typename C, typename A>
-template<bool inclusive>
-auto quantiles_sketch<T, C, A>::get_quantile(double rank) const -> quantile_return_type {
+auto quantiles_sketch<T, C, A>::get_quantile(double rank, bool inclusive) const -> quantile_return_type {
if (is_empty()) return get_invalid_value();
- if (rank == 0.0) return *min_value_;
- if (rank == 1.0) return *max_value_;
if ((rank < 0.0) || (rank > 1.0)) {
- throw std::invalid_argument("Rank cannot be less than zero or greater than 1.0");
+ throw std::invalid_argument("Normalized rank cannot be less than 0 or greater than 1");
}
// possible side-effect: sorting base buffer
- return get_sorted_view<inclusive>(true).get_quantile(rank);
+ setup_sorted_view();
+ return sorted_view_->get_quantile(rank, inclusive);
}
template<typename T, typename C, typename A>
-template<bool inclusive>
-std::vector<T, A> quantiles_sketch<T, C, A>::get_quantiles(const double* ranks, uint32_t size) const {
+std::vector<T, A> quantiles_sketch<T, C, A>::get_quantiles(const double* ranks, uint32_t size, bool inclusive) const {
std::vector<T, A> quantiles(allocator_);
if (is_empty()) return quantiles;
quantiles.reserve(size);
// possible side-effect: sorting base buffer
- auto view = get_sorted_view<inclusive>(true);
+ setup_sorted_view();
for (uint32_t i = 0; i < size; ++i) {
const double rank = ranks[i];
if ((rank < 0.0) || (rank > 1.0)) {
- throw std::invalid_argument("rank cannot be less than zero or greater than 1.0");
- }
- if (rank == 0.0) quantiles.push_back(*min_value_);
- else if (rank == 1.0) quantiles.push_back(*max_value_);
- else {
- quantiles.push_back(view.get_quantile(rank));
+ throw std::invalid_argument("Normalized rank cannot be less than 0 or greater than 1");
}
+ quantiles.push_back(sorted_view_->get_quantile(rank, inclusive));
}
return quantiles;
}
template<typename T, typename C, typename A>
-template<bool inclusive>
-std::vector<T, A> quantiles_sketch<T, C, A>::get_quantiles(uint32_t num) const {
+std::vector<T, A> quantiles_sketch<T, C, A>::get_quantiles(uint32_t num, bool inclusive) const {
if (is_empty()) return std::vector<T, A>(allocator_);
if (num == 0) {
throw std::invalid_argument("num must be > 0");
}
- vector_double fractions(num, 0, allocator_);
- fractions[0] = 0.0;
+ vector_double ranks(num, 0, allocator_);
+ ranks[0] = 0.0;
for (size_t i = 1; i < num; i++) {
- fractions[i] = static_cast<double>(i) / (num - 1);
+ ranks[i] = static_cast<double>(i) / (num - 1);
}
if (num > 1) {
- fractions[num - 1] = 1.0;
+ ranks[num - 1] = 1.0;
}
- return get_quantiles<inclusive>(fractions.data(), num);
+ return get_quantiles(ranks.data(), num, inclusive);
}
template<typename T, typename C, typename A>
-template<bool inclusive>
-double quantiles_sketch<T, C, A>::get_rank(const T& value) const {
+double quantiles_sketch<T, C, A>::get_rank(const T& item, bool inclusive) const {
if (is_empty()) return std::numeric_limits<double>::quiet_NaN();
- uint64_t weight = 1;
- uint64_t total = 0;
- for (const T &item: base_buffer_) {
- if (inclusive ? !C()(value, item) : C()(item, value))
- total += weight;
- }
-
- weight *= 2;
- for (uint8_t level = 0; level < levels_.size(); ++level, weight *= 2) {
- if (levels_[level].empty()) { continue; }
- const T* data = levels_[level].data();
- for (uint16_t i = 0; i < k_; ++i) {
- if (inclusive ? !C()(value, data[i]) : C()(data[i], value))
- total += weight;
- else
- break; // levels are sorted, no point comparing further
- }
- }
- return (double) total / n_;
+ setup_sorted_view();
+ return sorted_view_->get_rank(item, inclusive);
}
template<typename T, typename C, typename A>
-template<bool inclusive>
-auto quantiles_sketch<T, C, A>::get_PMF(const T* split_points, uint32_t size) const -> vector_double {
- auto buckets = get_CDF<inclusive>(split_points, size);
+auto quantiles_sketch<T, C, A>::get_PMF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
+ auto buckets = get_CDF(split_points, size, inclusive);
if (is_empty()) return buckets;
for (uint32_t i = size; i > 0; --i) {
buckets[i] -= buckets[i - 1];
@@ -834,49 +813,45 @@ auto quantiles_sketch<T, C, A>::get_PMF(const T* split_points, uint32_t size) co
}
template<typename T, typename C, typename A>
-template<bool inclusive>
-auto quantiles_sketch<T, C, A>::get_CDF(const T* split_points, uint32_t size) const -> vector_double {
+auto quantiles_sketch<T, C, A>::get_CDF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
vector_double buckets(allocator_);
if (is_empty()) return buckets;
check_split_points(split_points, size);
buckets.reserve(size + 1);
- for (uint32_t i = 0; i < size; ++i) buckets.push_back(get_rank<inclusive>(split_points[i]));
+ for (uint32_t i = 0; i < size; ++i) {
+ buckets.push_back(get_rank(split_points[i], inclusive));
+ }
buckets.push_back(1);
return buckets;
}
template<typename T, typename C, typename A>
-uint32_t quantiles_sketch<T, C, A>::compute_retained_items(const uint16_t k, const uint64_t n) {
+uint32_t quantiles_sketch<T, C, A>::compute_retained_items(uint16_t k, uint64_t n) {
const uint32_t bb_count = compute_base_buffer_items(k, n);
const uint64_t bit_pattern = compute_bit_pattern(k, n);
- const uint32_t valid_levels = compute_valid_levels(bit_pattern);
+ const uint32_t valid_levels = count_valid_levels(bit_pattern);
return bb_count + (k * valid_levels);
}
template<typename T, typename C, typename A>
-uint32_t quantiles_sketch<T, C, A>::compute_base_buffer_items(const uint16_t k, const uint64_t n) {
+uint32_t quantiles_sketch<T, C, A>::compute_base_buffer_items(uint16_t k, uint64_t n) {
return n % (static_cast<uint64_t>(2) * k);
}
template<typename T, typename C, typename A>
-uint64_t quantiles_sketch<T, C, A>::compute_bit_pattern(const uint16_t k, const uint64_t n) {
+uint64_t quantiles_sketch<T, C, A>::compute_bit_pattern(uint16_t k, uint64_t n) {
return n / (static_cast<uint64_t>(2) * k);
}
template<typename T, typename C, typename A>
-uint32_t quantiles_sketch<T, C, A>::compute_valid_levels(const uint64_t bit_pattern) {
- // TODO: Java's Long.bitCount() probably uses a better method
- uint64_t bp = bit_pattern;
+uint32_t quantiles_sketch<T, C, A>::count_valid_levels(uint64_t bit_pattern) {
uint32_t count = 0;
- while (bp > 0) {
- if ((bp & 0x01) == 1) ++count;
- bp >>= 1;
- }
+ for (; bit_pattern > 0; ++count) bit_pattern &= bit_pattern - 1;
return count;
}
template<typename T, typename C, typename A>
-uint8_t quantiles_sketch<T, C, A>::compute_levels_needed(const uint16_t k, const uint64_t n) {
+uint8_t quantiles_sketch<T, C, A>::compute_levels_needed(uint16_t k, uint64_t n) {
return static_cast<uint8_t>(64U) - count_leading_zeros_in_u64(n / (2 * k));
}
@@ -967,7 +942,7 @@ void quantiles_sketch<T, C, A>::process_full_base_buffer() {
base_buffer_,
true, *this);
base_buffer_.clear();
- is_sorted_ = true;
+ is_level_zero_sorted_ = true;
if (n_ / (2 * k_) != bit_pattern_) {
throw std::logic_error("Internal error: n / 2k (" + std::to_string(n_ / 2 * k_)
+ " != bit_pattern " + std::to_string(bit_pattern_));
@@ -1071,7 +1046,6 @@ void quantiles_sketch<T, C, A>::zip_buffer_with_stride(FwdV&& buf_in, Level& buf
// do not clear input buffer
}
-
template<typename T, typename C, typename A>
void quantiles_sketch<T, C, A>::merge_two_size_k_buffers(Level& src_1, Level& src_2, Level& dst) {
if (src_1.size() != src_2.size()
@@ -1100,7 +1074,6 @@ void quantiles_sketch<T, C, A>::merge_two_size_k_buffers(Level& src_1, Level& sr
}
}
-
template<typename T, typename C, typename A>
template<typename FwdSk>
void quantiles_sketch<T, C, A>::standard_merge(quantiles_sketch& tgt, FwdSk&& src) {
@@ -1152,22 +1125,21 @@ void quantiles_sketch<T, C, A>::standard_merge(quantiles_sketch& tgt, FwdSk&& sr
// update min and max values
// can't just check is_empty() since min and max might not have been set if
// there were no base buffer items added via update()
- if (tgt.min_value_ == nullptr) {
- tgt.min_value_ = new (tgt.allocator_.allocate(1)) T(*src.min_value_);
+ if (tgt.min_item_ == nullptr) {
+ tgt.min_item_ = new (tgt.allocator_.allocate(1)) T(*src.min_item_);
} else {
- if (C()(*src.min_value_, *tgt.min_value_))
- *tgt.min_value_ = conditional_forward<FwdSk>(*src.min_value_);
+ if (C()(*src.min_item_, *tgt.min_item_))
+ *tgt.min_item_ = conditional_forward<FwdSk>(*src.min_item_);
}
- if (tgt.max_value_ == nullptr) {
- tgt.max_value_ = new (tgt.allocator_.allocate(1)) T(*src.max_value_);
+ if (tgt.max_item_ == nullptr) {
+ tgt.max_item_ = new (tgt.allocator_.allocate(1)) T(*src.max_item_);
} else {
- if (C()(*tgt.max_value_, *src.max_value_))
- *tgt.max_value_ = conditional_forward<FwdSk>(*src.max_value_);
+ if (C()(*tgt.max_item_, *src.max_item_))
+ *tgt.max_item_ = conditional_forward<FwdSk>(*src.max_item_);
}
}
-
template<typename T, typename C, typename A>
template<typename FwdSk>
void quantiles_sketch<T, C, A>::downsampling_merge(quantiles_sketch& tgt, FwdSk&& src) {
@@ -1229,22 +1201,21 @@ void quantiles_sketch<T, C, A>::downsampling_merge(quantiles_sketch& tgt, FwdSk&
// update min and max values
// can't just check is_empty() since min and max might not have been set if
// there were no base buffer items added via update()
- if (tgt.min_value_ == nullptr) {
- tgt.min_value_ = new (tgt.allocator_.allocate(1)) T(*src.min_value_);
+ if (tgt.min_item_ == nullptr) {
+ tgt.min_item_ = new (tgt.allocator_.allocate(1)) T(*src.min_item_);
} else {
- if (C()(*src.min_value_, *tgt.min_value_))
- *tgt.min_value_ = conditional_forward<FwdSk>(*src.min_value_);
+ if (C()(*src.min_item_, *tgt.min_item_))
+ *tgt.min_item_ = conditional_forward<FwdSk>(*src.min_item_);
}
- if (tgt.max_value_ == nullptr) {
- tgt.max_value_ = new (tgt.allocator_.allocate(1)) T(*src.max_value_);
+ if (tgt.max_item_ == nullptr) {
+ tgt.max_item_ = new (tgt.allocator_.allocate(1)) T(*src.max_item_);
} else {
- if (C()(*tgt.max_value_, *src.max_value_))
- *tgt.max_value_ = conditional_forward<FwdSk>(*src.max_value_);
+ if (C()(*tgt.max_item_, *src.max_item_))
+ *tgt.max_item_ = conditional_forward<FwdSk>(*src.max_item_);
}
}
-
template<typename T, typename C, typename A>
uint8_t quantiles_sketch<T, C, A>::lowest_zero_bit_starting_at(uint64_t bits, uint8_t starting_bit) {
uint8_t pos = starting_bit & 0X3F;
@@ -1292,6 +1263,23 @@ class quantiles_sketch<T, C, A>::items_deleter {
size_t num_;
};
+template<typename T, typename C, typename A>
+void quantiles_sketch<T, C, A>::setup_sorted_view() const {
+ if (sorted_view_ == nullptr) {
+ using AllocSortedView = typename std::allocator_traits<A>::template rebind_alloc<quantile_sketch_sorted_view<T, C, A>>;
+ sorted_view_ = new (AllocSortedView(allocator_).allocate(1)) quantile_sketch_sorted_view<T, C, A>(get_sorted_view());
+ }
+}
+
+template<typename T, typename C, typename A>
+void quantiles_sketch<T, C, A>::reset_sorted_view() {
+ if (sorted_view_ != nullptr) {
+ sorted_view_->~quantile_sketch_sorted_view();
+ using AllocSortedView = typename std::allocator_traits<A>::template rebind_alloc<quantile_sketch_sorted_view<T, C, A>>;
+ AllocSortedView(allocator_).deallocate(sorted_view_, 1);
+ sorted_view_ = nullptr;
+ }
+}
// quantiles_sketch::const_iterator implementation
diff --git a/quantiles/test/CMakeLists.txt b/quantiles/test/CMakeLists.txt
index 1075c26..008ac83 100644
--- a/quantiles/test/CMakeLists.txt
+++ b/quantiles/test/CMakeLists.txt
@@ -17,7 +17,7 @@
add_executable(quantiles_test)
-target_link_libraries(quantiles_test quantiles common common_test)
+target_link_libraries(quantiles_test quantiles common common_test_lib)
set_target_properties(quantiles_test PROPERTIES
CXX_STANDARD 11
diff --git a/quantiles/test/quantiles_compatibility_test.cpp b/quantiles/test/quantiles_compatibility_test.cpp
index ea259cf..dc537ba 100644
--- a/quantiles/test/quantiles_compatibility_test.cpp
+++ b/quantiles/test/quantiles_compatibility_test.cpp
@@ -73,50 +73,50 @@ TEST_CASE("quantiles compatibility", "[quantiles_compatibility]") {
SECTION("Qk128_n50_v0.3.0.sk") {
// file: Qk128_n50_v0.3.0.sk
- // median: 26.0
- quantiles_decode_and_check(128, 50, "0.3.0", 26.0);
+ // median: 25
+ quantiles_decode_and_check(128, 50, "0.3.0", 25);
}
SECTION("Qk128_n1000_v0.3.0.sk") {
// file: Qk128_n1000_v0.3.0.sk
- // median: 501.0
- quantiles_decode_and_check(128, 1000, "0.3.0", 501.0);
+ // median: ~500
+ quantiles_decode_and_check(128, 1000, "0.3.0", 497);
}
SECTION("Qk128_n50_v0.6.0.sk") {
// file: Qk128_n50_v0.6.0.sk
- // median: 26.0
- quantiles_decode_and_check(128, 50, "0.6.0", 26.0);
+ // median: 25
+ quantiles_decode_and_check(128, 50, "0.6.0", 25);
}
SECTION("Qk128_n1000_v0.6.0.sk") {
// file: Qk128_n1000_v0.6.0.sk
- // median: 501.0
- quantiles_decode_and_check(128, 1000, "0.6.0", 501.0);
+ // median: ~500
+ quantiles_decode_and_check(128, 1000, "0.6.0", 497);
}
SECTION("Qk128_n50_v0.8.0.sk") {
// file: Qk128_n50_v0.8.0.sk
- // median: 26.0
- quantiles_decode_and_check(128, 50, "0.8.0", 26.0);
+ // median: 25
+ quantiles_decode_and_check(128, 50, "0.8.0", 25);
}
SECTION("Qk128_n1000_v0.8.0.sk") {
// file: Qk128_n1000_v0.8.0.sk
- // median: 501.0
- quantiles_decode_and_check(128, 1000, "0.8.0", 501.0);
+ // median: ~500
+ quantiles_decode_and_check(128, 1000, "0.8.0", 497);
}
SECTION("Qk128_n50_v0.8.3.sk") {
// file: Qk128_n50_v0.8.3.sk
- // median: 26.0
- quantiles_decode_and_check(128, 50, "0.8.3", 26.0);
+ // median: 25
+ quantiles_decode_and_check(128, 50, "0.8.3", 25);
}
SECTION("Qk128_n1000_v0.8.3.sk") {
// file: Qk128_n1000_v0.8.3.sk
- // median: 501.0
- quantiles_decode_and_check(128, 1000, "0.8.3", 501.0);
+ // median: ~500
+ quantiles_decode_and_check(128, 1000, "0.8.3", 497);
}
// cleanup
diff --git a/quantiles/test/quantiles_sketch_test.cpp b/quantiles/test/quantiles_sketch_test.cpp
index 8d4a3ed..75c2e89 100644
--- a/quantiles/test/quantiles_sketch_test.cpp
+++ b/quantiles/test/quantiles_sketch_test.cpp
@@ -61,8 +61,8 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
REQUIRE(sketch.get_n() == 0);
REQUIRE(sketch.get_num_retained() == 0);
REQUIRE(std::isnan(sketch.get_rank(0)));
- REQUIRE(std::isnan(sketch.get_min_value()));
- REQUIRE(std::isnan(sketch.get_max_value()));
+ REQUIRE(std::isnan(sketch.get_min_item()));
+ REQUIRE(std::isnan(sketch.get_max_item()));
REQUIRE(std::isnan(sketch.get_quantile(0.5)));
const double fractions[3] {0, 0.5, 1};
REQUIRE(sketch.get_quantiles(fractions, 3).empty());
@@ -89,10 +89,12 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
REQUIRE_FALSE(sketch.is_estimation_mode());
REQUIRE(sketch.get_n() == 1);
REQUIRE(sketch.get_num_retained() == 1);
- REQUIRE(sketch.get_rank(1.0f) == 0.0);
- REQUIRE(sketch.get_rank(2.0f) == 1.0);
- REQUIRE(sketch.get_min_value() == 1.0);
- REQUIRE(sketch.get_max_value() == 1.0);
+ REQUIRE(sketch.get_rank(0) == 0);
+ REQUIRE(sketch.get_rank(1.0f) == 1);
+ REQUIRE(sketch.get_rank(1.0f, false) == 0);
+ REQUIRE(sketch.get_rank(2.0f, false) == 1);
+ REQUIRE(sketch.get_min_item() == 1.0);
+ REQUIRE(sketch.get_max_item() == 1.0);
REQUIRE(sketch.get_quantile(0.5) == 1.0);
const double fractions[3] {0, 0.5, 1};
auto quantiles = sketch.get_quantiles(fractions, 3);
@@ -139,9 +141,9 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
REQUIRE_FALSE(sketch.is_empty());
REQUIRE_FALSE(sketch.is_estimation_mode());
REQUIRE(sketch.get_num_retained() == n);
- REQUIRE(sketch.get_min_value() == 0.0);
+ REQUIRE(sketch.get_min_item() == 0.0);
REQUIRE(sketch.get_quantile(0) == 0.0);
- REQUIRE(sketch.get_max_value() == n - 1);
+ REQUIRE(sketch.get_max_item() == n - 1);
REQUIRE(sketch.get_quantile(1) == n - 1);
int count = 0;
@@ -151,24 +153,24 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
}
REQUIRE(count == n);
- const double fractions[3] {0, 0.5, 1};
- auto quantiles = sketch.get_quantiles(fractions, 3);
+ const double ranks[3] {0, 0.5, 1};
+ auto quantiles = sketch.get_quantiles(ranks, 3);
REQUIRE(quantiles.size() == 3);
REQUIRE(quantiles[0] == 0.0);
REQUIRE(quantiles[1] == static_cast<float>(n / 2));
REQUIRE(quantiles[2] == n - 1 );
- for (uint32_t i = 0; i < n; i++) {
- const double trueRank = (double) i / n;
- REQUIRE(sketch.get_rank(static_cast<float>(i)) == trueRank);
- }
-
// the alternative method must produce the same result
auto quantiles2 = sketch.get_quantiles(3);
REQUIRE(quantiles2.size() == 3);
REQUIRE(quantiles[0] == quantiles2[0]);
REQUIRE(quantiles[1] == quantiles2[1]);
REQUIRE(quantiles[2] == quantiles2[2]);
+
+ for (uint32_t i = 0; i < n; i++) {
+ const double trueRank = static_cast<double>(i + 1) / n;
+ REQUIRE(sketch.get_rank(static_cast<float>(i)) == trueRank);
+ }
}
SECTION("10 items") {
@@ -183,20 +185,20 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
sketch.update(8.0f);
sketch.update(9.0f);
sketch.update(10.0f);
- REQUIRE(sketch.get_quantile(0) == 1.0);
- REQUIRE(sketch.get_quantile(0.5) == 6.0);
- REQUIRE(sketch.get_quantile(0.99) == 10.0);
- REQUIRE(sketch.get_quantile(1) == 10.0);
+ REQUIRE(sketch.get_quantile(0) == 1);
+ REQUIRE(sketch.get_quantile(0.5) == 5);
+ REQUIRE(sketch.get_quantile(0.99) == 10);
+ REQUIRE(sketch.get_quantile(1) == 10);
}
- SECTION("100 items") {
+ SECTION("100 items, exact mode") {
quantiles_float_sketch sketch(128, 0);
- for (int i = 0; i < 100; ++i) sketch.update(static_cast<float>(i));
- REQUIRE(sketch.get_quantile(0) == 0);
+ for (int i = 1; i <= 100; ++i) sketch.update(static_cast<float>(i));
+ REQUIRE(sketch.get_quantile(0) == 1);
REQUIRE(sketch.get_quantile(0.01) == 1);
REQUIRE(sketch.get_quantile(0.5) == 50);
- REQUIRE(sketch.get_quantile(0.99) == 99.0);
- REQUIRE(sketch.get_quantile(1) == 99.0);
+ REQUIRE(sketch.get_quantile(0.99) == 99);
+ REQUIRE(sketch.get_quantile(1) == 100);
}
SECTION("many items, estimation mode") {
@@ -208,31 +210,28 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
}
REQUIRE_FALSE(sketch.is_empty());
REQUIRE(sketch.is_estimation_mode());
- REQUIRE(sketch.get_min_value() == 0.0); // min value is exact
- REQUIRE(sketch.get_quantile(0) == 0.0); // min value is exact
- REQUIRE(sketch.get_max_value() == n - 1); // max value is exact
- REQUIRE(sketch.get_quantile(1) == n - 1); // max value is exact
+ REQUIRE(sketch.get_min_item() == 0.0); // min value is exact
+ REQUIRE(sketch.get_max_item() == n - 1); // max value is exact
// test rank
for (int i = 0; i < n; i++) {
- const double trueRank = static_cast<float>(i) / n;
+ const double trueRank = static_cast<float>(i + 1) / n;
const double sketchRank = sketch.get_rank(static_cast<float>(i));
REQUIRE(sketchRank == Approx(trueRank).margin(RANK_EPS_FOR_K_128));
}
// test quantiles at every 0.1 percentage point
- double fractions[1001];
- double reverse_fractions[1001]; // check that ordering does not matter
+ double ranks[1001];
+ double reverse_ranks[1001]; // check that ordering does not matter
for (int i = 0; i < 1001; i++) {
- fractions[i] = (double) i / 1000;
- reverse_fractions[1000 - i] = fractions[i];
+ ranks[i] = (double) i / 1000;
+ reverse_ranks[1000 - i] = ranks[i];
}
- auto quantiles = sketch.get_quantiles(fractions, 1001);
- auto reverse_quantiles = sketch.get_quantiles(reverse_fractions, 1001);
- float previous_quantile(0);
+ auto quantiles = sketch.get_quantiles(ranks, 1001);
+ auto reverse_quantiles = sketch.get_quantiles(reverse_ranks, 1001);
+ float previous_quantile = 0;
for (int i = 0; i < 1001; i++) {
- // expensive in a loop, just to check the equivalence here, not advised for real code
- const float quantile = sketch.get_quantile(fractions[i]);
+ const float quantile = sketch.get_quantile(ranks[i]);
REQUIRE(quantiles[i] == quantile);
REQUIRE(reverse_quantiles[1000 - i] == quantile);
REQUIRE(previous_quantile <= quantile);
@@ -287,23 +286,23 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
// get_rank()
// using knowledge of internal structure
// value still in the base buffer to avoid randomness
- REQUIRE(sketch.get_rank<false>(80) == 0.79);
- REQUIRE(sketch.get_rank<true>(80) == 0.80);
+ REQUIRE(sketch.get_rank(80, false) == 0.79);
+ REQUIRE(sketch.get_rank(80, true) == 0.80);
// value pushed into higher level
- REQUIRE(sketch.get_rank<false>(50) == Approx(0.49).margin(0.01));
- REQUIRE(sketch.get_rank<true>(50) == 0.50);
+ REQUIRE(sketch.get_rank(50, false) == Approx(0.49).margin(0.01));
+ REQUIRE(sketch.get_rank(50, true) == 0.50);
// get_quantile()
// value still in base buffer
- REQUIRE(sketch.get_quantile<false>(0.70) == 71);
- REQUIRE(sketch.get_quantile<true>(0.70) == 70);
+ REQUIRE(sketch.get_quantile(0.70, false) == 71);
+ REQUIRE(sketch.get_quantile(0.70, true) == 70);
// value pushed into higher levell
- int quantile = sketch.get_quantile<false>(0.30);
+ int quantile = sketch.get_quantile(0.30, false);
if (quantile != 31 && quantile != 32) { FAIL(); }
- quantile = sketch.get_quantile<true>(0.30);
+ quantile = sketch.get_quantile(0.30, true);
if (quantile != 29 && quantile != 30) { FAIL(); }
}
@@ -319,8 +318,8 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
REQUIRE(sketch2.get_n() == sketch.get_n());
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
- REQUIRE(std::isnan(sketch2.get_min_value()));
- REQUIRE(std::isnan(sketch2.get_max_value()));
+ REQUIRE(std::isnan(sketch2.get_min_item()));
+ REQUIRE(std::isnan(sketch2.get_max_item()));
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
}
@@ -334,8 +333,8 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
REQUIRE(sketch2.get_n() == sketch.get_n());
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
- REQUIRE(std::isnan(sketch2.get_min_value()));
- REQUIRE(std::isnan(sketch2.get_max_value()));
+ REQUIRE(std::isnan(sketch2.get_min_item()));
+ REQUIRE(std::isnan(sketch2.get_max_item()));
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
}
@@ -353,11 +352,12 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
REQUIRE_FALSE(sketch2.is_estimation_mode());
REQUIRE(sketch2.get_n() == 1);
REQUIRE(sketch2.get_num_retained() == 1);
- REQUIRE(sketch2.get_min_value() == 1.0);
- REQUIRE(sketch2.get_max_value() == 1.0);
- REQUIRE(sketch2.get_quantile(0.5) == 1.0);
- REQUIRE(sketch2.get_rank(1) == 0.0);
- REQUIRE(sketch2.get_rank(2) == 1.0);
+ REQUIRE(sketch2.get_min_item() == 1);
+ REQUIRE(sketch2.get_max_item() == 1);
+ REQUIRE(sketch2.get_quantile(0.5) == 1);
+ REQUIRE(sketch2.get_rank(0) == 0);
+ REQUIRE(sketch2.get_rank(1) == 1);
+ REQUIRE(sketch2.get_rank(2) == 1);
}
SECTION("bytes serialize deserialize one item") {
@@ -371,11 +371,12 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
REQUIRE_FALSE(sketch2.is_estimation_mode());
REQUIRE(sketch2.get_n() == 1);
REQUIRE(sketch2.get_num_retained() == 1);
- REQUIRE(sketch2.get_min_value() == 1.0);
- REQUIRE(sketch2.get_max_value() == 1.0);
- REQUIRE(sketch2.get_quantile(0.5) == 1.0);
- REQUIRE(sketch2.get_rank(1) == 0.0);
- REQUIRE(sketch2.get_rank(2) == 1.0);
+ REQUIRE(sketch2.get_min_item() == 1);
+ REQUIRE(sketch2.get_max_item() == 1);
+ REQUIRE(sketch2.get_quantile(0.5) == 1);
+ REQUIRE(sketch2.get_rank(0) == 0);
+ REQUIRE(sketch2.get_rank(1) == 1);
+ REQUIRE(sketch2.get_rank(2) == 1);
}
SECTION("stream serialize deserialize three items") {
@@ -393,8 +394,8 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
REQUIRE_FALSE(sketch2.is_estimation_mode());
REQUIRE(sketch2.get_n() == 3);
REQUIRE(sketch2.get_num_retained() == 3);
- REQUIRE(sketch2.get_min_value() == 1.0);
- REQUIRE(sketch2.get_max_value() == 3.0);
+ REQUIRE(sketch2.get_min_item() == 1.0);
+ REQUIRE(sketch2.get_max_item() == 3.0);
}
SECTION("bytes serialize deserialize three items") {
@@ -410,8 +411,8 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
REQUIRE_FALSE(sketch2.is_estimation_mode());
REQUIRE(sketch2.get_n() == 3);
REQUIRE(sketch2.get_num_retained() == 3);
- REQUIRE(sketch2.get_min_value() == 1.0);
- REQUIRE(sketch2.get_max_value() == 3.0);
+ REQUIRE(sketch2.get_min_item() == 1.0);
+ REQUIRE(sketch2.get_max_item() == 3.0);
}
SECTION("stream serialize deserialize many floats") {
@@ -428,8 +429,8 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
REQUIRE(sketch2.get_n() == sketch.get_n());
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
- REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
- REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
+ REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
+ REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
REQUIRE(sketch2.get_quantile(0.5) == sketch.get_quantile(0.5));
@@ -448,8 +449,8 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
REQUIRE(sketch2.get_n() == sketch.get_n());
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
- REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
- REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
+ REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
+ REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
REQUIRE(sketch2.get_quantile(0.5) == sketch.get_quantile(0.5));
@@ -472,8 +473,8 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
REQUIRE(sketch2.get_n() == sketch.get_n());
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
- REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
- REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
+ REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
+ REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
REQUIRE(sketch2.get_quantile(0.5) == sketch.get_quantile(0.5));
@@ -514,17 +515,17 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
sketch2.update(static_cast<float>((2 * n) - i - 1));
}
- REQUIRE(sketch1.get_min_value() == 0.0f);
- REQUIRE(sketch1.get_max_value() == n - 1);
- REQUIRE(sketch2.get_min_value() == n);
- REQUIRE(sketch2.get_max_value() == 2.0f * n - 1);
+ REQUIRE(sketch1.get_min_item() == 0.0f);
+ REQUIRE(sketch1.get_max_item() == n - 1);
+ REQUIRE(sketch2.get_min_item() == n);
+ REQUIRE(sketch2.get_max_item() == 2.0f * n - 1);
sketch1.merge(sketch2);
REQUIRE_FALSE(sketch1.is_empty());
REQUIRE(sketch1.get_n() == 2 * n);
- REQUIRE(sketch1.get_min_value() == 0.0f);
- REQUIRE(sketch1.get_max_value() == 2.0f * n - 1);
+ REQUIRE(sketch1.get_min_item() == 0.0f);
+ REQUIRE(sketch1.get_max_item() == 2.0f * n - 1);
REQUIRE(sketch1.get_quantile(0.5) == Approx(n).margin(n * RANK_EPS_FOR_K_128));
}
@@ -537,17 +538,17 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
sketch2.update(static_cast<float>((2 * n) - i - 1));
}
- REQUIRE(sketch1.get_min_value() == 0.0f);
- REQUIRE(sketch1.get_max_value() == n - 1);
- REQUIRE(sketch2.get_min_value() == n);
- REQUIRE(sketch2.get_max_value() == 2.0f * n - 1);
+ REQUIRE(sketch1.get_min_item() == 0.0f);
+ REQUIRE(sketch1.get_max_item() == n - 1);
+ REQUIRE(sketch2.get_min_item() == n);
+ REQUIRE(sketch2.get_max_item() == 2.0f * n - 1);
sketch1.merge(const_cast<const quantiles_float_sketch&>(sketch2));
REQUIRE_FALSE(sketch1.is_empty());
REQUIRE(sketch1.get_n() == 2 * n);
- REQUIRE(sketch1.get_min_value() == 0.0f);
- REQUIRE(sketch1.get_max_value() == 2.0f * n - 1);
+ REQUIRE(sketch1.get_min_item() == 0.0f);
+ REQUIRE(sketch1.get_max_item() == 2.0f * n - 1);
REQUIRE(sketch1.get_quantile(0.5) == Approx(n).margin(n * RANK_EPS_FOR_K_128));
}
@@ -561,10 +562,10 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
sketch2.update(static_cast<float>((2 * n) - i - 1));
}
- REQUIRE(sketch1.get_min_value() == 0.0f);
- REQUIRE(sketch1.get_max_value() == n - 1);
- REQUIRE(sketch2.get_min_value() == n);
- REQUIRE(sketch2.get_max_value() == 2.0f * n - 1);
+ REQUIRE(sketch1.get_min_item() == 0.0f);
+ REQUIRE(sketch1.get_max_item() == n - 1);
+ REQUIRE(sketch2.get_min_item() == n);
+ REQUIRE(sketch2.get_max_item() == 2.0f * n - 1);
REQUIRE(sketch1.get_k() == 256);
REQUIRE(sketch2.get_k() == 128);
@@ -580,8 +581,8 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
REQUIRE_FALSE(sketch1.is_empty());
REQUIRE(sketch1.get_n() == 2 * n);
- REQUIRE(sketch1.get_min_value() == 0.0f);
- REQUIRE(sketch1.get_max_value() == 2.0f * n - 1);
+ REQUIRE(sketch1.get_min_item() == 0.0f);
+ REQUIRE(sketch1.get_max_item() == 2.0f * n - 1);
REQUIRE(sketch1.get_quantile(0.5) == Approx(n).margin(n * RANK_EPS_FOR_K_128));
}
@@ -600,8 +601,8 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
REQUIRE_FALSE(sketch1.is_empty());
REQUIRE(sketch1.get_n() == n);
- REQUIRE(sketch1.get_min_value() == 0.0f);
- REQUIRE(sketch1.get_max_value() == n - 1);
+ REQUIRE(sketch1.get_min_item() == 0.0f);
+ REQUIRE(sketch1.get_max_item() == n - 1);
REQUIRE(sketch1.get_quantile(0.5) == Approx(n / 2).margin(n / 2 * RANK_EPS_FOR_K_128));
sketch2.update(static_cast<float>(0));
@@ -616,8 +617,8 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
sketch1.update(1.0f);
sketch2.update(2.0f);
sketch2.merge(sketch1);
- REQUIRE(sketch2.get_min_value() == 1.0f);
- REQUIRE(sketch2.get_max_value() == 2.0f);
+ REQUIRE(sketch2.get_min_item() == 1.0f);
+ REQUIRE(sketch2.get_max_item() == 2.0f);
}
SECTION("merge min and max values from other") {
@@ -625,8 +626,8 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
for (int i = 0; i < 1000000; i++) sketch1.update(static_cast<float>(i));
quantiles_float_sketch sketch2(128, 0);
sketch2.merge(sketch1);
- REQUIRE(sketch2.get_min_value() == 0.0f);
- REQUIRE(sketch2.get_max_value() == 999999.0f);
+ REQUIRE(sketch2.get_min_item() == 0.0f);
+ REQUIRE(sketch2.get_max_item() == 999999.0f);
}
SECTION("merge: two empty") {
@@ -658,8 +659,8 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
sketch1.merge(sketch2);
REQUIRE(sketch1.get_n() == 101 * k);
REQUIRE(sketch1.get_k() == 2 * k); // no reason to have shrunk
- REQUIRE(sketch1.get_min_value() == 0.0f);
- REQUIRE(sketch1.get_max_value() == static_cast<float>(100 * k - 1));
+ REQUIRE(sketch1.get_min_item() == 0.0f);
+ REQUIRE(sketch1.get_max_item() == static_cast<float>(100 * k - 1));
}
SECTION("merge: src estimation, tgt exact, tgt.k > src.k") {
@@ -679,8 +680,8 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
sketch1.merge(sketch2);
REQUIRE(sketch1.get_n() == 101 * k);
REQUIRE(sketch1.get_k() == k); // no reason to have shrunk
- REQUIRE(sketch1.get_min_value() == 0.0f);
- REQUIRE(sketch1.get_max_value() == static_cast<float>(100 * k - 1));
+ REQUIRE(sketch1.get_min_item() == 0.0f);
+ REQUIRE(sketch1.get_max_item() == static_cast<float>(100 * k - 1));
}
SECTION("merge: both estimation, tgt.k < src.k") {
@@ -696,8 +697,8 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
sketch1.merge(sketch2);
REQUIRE(sketch1.get_n() == 200 * k);
REQUIRE(sketch1.get_k() == k); // no reason to have shrunk
- REQUIRE(sketch1.get_min_value() == static_cast<float>(-100 * k + 1));
- REQUIRE(sketch1.get_max_value() == static_cast<float>(100 * k - 1));
+ REQUIRE(sketch1.get_min_item() == static_cast<float>(-100 * k + 1));
+ REQUIRE(sketch1.get_max_item() == static_cast<float>(100 * k - 1));
REQUIRE(sketch1.get_quantile(0.5) == Approx(0.0).margin(100 * k * RANK_EPS_FOR_K_128));
}
@@ -718,8 +719,8 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
sketch1.merge(sketch2);
REQUIRE(sketch1.get_n() == 100 * k);
REQUIRE(sketch1.get_k() == k);
- REQUIRE(sketch1.get_min_value() == 0.0f);
- REQUIRE(sketch1.get_max_value() == static_cast<float>(100 * k - 1));
+ REQUIRE(sketch1.get_min_item() == 0.0f);
+ REQUIRE(sketch1.get_max_item() == static_cast<float>(100 * k - 1));
float n = 100 * k - 1;
REQUIRE(sketch1.get_quantile(0.5) == Approx(n / 2).margin(n / 2 * RANK_EPS_FOR_K_128));
}
@@ -738,8 +739,8 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
sketch1.merge(sketch2);
REQUIRE(sketch1.get_n() == 2 * n);
REQUIRE(sketch1.get_k() == k);
- REQUIRE(sketch1.get_min_value() == 0.0f);
- REQUIRE(sketch1.get_max_value() == static_cast<float>(2 * n - 1));
+ REQUIRE(sketch1.get_min_item() == 0.0f);
+ REQUIRE(sketch1.get_max_item() == static_cast<float>(2 * n - 1));
REQUIRE(sketch1.get_quantile(0.5) == Approx(n).margin(n * RANK_EPS_FOR_K_128));
}
@@ -757,16 +758,16 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
sketch1.merge(sketch2);
REQUIRE(sketch1.get_n() == 2 * n);
REQUIRE(sketch1.get_k() == k);
- REQUIRE(sketch1.get_min_value() == 0.0f);
- REQUIRE(sketch1.get_max_value() == static_cast<float>(2 * n - 1));
+ REQUIRE(sketch1.get_min_item() == 0.0f);
+ REQUIRE(sketch1.get_max_item() == static_cast<float>(2 * n - 1));
REQUIRE(sketch1.get_quantile(0.5) == Approx(n).margin(n * RANK_EPS_FOR_K_128));
}
SECTION("sketch of ints") {
quantiles_sketch<int> sketch;
REQUIRE_THROWS_AS(sketch.get_quantile(0), std::runtime_error);
- REQUIRE_THROWS_AS(sketch.get_min_value(), std::runtime_error);
- REQUIRE_THROWS_AS(sketch.get_max_value(), std::runtime_error);
+ REQUIRE_THROWS_AS(sketch.get_min_item(), std::runtime_error);
+ REQUIRE_THROWS_AS(sketch.get_max_item(), std::runtime_error);
const int n = 10000;
for (int i = 0; i < n; i++) sketch.update(i);
@@ -781,8 +782,8 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
REQUIRE(sketch2.get_n() == sketch.get_n());
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
- REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
- REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
+ REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
+ REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
REQUIRE(sketch2.get_quantile(0.5) == sketch.get_quantile(0.5));
@@ -793,15 +794,15 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
SECTION("sketch of strings stream") {
quantiles_string_sketch sketch1(128, 0);
REQUIRE_THROWS_AS(sketch1.get_quantile(0), std::runtime_error);
- REQUIRE_THROWS_AS(sketch1.get_min_value(), std::runtime_error);
- REQUIRE_THROWS_AS(sketch1.get_max_value(), std::runtime_error);
+ REQUIRE_THROWS_AS(sketch1.get_min_item(), std::runtime_error);
+ REQUIRE_THROWS_AS(sketch1.get_max_item(), std::runtime_error);
REQUIRE(sketch1.get_serialized_size_bytes() == 8);
const int n = 1000;
for (int i = 0; i < n; i++) sketch1.update(std::to_string(i));
- REQUIRE(sketch1.get_min_value() == std::string("0"));
- REQUIRE(sketch1.get_max_value() == std::string("999"));
+ REQUIRE(sketch1.get_min_item() == std::string("0"));
+ REQUIRE(sketch1.get_max_item() == std::string("999"));
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
sketch1.serialize(s);
@@ -813,8 +814,8 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
REQUIRE(sketch2.is_estimation_mode() == sketch1.is_estimation_mode());
REQUIRE(sketch2.get_n() == sketch1.get_n());
REQUIRE(sketch2.get_num_retained() == sketch1.get_num_retained());
- REQUIRE(sketch2.get_min_value() == sketch1.get_min_value());
- REQUIRE(sketch2.get_max_value() == sketch1.get_max_value());
+ REQUIRE(sketch2.get_min_item() == sketch1.get_min_item());
+ REQUIRE(sketch2.get_max_item() == sketch1.get_max_item());
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch1.get_normalized_rank_error(false));
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch1.get_normalized_rank_error(true));
REQUIRE(sketch2.get_quantile(0.5) == sketch1.get_quantile(0.5));
@@ -829,15 +830,15 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
SECTION("sketch of strings bytes") {
quantiles_string_sketch sketch1(128, 0);
REQUIRE_THROWS_AS(sketch1.get_quantile(0), std::runtime_error);
- REQUIRE_THROWS_AS(sketch1.get_min_value(), std::runtime_error);
- REQUIRE_THROWS_AS(sketch1.get_max_value(), std::runtime_error);
+ REQUIRE_THROWS_AS(sketch1.get_min_item(), std::runtime_error);
+ REQUIRE_THROWS_AS(sketch1.get_max_item(), std::runtime_error);
REQUIRE(sketch1.get_serialized_size_bytes() == 8);
const int n = 10000;
for (int i = 0; i < n; i++) sketch1.update(std::to_string(i));
- REQUIRE(sketch1.get_min_value() == std::string("0"));
- REQUIRE(sketch1.get_max_value() == std::string("9999"));
+ REQUIRE(sketch1.get_min_item() == std::string("0"));
+ REQUIRE(sketch1.get_max_item() == std::string("9999"));
auto bytes = sketch1.serialize();
REQUIRE(bytes.size() == sketch1.get_serialized_size_bytes());
@@ -847,8 +848,8 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
REQUIRE(sketch2.is_estimation_mode() == sketch1.is_estimation_mode());
REQUIRE(sketch2.get_n() == sketch1.get_n());
REQUIRE(sketch2.get_num_retained() == sketch1.get_num_retained());
- REQUIRE(sketch2.get_min_value() == sketch1.get_min_value());
- REQUIRE(sketch2.get_max_value() == sketch1.get_max_value());
+ REQUIRE(sketch2.get_min_item() == sketch1.get_min_item());
+ REQUIRE(sketch2.get_max_item() == sketch1.get_max_item());
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch1.get_normalized_rank_error(false));
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch1.get_normalized_rank_error(true));
REQUIRE(sketch2.get_quantile(0.5) == sketch1.get_quantile(0.5));
@@ -886,20 +887,20 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
SECTION("move") {
quantiles_sketch<int> sketch1;
- const int n(100);
+ const int n = 100;
for (int i = 0; i < n; i++) sketch1.update(i);
// move constructor
quantiles_sketch<int> sketch2(std::move(sketch1));
for (int i = 0; i < n; i++) {
- REQUIRE(sketch2.get_rank(i) == (double) i / n);
+ REQUIRE(sketch2.get_rank(i) == static_cast<double>(i + 1) / n);
}
// move assignment
quantiles_sketch<int> sketch3;
sketch3 = std::move(sketch2);
for (int i = 0; i < n; i++) {
- REQUIRE(sketch3.get_rank(i) == (double) i / n);
+ REQUIRE(sketch3.get_rank(i) == static_cast<double>(i + 1) / n);
}
}
@@ -918,10 +919,10 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
REQUIRE(sk_double.get_k() == sk_int.get_k());
REQUIRE(sk_double.get_num_retained() == sk_int.get_num_retained());
- auto sv_double = sk_double.get_sorted_view(false);
+ auto sv_double = sk_double.get_sorted_view();
std::vector<std::pair<double, uint64_t>> vec_double(sv_double.begin(), sv_double.end());
- auto sv_int = sk_int.get_sorted_view(false);
+ auto sv_int = sk_int.get_sorted_view();
std::vector<std::pair<int, uint64_t>> vec_int(sv_int.begin(), sv_int.end());
REQUIRE(vec_double.size() == vec_int.size());
diff --git a/req/include/req_compactor.hpp b/req/include/req_compactor.hpp
index c78784b..6ee172b 100755
--- a/req/include/req_compactor.hpp
+++ b/req/include/req_compactor.hpp
@@ -50,8 +50,7 @@ public:
T* begin();
T* end();
- template<bool inclusive>
- uint64_t compute_weight(const T& item) const;
+ uint64_t compute_weight(const T& item, bool inclusive) const;
template<typename FwdT>
void append(FwdT&& item);
diff --git a/req/include/req_compactor_impl.hpp b/req/include/req_compactor_impl.hpp
index d3747be..1d98b54 100755
--- a/req/include/req_compactor_impl.hpp
+++ b/req/include/req_compactor_impl.hpp
@@ -180,8 +180,7 @@ uint8_t req_compactor<T, C, A>::get_lg_weight() const {
}
template<typename T, typename C, typename A>
-template<bool inclusive>
-uint64_t req_compactor<T, C, A>::compute_weight(const T& item) const {
+uint64_t req_compactor<T, C, A>::compute_weight(const T& item, bool inclusive) const {
if (!sorted_) const_cast<req_compactor*>(this)->sort(); // allow sorting as a side effect
auto it = inclusive ?
std::upper_bound(begin(), end(), item, C()) :
diff --git a/req/include/req_sketch.hpp b/req/include/req_sketch.hpp
index a71d7da..a900772 100755
--- a/req/include/req_sketch.hpp
+++ b/req/include/req_sketch.hpp
@@ -31,7 +31,6 @@ namespace datasketches {
template<
typename T,
typename Comparator = std::less<T>, // strict weak ordering function (see C++ named requirements: Compare)
- typename S = serde<T>, // deprecated, to be removed in the next major version
typename Allocator = std::allocator<T>
>
class req_sketch {
@@ -63,8 +62,8 @@ public:
* @param other sketch of a different type
* @param allocator instance of an Allocator
*/
- template<typename TT, typename CC, typename SS, typename AA>
- explicit req_sketch(const req_sketch<TT, CC, SS, AA>& other, const Allocator& allocator = Allocator());
+ template<typename TT, typename CC, typename AA>
+ explicit req_sketch(const req_sketch<TT, CC, AA>& other, const Allocator& allocator = Allocator());
/**
* Returns configured parameter K
@@ -102,27 +101,35 @@ public:
*/
bool is_estimation_mode() const;
+ /**
+ * Updates this sketch with the given data item.
+ * @param item from a stream of items
+ */
template<typename FwdT>
void update(FwdT&& item);
+ /**
+ * Merges another sketch into this one.
+ * @param other sketch to merge into this one
+ */
template<typename FwdSk>
void merge(FwdSk&& other);
/**
- * Returns the min value of the stream.
+ * Returns the min item of the stream.
* For floating point types: if the sketch is empty this returns NaN.
* For other types: if the sketch is empty this throws runtime_error.
- * @return the min value of the stream
+ * @return the min item of the stream
*/
- const T& get_min_value() const;
+ const T& get_min_item() const;
/**
- * Returns the max value of the stream.
+ * Returns the max item of the stream.
* For floating point types: if the sketch is empty this returns NaN.
* For other types: if the sketch is empty this throws runtime_error.
- * @return the max value of the stream
+ * @return the max item of the stream
*/
- const T& get_max_value() const;
+ const T& get_max_item() const;
/**
* Returns an instance of the comparator for this sketch.
@@ -131,84 +138,83 @@ public:
Comparator get_comparator() const;
/**
- * Returns an approximation to the normalized (fractional) rank of the given item from 0 to 1 inclusive.
- * With the template parameter inclusive=true the weight of the given item is included into the rank.
- * Otherwise the rank equals the sum of the weights of items less than the given item according to the Comparator.
+ * Returns an approximation to the normalized rank of the given item from 0 to 1 inclusive.
*
- * <p>If the sketch is empty this returns NaN.
+ * <p>If the sketch is empty the result is undefined (NaN).
+ *
+ * @param item to be ranked.
+ * @param inclusive if true the weight of the given item is included into the rank.
+ * Otherwise the rank equals the sum of the weights of all items that are less than the given item
+ * according to the comparator C.
*
- * @param item to be ranked
* @return an approximate rank of the given item
*/
- template<bool inclusive = false>
- double get_rank(const T& item) const;
+ double get_rank(const T& item, bool inclusive = true) const;
/**
* Returns an approximation to the Probability Mass Function (PMF) of the input stream
- * given a set of split points (values).
+ * given a set of split points (items).
*
* <p>If the sketch is empty this returns an empty vector.
*
- * @param split_points an array of <i>m</i> unique, monotonically increasing values
+ * @param split_points an array of <i>m</i> unique, monotonically increasing items
* that divide the input domain into <i>m+1</i> consecutive disjoint intervals.
- * If the template parameter inclusive=false, the definition of an "interval" is inclusive of the left split point and exclusive of the right
- * split point, with the exception that the last interval will include the maximum value.
- * If the template parameter inclusive=true, the definition of an "interval" is exclusive of the left split point and inclusive of the right
- * split point.
- * It is not necessary to include either the min or max values in these split points.
*
- * @return an array of m+1 doubles each of which is an approximation
- * to the fraction of the input stream values (the mass) that fall into one of those intervals.
- * If the template parameter inclusive=false, the definition of an "interval" is inclusive of the left split point and exclusive of the right
- * split point, with the exception that the last interval will include the maximum value.
- * If the template parameter inclusive=true, the definition of an "interval" is exclusive of the left split point and inclusive of the right
+ * @param size of the array of split points.
+ *
+ * @param inclusive if false, the definition of an "interval" is inclusive of the left split point and exclusive of the right
+ * split point, with the exception that the last interval will include the maximum item.
+ * If true, the definition of an "interval" is exclusive of the left split point and inclusive of the right
* split point.
+ *
+ * @return an array of m+1 double values each of which is an approximation
+ * to the fraction of the input stream items (the mass) that fall into one of those intervals.
*/
- template<bool inclusive = false>
- vector_double get_PMF(const T* split_points, uint32_t size) const;
+ vector_double get_PMF(const T* split_points, uint32_t size, bool inclusive = true) const;
/**
* Returns an approximation to the Cumulative Distribution Function (CDF), which is the
- * cumulative analog of the PMF, of the input stream given a set of split points (values).
+ * cumulative analog of the PMF, of the input stream given a set of split points (items).
*
* <p>If the sketch is empty this returns an empty vector.
*
- * @param split_points an array of <i>m</i> unique, monotonically increasing float values
+ * @param split_points an array of <i>m</i> unique, monotonically increasing items
* that divide the input domain into <i>m+1</i> consecutive disjoint intervals.
- * If the template parameter inclusive=false, the definition of an "interval" is inclusive of the left split point and exclusive of the right
- * split point, with the exception that the last interval will include the maximum value.
- * If the template parameter inclusive=true, the definition of an "interval" is exclusive of the left split point and inclusive of the right
+ *
+ * @param size of the array of split points.
+ *
+ * @param inclusive if false, the definition of an "interval" is inclusive of the left split point and exclusive of the right
+ * split point, with the exception that the last interval will include the maximum item.
+ * If true, the definition of an "interval" is exclusive of the left split point and inclusive of the right
* split point.
- * It is not necessary to include either the min or max values in these split points.
*
* @return an array of m+1 double values, which are a consecutive approximation to the CDF
* of the input stream given the split_points. The value at array position j of the returned
* CDF array is the sum of the returned values in positions 0 through j of the returned PMF
* array.
*/
- template<bool inclusive = false>
- vector_double get_CDF(const T* split_points, uint32_t size) const;
+ vector_double get_CDF(const T* split_points, uint32_t size, bool inclusive = true) const;
/**
* Returns an approximate quantile of the given normalized rank.
* The normalized rank must be in the range [0.0, 1.0] (both inclusive).
- * @param rank the given normalized rank
- * @return approximate quantile given the normalized rank
+ * @param rank of an item in the hypothetical sorted stream.
+ * @param inclusive if true, the given rank is considered inclusive (includes weight of an item)
+ *
+ * @return approximate quantile associated with the given rank
*/
using quantile_return_type = typename quantile_sketch_sorted_view<T, Comparator, Allocator>::quantile_return_type;
- template<bool inclusive = false>
- quantile_return_type get_quantile(double rank) const;
+ quantile_return_type get_quantile(double rank, bool inclusive = true) const;
/**
* Returns an array of quantiles that correspond to the given array of normalized ranks.
* @param ranks given array of normalized ranks.
* @return array of quantiles that correspond to the given array of normalized ranks
*/
- template<bool inclusive = false>
- std::vector<T, Allocator> get_quantiles(const double* ranks, uint32_t size) const;
+ std::vector<T, Allocator> get_quantiles(const double* ranks, uint32_t size, bool inclusive = true) const;
/**
- * Returns an approximate lower bound of the given noramalized rank.
+ * Returns an approximate lower bound of the given normalized rank.
* @param rank the given rank, a value between 0 and 1.0.
* @param num_std_dev the number of standard deviations. Must be 1, 2, or 3.
* @return an approximate lower bound rank.
@@ -216,7 +222,7 @@ public:
double get_rank_lower_bound(double rank, uint8_t num_std_dev) const;
/**
- * Returns an approximate upper bound of the given noramalized rank.
+ * Returns an approximate upper bound of the given normalized rank.
* @param rank the given rank, a value between 0 and 1.0.
* @param num_std_dev the number of standard deviations. Must be 1, 2, or 3.
* @return an approximate upper bound rank.
@@ -242,7 +248,7 @@ public:
* @param instance of a SerDe
* @return size in bytes needed to serialize this sketch
*/
- template<typename TT = T, typename SerDe = S, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
+ template<typename TT = T, typename SerDe = serde<T>, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
/**
@@ -251,7 +257,7 @@ public:
* @param instance of a SerDe
* @return size in bytes needed to serialize this sketch
*/
- template<typename TT = T, typename SerDe = S, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
+ template<typename TT = T, typename SerDe = serde<T>, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
/**
@@ -259,7 +265,7 @@ public:
* @param os output stream
* @param instance of a SerDe
*/
- template<typename SerDe = S>
+ template<typename SerDe = serde<T>>
void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;
// This is a convenience alias for users
@@ -274,19 +280,9 @@ public:
* @param header_size_bytes space to reserve in front of the sketch
* @param instance of a SerDe
*/
- template<typename SerDe = S>
+ template<typename SerDe = serde<T>>
vector_bytes serialize(unsigned header_size_bytes = 0, const SerDe& sd = SerDe()) const;
- /**
- * This method deserializes a sketch from a given stream.
- * @param is input stream
- * @param instance of an Allocator
- * @return an instance of a sketch
- *
- * Deprecated, to be removed in the next major version
- */
- static req_sketch deserialize(std::istream& is, const Allocator& allocator = Allocator());
-
/**
* This method deserializes a sketch from a given stream.
* @param is input stream
@@ -294,20 +290,9 @@ public:
* @param instance of an Allocator
* @return an instance of a sketch
*/
- template<typename SerDe = S>
+ template<typename SerDe = serde<T>>
static req_sketch deserialize(std::istream& is, const SerDe& sd = SerDe(), const Allocator& allocator = Allocator());
- /**
- * This method deserializes a sketch from a given array of bytes.
- * @param bytes pointer to the array of bytes
- * @param size the size of the array
- * @param instance of an Allocator
- * @return an instance of a sketch
- *
- * Deprecated, to be removed in the next major version
- */
- static req_sketch deserialize(const void* bytes, size_t size, const Allocator& allocator = Allocator());
-
/**
* This method deserializes a sketch from a given array of bytes.
* @param bytes pointer to the array of bytes
@@ -316,7 +301,7 @@ public:
* @param instance of an Allocator
* @return an instance of a sketch
*/
- template<typename SerDe = S>
+ template<typename SerDe = serde<T>>
static req_sketch deserialize(const void* bytes, size_t size, const SerDe& sd = SerDe(), const Allocator& allocator = Allocator());
/**
@@ -330,8 +315,7 @@ public:
const_iterator begin() const;
const_iterator end() const;
- template<bool inclusive = false>
- quantile_sketch_sorted_view<T, Comparator, Allocator> get_sorted_view(bool cumulative) const;
+ quantile_sketch_sorted_view<T, Comparator, Allocator> get_sorted_view() const;
private:
Allocator allocator_;
@@ -341,8 +325,12 @@ private:
uint32_t num_retained_;
uint64_t n_;
std::vector<Compactor, AllocCompactor> compactors_;
- T* min_value_;
- T* max_value_;
+ T* min_item_;
+ T* max_item_;
+ mutable quantile_sketch_sorted_view<T, Comparator, Allocator>* sorted_view_;
+
+ void setup_sorted_view() const; // modifies mutable state
+ void reset_sorted_view();
static const bool LAZY_COMPRESSION = false;
@@ -366,7 +354,7 @@ private:
// for deserialization
class item_deleter;
- req_sketch(uint16_t k, bool hra, uint64_t n, std::unique_ptr<T, item_deleter> min_value, std::unique_ptr<T, item_deleter> max_value, std::vector<Compactor, AllocCompactor>&& compactors);
+ req_sketch(uint16_t k, bool hra, uint64_t n, std::unique_ptr<T, item_deleter> min_item, std::unique_ptr<T, item_deleter> max_item, std::vector<Compactor, AllocCompactor>&& compactors);
static void check_preamble_ints(uint8_t preamble_ints, uint8_t num_levels);
static void check_serial_version(uint8_t serial_version);
@@ -380,17 +368,17 @@ private:
}
template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
- static inline bool check_update_value(const TT& value) {
+ static inline bool check_update_item(const TT& value) {
return !std::isnan(value);
}
template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
- static inline void check_split_points(const T* values, uint32_t size) {
+ static inline void check_split_points(const T* items, uint32_t size) {
for (uint32_t i = 0; i < size ; i++) {
- if (std::isnan(values[i])) {
+ if (std::isnan(items[i])) {
throw std::invalid_argument("Values must not be NaN");
}
- if ((i < (size - 1)) && !(Comparator()(values[i], values[i + 1]))) {
+ if ((i < (size - 1)) && !(Comparator()(items[i], items[i + 1]))) {
throw std::invalid_argument("Values must be unique and monotonically increasing");
}
}
@@ -399,30 +387,29 @@ private:
// implementations for all other types
template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
static const TT& get_invalid_value() {
- throw std::runtime_error("getting quantiles from empty sketch is not supported for this type of values");
+ throw std::runtime_error("getting quantiles from empty sketch is not supported for this type of items");
}
template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
- static inline bool check_update_value(const TT&) {
+ static inline bool check_update_item(const TT&) {
return true;
}
template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
- static inline void check_split_points(const T* values, uint32_t size) {
+ static inline void check_split_points(const T* items, uint32_t size) {
for (uint32_t i = 0; i < size ; i++) {
- if ((i < (size - 1)) && !(Comparator()(values[i], values[i + 1]))) {
- throw std::invalid_argument("Values must be unique and monotonically increasing");
+ if ((i < (size - 1)) && !(Comparator()(items[i], items[i + 1]))) {
+ throw std::invalid_argument("Items must be unique and monotonically increasing");
}
}
}
// for type converting constructor
- template<typename TT, typename CC, typename SS, typename AA>
- friend class req_sketch;
+ template<typename TT, typename CC, typename AA> friend class req_sketch;
};
-template<typename T, typename C, typename S, typename A>
-class req_sketch<T, C, S, A>::const_iterator: public std::iterator<std::input_iterator_tag, T> {
+template<typename T, typename C, typename A>
+class req_sketch<T, C, A>::const_iterator: public std::iterator<std::input_iterator_tag, T> {
public:
const_iterator& operator++();
const_iterator& operator++(int);
@@ -434,7 +421,7 @@ private:
LevelsIterator levels_it_;
LevelsIterator levels_end_;
const T* compactor_it_;
- friend class req_sketch<T, C, S, A>;
+ friend class req_sketch<T, C, A>;
const_iterator(LevelsIterator begin, LevelsIterator end);
};
diff --git a/req/include/req_sketch_impl.hpp b/req/include/req_sketch_impl.hpp
index 09a192d..cd1309b 100755
--- a/req/include/req_sketch_impl.hpp
+++ b/req/include/req_sketch_impl.hpp
@@ -25,8 +25,8 @@
namespace datasketches {
-template<typename T, typename C, typename S, typename A>
-req_sketch<T, C, S, A>::req_sketch(uint16_t k, bool hra, const A& allocator):
+template<typename T, typename C, typename A>
+req_sketch<T, C, A>::req_sketch(uint16_t k, bool hra, const A& allocator):
allocator_(allocator),
k_(std::max<uint8_t>(static_cast<int>(k) & -2, static_cast<int>(req_constants::MIN_K))), //rounds down one if odd
hra_(hra),
@@ -34,26 +34,28 @@ max_nom_size_(0),
num_retained_(0),
n_(0),
compactors_(allocator),
-min_value_(nullptr),
-max_value_(nullptr)
+min_item_(nullptr),
+max_item_(nullptr),
+sorted_view_(nullptr)
{
grow();
}
-template<typename T, typename C, typename S, typename A>
-req_sketch<T, C, S, A>::~req_sketch() {
- if (min_value_ != nullptr) {
- min_value_->~T();
- allocator_.deallocate(min_value_, 1);
+template<typename T, typename C, typename A>
+req_sketch<T, C, A>::~req_sketch() {
+ if (min_item_ != nullptr) {
+ min_item_->~T();
+ allocator_.deallocate(min_item_, 1);
}
- if (max_value_ != nullptr) {
- max_value_->~T();
- allocator_.deallocate(max_value_, 1);
+ if (max_item_ != nullptr) {
+ max_item_->~T();
+ allocator_.deallocate(max_item_, 1);
}
+ reset_sorted_view();
}
-template<typename T, typename C, typename S, typename A>
-req_sketch<T, C, S, A>::req_sketch(const req_sketch& other):
+template<typename T, typename C, typename A>
+req_sketch<T, C, A>::req_sketch(const req_sketch& other):
allocator_(other.allocator_),
k_(other.k_),
hra_(other.hra_),
@@ -61,15 +63,16 @@ max_nom_size_(other.max_nom_size_),
num_retained_(other.num_retained_),
n_(other.n_),
compactors_(other.compactors_),
-min_value_(nullptr),
-max_value_(nullptr)
+min_item_(nullptr),
+max_item_(nullptr),
+sorted_view_(nullptr)
{
- if (other.min_value_ != nullptr) min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
- if (other.max_value_ != nullptr) max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
+ if (other.min_item_ != nullptr) min_item_ = new (allocator_.allocate(1)) T(*other.min_item_);
+ if (other.max_item_ != nullptr) max_item_ = new (allocator_.allocate(1)) T(*other.max_item_);
}
-template<typename T, typename C, typename S, typename A>
-req_sketch<T, C, S, A>::req_sketch(req_sketch&& other) noexcept :
+template<typename T, typename C, typename A>
+req_sketch<T, C, A>::req_sketch(req_sketch&& other) noexcept :
allocator_(std::move(other.allocator_)),
k_(other.k_),
hra_(other.hra_),
@@ -77,15 +80,16 @@ max_nom_size_(other.max_nom_size_),
num_retained_(other.num_retained_),
n_(other.n_),
compactors_(std::move(other.compactors_)),
-min_value_(other.min_value_),
-max_value_(other.max_value_)
+min_item_(other.min_item_),
+max_item_(other.max_item_),
+sorted_view_(nullptr)
{
- other.min_value_ = nullptr;
- other.max_value_ = nullptr;
+ other.min_item_ = nullptr;
+ other.max_item_ = nullptr;
}
-template<typename T, typename C, typename S, typename A>
-req_sketch<T, C, S, A>& req_sketch<T, C, S, A>::operator=(const req_sketch& other) {
+template<typename T, typename C, typename A>
+req_sketch<T, C, A>& req_sketch<T, C, A>::operator=(const req_sketch& other) {
req_sketch copy(other);
std::swap(allocator_, copy.allocator_);
std::swap(k_, copy.k_);
@@ -94,13 +98,14 @@ req_sketch<T, C, S, A>& req_sketch<T, C, S, A>::operator=(const req_sketch& othe
std::swap(num_retained_, copy.num_retained_);
std::swap(n_, copy.n_);
std::swap(compactors_, copy.compactors_);
- std::swap(min_value_, copy.min_value_);
- std::swap(max_value_, copy.max_value_);
+ std::swap(min_item_, copy.min_item_);
+ std::swap(max_item_, copy.max_item_);
+ reset_sorted_view();
return *this;
}
-template<typename T, typename C, typename S, typename A>
-req_sketch<T, C, S, A>& req_sketch<T, C, S, A>::operator=(req_sketch&& other) {
+template<typename T, typename C, typename A>
+req_sketch<T, C, A>& req_sketch<T, C, A>::operator=(req_sketch&& other) {
std::swap(allocator_, other.allocator_);
std::swap(k_, other.k_);
std::swap(hra_, other.hra_);
@@ -108,14 +113,15 @@ req_sketch<T, C, S, A>& req_sketch<T, C, S, A>::operator=(req_sketch&& other) {
std::swap(num_retained_, other.num_retained_);
std::swap(n_, other.n_);
std::swap(compactors_, other.compactors_);
- std::swap(min_value_, other.min_value_);
- std::swap(max_value_, other.max_value_);
+ std::swap(min_item_, other.min_item_);
+ std::swap(max_item_, other.max_item_);
+ reset_sorted_view();
return *this;
}
-template<typename T, typename C, typename S, typename A>
-template<typename TT, typename CC, typename SS, typename AA>
-req_sketch<T, C, S, A>::req_sketch(const req_sketch<TT, CC, SS, AA>& other, const A& allocator):
+template<typename T, typename C, typename A>
+template<typename TT, typename CC, typename AA>
+req_sketch<T, C, A>::req_sketch(const req_sketch<TT, CC, AA>& other, const A& allocator):
allocator_(allocator),
k_(other.k_),
hra_(other.hra_),
@@ -123,8 +129,9 @@ max_nom_size_(other.max_nom_size_),
num_retained_(other.num_retained_),
n_(other.n_),
compactors_(allocator),
-min_value_(nullptr),
-max_value_(nullptr)
+min_item_(nullptr),
+max_item_(nullptr),
+sorted_view_(nullptr)
{
static_assert(
std::is_constructible<T, TT>::value,
@@ -135,69 +142,70 @@ max_value_(nullptr)
compactors_.push_back(req_compactor<T, C, A>(compactor, allocator_));
}
if (!other.is_empty()) {
- min_value_ = new (allocator_.allocate(1)) T(other.get_min_value());
- max_value_ = new (allocator_.allocate(1)) T(other.get_max_value());
+ min_item_ = new (allocator_.allocate(1)) T(other.get_min_item());
+ max_item_ = new (allocator_.allocate(1)) T(other.get_max_item());
}
}
-template<typename T, typename C, typename S, typename A>
-uint16_t req_sketch<T, C, S, A>::get_k() const {
+template<typename T, typename C, typename A>
+uint16_t req_sketch<T, C, A>::get_k() const {
return k_;
}
-template<typename T, typename C, typename S, typename A>
-bool req_sketch<T, C, S, A>::is_HRA() const {
+template<typename T, typename C, typename A>
+bool req_sketch<T, C, A>::is_HRA() const {
return hra_;
}
-template<typename T, typename C, typename S, typename A>
-bool req_sketch<T, C, S, A>::is_empty() const {
+template<typename T, typename C, typename A>
+bool req_sketch<T, C, A>::is_empty() const {
return n_ == 0;
}
-template<typename T, typename C, typename S, typename A>
-uint64_t req_sketch<T, C, S, A>::get_n() const {
+template<typename T, typename C, typename A>
+uint64_t req_sketch<T, C, A>::get_n() const {
return n_;
}
-template<typename T, typename C, typename S, typename A>
-uint32_t req_sketch<T, C, S, A>::get_num_retained() const {
+template<typename T, typename C, typename A>
+uint32_t req_sketch<T, C, A>::get_num_retained() const {
return num_retained_;
}
-template<typename T, typename C, typename S, typename A>
-bool req_sketch<T, C, S, A>::is_estimation_mode() const {
+template<typename T, typename C, typename A>
+bool req_sketch<T, C, A>::is_estimation_mode() const {
return compactors_.size() > 1;
}
-template<typename T, typename C, typename S, typename A>
+template<typename T, typename C, typename A>
template<typename FwdT>
-void req_sketch<T, C, S, A>::update(FwdT&& item) {
- if (!check_update_value(item)) { return; }
+void req_sketch<T, C, A>::update(FwdT&& item) {
+ if (!check_update_item(item)) { return; }
if (is_empty()) {
- min_value_ = new (allocator_.allocate(1)) T(item);
- max_value_ = new (allocator_.allocate(1)) T(item);
+ min_item_ = new (allocator_.allocate(1)) T(item);
+ max_item_ = new (allocator_.allocate(1)) T(item);
} else {
- if (C()(item, *min_value_)) *min_value_ = item;
- if (C()(*max_value_, item)) *max_value_ = item;
+ if (C()(item, *min_item_)) *min_item_ = item;
+ if (C()(*max_item_, item)) *max_item_ = item;
}
compactors_[0].append(std::forward<FwdT>(item));
++num_retained_;
++n_;
if (num_retained_ == max_nom_size_) compress();
+ reset_sorted_view();
}
-template<typename T, typename C, typename S, typename A>
+template<typename T, typename C, typename A>
template<typename FwdSk>
-void req_sketch<T, C, S, A>::merge(FwdSk&& other) {
+void req_sketch<T, C, A>::merge(FwdSk&& other) {
if (is_HRA() != other.is_HRA()) throw std::invalid_argument("merging HRA and LRA is not valid");
if (other.is_empty()) return;
if (is_empty()) {
- min_value_ = new (allocator_.allocate(1)) T(conditional_forward<FwdSk>(*other.min_value_));
- max_value_ = new (allocator_.allocate(1)) T(conditional_forward<FwdSk>(*other.max_value_));
+ min_item_ = new (allocator_.allocate(1)) T(conditional_forward<FwdSk>(*other.min_item_));
+ max_item_ = new (allocator_.allocate(1)) T(conditional_forward<FwdSk>(*other.max_item_));
} else {
- if (C()(*other.min_value_, *min_value_)) *min_value_ = conditional_forward<FwdSk>(*other.min_value_);
- if (C()(*max_value_, *other.max_value_)) *max_value_ = conditional_forward<FwdSk>(*other.max_value_);
+ if (C()(*other.min_item_, *min_item_)) *min_item_ = conditional_forward<FwdSk>(*other.min_item_);
+ if (C()(*max_item_, *other.max_item_)) *max_item_ = conditional_forward<FwdSk>(*other.max_item_);
}
// grow until this has at least as many compactors as other
while (get_num_levels() < other.get_num_levels()) grow();
@@ -209,39 +217,38 @@ void req_sketch<T, C, S, A>::merge(FwdSk&& other) {
update_max_nom_size();
update_num_retained();
if (num_retained_ >= max_nom_size_) compress();
+ reset_sorted_view();
}
-template<typename T, typename C, typename S, typename A>
-const T& req_sketch<T, C, S, A>::get_min_value() const {
+template<typename T, typename C, typename A>
+const T& req_sketch<T, C, A>::get_min_item() const {
if (is_empty()) return get_invalid_value();
- return *min_value_;
+ return *min_item_;
}
-template<typename T, typename C, typename S, typename A>
-const T& req_sketch<T, C, S, A>::get_max_value() const {
+template<typename T, typename C, typename A>
+const T& req_sketch<T, C, A>::get_max_item() const {
if (is_empty()) return get_invalid_value();
- return *max_value_;
+ return *max_item_;
}
-template<typename T, typename C, typename S, typename A>
-C req_sketch<T, C, S, A>::get_comparator() const {
+template<typename T, typename C, typename A>
+C req_sketch<T, C, A>::get_comparator() const {
return C();
}
-template<typename T, typename C, typename S, typename A>
-template<bool inclusive>
-double req_sketch<T, C, S, A>::get_rank(const T& item) const {
+template<typename T, typename C, typename A>
+double req_sketch<T, C, A>::get_rank(const T& item, bool inclusive) const {
uint64_t weight = 0;
for (const auto& compactor: compactors_) {
- weight += compactor.template compute_weight<inclusive>(item);
+ weight += compactor.compute_weight(item, inclusive);
}
return static_cast<double>(weight) / n_;
}
-template<typename T, typename C, typename S, typename A>
-template<bool inclusive>
-auto req_sketch<T, C, S, A>::get_PMF(const T* split_points, uint32_t size) const -> vector_double {
- auto buckets = get_CDF<inclusive>(split_points, size);
+template<typename T, typename C, typename A>
+auto req_sketch<T, C, A>::get_PMF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
+ auto buckets = get_CDF(split_points, size, inclusive);
if (is_empty()) return buckets;
for (uint32_t i = size; i > 0; --i) {
buckets[i] -= buckets[i - 1];
@@ -249,58 +256,49 @@ auto req_sketch<T, C, S, A>::get_PMF(const T* split_points, uint32_t size) const
return buckets;
}
-template<typename T, typename C, typename S, typename A>
-template<bool inclusive>
-auto req_sketch<T, C, S, A>::get_CDF(const T* split_points, uint32_t size) const -> vector_double {
+template<typename T, typename C, typename A>
+auto req_sketch<T, C, A>::get_CDF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
vector_double buckets(allocator_);
if (is_empty()) return buckets;
check_split_points(split_points, size);
buckets.reserve(size + 1);
- for (uint32_t i = 0; i < size; ++i) buckets.push_back(get_rank<inclusive>(split_points[i]));
+ for (uint32_t i = 0; i < size; ++i) buckets.push_back(get_rank(split_points[i], inclusive));
buckets.push_back(1);
return buckets;
}
-template<typename T, typename C, typename S, typename A>
-template<bool inclusive>
-auto req_sketch<T, C, S, A>::get_quantile(double rank) const -> quantile_return_type {
+template<typename T, typename C, typename A>
+auto req_sketch<T, C, A>::get_quantile(double rank, bool inclusive) const -> quantile_return_type {
if (is_empty()) return get_invalid_value();
- if (rank == 0.0) return *min_value_;
- if (rank == 1.0) return *max_value_;
if ((rank < 0.0) || (rank > 1.0)) {
- throw std::invalid_argument("Rank cannot be less than zero or greater than 1.0");
+ throw std::invalid_argument("Normalized rank cannot be less than 0 or greater than 1");
}
// possible side-effect of sorting level zero
- return get_sorted_view<inclusive>(true).get_quantile(rank);
+ setup_sorted_view();
+ return sorted_view_->get_quantile(rank, inclusive);
}
-template<typename T, typename C, typename S, typename A>
-template<bool inclusive>
-std::vector<T, A> req_sketch<T, C, S, A>::get_quantiles(const double* ranks, uint32_t size) const {
+template<typename T, typename C, typename A>
+std::vector<T, A> req_sketch<T, C, A>::get_quantiles(const double* ranks, uint32_t size, bool inclusive) const {
std::vector<T, A> quantiles(allocator_);
if (is_empty()) return quantiles;
quantiles.reserve(size);
// possible side-effect of sorting level zero
- auto view = get_sorted_view<inclusive>(true);
+ setup_sorted_view();
for (uint32_t i = 0; i < size; ++i) {
const double rank = ranks[i];
if ((rank < 0.0) || (rank > 1.0)) {
- throw std::invalid_argument("rank cannot be less than zero or greater than 1.0");
- }
- if (rank == 0.0) quantiles.push_back(*min_value_);
- else if (rank == 1.0) quantiles.push_back(*max_value_);
- else {
- quantiles.push_back(view.get_quantile(rank));
+ throw std::invalid_argument("Normalized rank cannot be less than 0 or greater than 1");
}
+ quantiles.push_back(sorted_view_->get_quantile(rank, inclusive));
}
return quantiles;
}
-template<typename T, typename C, typename S, typename A>
-template<bool inclusive>
-quantile_sketch_sorted_view<T, C, A> req_sketch<T, C, S, A>::get_sorted_view(bool cumulative) const {
+template<typename T, typename C, typename A>
+quantile_sketch_sorted_view<T, C, A> req_sketch<T, C, A>::get_sorted_view() const {
if (!compactors_[0].is_sorted()) {
const_cast<Compactor&>(compactors_[0]).sort(); // allow this side effect
}
@@ -310,27 +308,27 @@ quantile_sketch_sorted_view<T, C, A> req_sketch<T, C, S, A>::get_sorted_view(boo
view.add(compactor.begin(), compactor.end(), 1 << compactor.get_lg_weight());
}
- if (cumulative) view.template convert_to_cummulative<inclusive>();
+ view.convert_to_cummulative();
return view;
}
-template<typename T, typename C, typename S, typename A>
-double req_sketch<T, C, S, A>::get_rank_lower_bound(double rank, uint8_t num_std_dev) const {
+template<typename T, typename C, typename A>
+double req_sketch<T, C, A>::get_rank_lower_bound(double rank, uint8_t num_std_dev) const {
return get_rank_lb(get_k(), get_num_levels(), rank, num_std_dev, get_n(), hra_);
}
-template<typename T, typename C, typename S, typename A>
-double req_sketch<T, C, S, A>::get_rank_upper_bound(double rank, uint8_t num_std_dev) const {
+template<typename T, typename C, typename A>
+double req_sketch<T, C, A>::get_rank_upper_bound(double rank, uint8_t num_std_dev) const {
return get_rank_ub(get_k(), get_num_levels(), rank, num_std_dev, get_n(), hra_);
}
-template<typename T, typename C, typename S, typename A>
-double req_sketch<T, C, S, A>::get_RSE(uint16_t k, double rank, bool hra, uint64_t n) {
+template<typename T, typename C, typename A>
+double req_sketch<T, C, A>::get_RSE(uint16_t k, double rank, bool hra, uint64_t n) {
return get_rank_lb(k, 2, rank, 1, n, hra);
}
-template<typename T, typename C, typename S, typename A>
-double req_sketch<T, C, S, A>::get_rank_lb(uint16_t k, uint8_t num_levels, double rank, uint8_t num_std_dev, uint64_t n, bool hra) {
+template<typename T, typename C, typename A>
+double req_sketch<T, C, A>::get_rank_lb(uint16_t k, uint8_t num_levels, double rank, uint8_t num_std_dev, uint64_t n, bool hra) {
if (is_exact_rank(k, num_levels, rank, n, hra)) return rank;
const double relative = relative_rse_factor() / k * (hra ? 1.0 - rank : rank);
const double fixed = FIXED_RSE_FACTOR / k;
@@ -339,8 +337,8 @@ double req_sketch<T, C, S, A>::get_rank_lb(uint16_t k, uint8_t num_levels, doubl
return std::max(lb_rel, lb_fix);
}
-template<typename T, typename C, typename S, typename A>
-double req_sketch<T, C, S, A>::get_rank_ub(uint16_t k, uint8_t num_levels, double rank, uint8_t num_std_dev, uint64_t n, bool hra) {
+template<typename T, typename C, typename A>
+double req_sketch<T, C, A>::get_rank_ub(uint16_t k, uint8_t num_levels, double rank, uint8_t num_std_dev, uint64_t n, bool hra) {
if (is_exact_rank(k, num_levels, rank, n, hra)) return rank;
const double relative = relative_rse_factor() / k * (hra ? 1.0 - rank : rank);
const double fixed = FIXED_RSE_FACTOR / k;
@@ -349,23 +347,23 @@ double req_sketch<T, C, S, A>::get_rank_ub(uint16_t k, uint8_t num_levels, doubl
return std::min(ub_rel, ub_fix);
}
-template<typename T, typename C, typename S, typename A>
-bool req_sketch<T, C, S, A>::is_exact_rank(uint16_t k, uint8_t num_levels, double rank, uint64_t n, bool hra) {
+template<typename T, typename C, typename A>
+bool req_sketch<T, C, A>::is_exact_rank(uint16_t k, uint8_t num_levels, double rank, uint64_t n, bool hra) {
const unsigned base_cap = k * req_constants::INIT_NUM_SECTIONS;
if (num_levels == 1 || n <= base_cap) return true;
const double exact_rank_thresh = static_cast<double>(base_cap) / n;
return (hra && rank >= 1.0 - exact_rank_thresh) || (!hra && rank <= exact_rank_thresh);
}
-template<typename T, typename C, typename S, typename A>
-double req_sketch<T, C, S, A>::relative_rse_factor() {
+template<typename T, typename C, typename A>
+double req_sketch<T, C, A>::relative_rse_factor() {
return sqrt(0.0512 / req_constants::INIT_NUM_SECTIONS);
}
// implementation for fixed-size arithmetic types (integral and floating point)
-template<typename T, typename C, typename S, typename A>
+template<typename T, typename C, typename A>
template<typename TT, typename SerDe, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
-size_t req_sketch<T, C, S, A>::get_serialized_size_bytes(const SerDe& sd) const {
+size_t req_sketch<T, C, A>::get_serialized_size_bytes(const SerDe& sd) const {
size_t size = PREAMBLE_SIZE_BYTES;
if (is_empty()) return size;
if (is_estimation_mode()) {
@@ -380,15 +378,15 @@ size_t req_sketch<T, C, S, A>::get_serialized_size_bytes(const SerDe& sd) const
}
// implementation for all other types
-template<typename T, typename C, typename S, typename A>
+template<typename T, typename C, typename A>
template<typename TT, typename SerDe, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type>
-size_t req_sketch<T, C, S, A>::get_serialized_size_bytes(const SerDe& sd) const {
+size_t req_sketch<T, C, A>::get_serialized_size_bytes(const SerDe& sd) const {
size_t size = PREAMBLE_SIZE_BYTES;
if (is_empty()) return size;
if (is_estimation_mode()) {
size += sizeof(n_);
- size += sd.size_of_item(*min_value_);
- size += sd.size_of_item(*max_value_);
+ size += sd.size_of_item(*min_item_);
+ size += sd.size_of_item(*max_item_);
}
if (n_ == 1) {
size += sd.size_of_item(*compactors_[0].begin());
@@ -398,9 +396,9 @@ size_t req_sketch<T, C, S, A>::get_serialized_size_bytes(const SerDe& sd) const
return size;
}
-template<typename T, typename C, typename S, typename A>
+template<typename T, typename C, typename A>
template<typename SerDe>
-void req_sketch<T, C, S, A>::serialize(std::ostream& os, const SerDe& sd) const {
+void req_sketch<T, C, A>::serialize(std::ostream& os, const SerDe& sd) const {
const uint8_t preamble_ints = is_estimation_mode() ? 4 : 2;
write(os, preamble_ints);
const uint8_t serial_version = SERIAL_VERSION;
@@ -423,8 +421,8 @@ void req_sketch<T, C, S, A>::serialize(std::ostream& os, const SerDe& sd) const
if (is_empty()) return;
if (is_estimation_mode()) {
write(os, n_);
- sd.serialize(os, min_value_, 1);
- sd.serialize(os, max_value_, 1);
+ sd.serialize(os, min_item_, 1);
+ sd.serialize(os, max_item_, 1);
}
if (raw_items) {
sd.serialize(os, compactors_[0].begin(), num_raw_items);
@@ -433,9 +431,9 @@ void req_sketch<T, C, S, A>::serialize(std::ostream& os, const SerDe& sd) const
}
}
-template<typename T, typename C, typename S, typename A>
+template<typename T, typename C, typename A>
template<typename SerDe>
-auto req_sketch<T, C, S, A>::serialize(unsigned header_size_bytes, const SerDe& sd) const -> vector_bytes {
+auto req_sketch<T, C, A>::serialize(unsigned header_size_bytes, const SerDe& sd) const -> vector_bytes {
const size_t size = header_size_bytes + get_serialized_size_bytes(sd);
vector_bytes bytes(size, 0, allocator_);
uint8_t* ptr = bytes.data() + header_size_bytes;
@@ -463,8 +461,8 @@ auto req_sketch<T, C, S, A>::serialize(unsigned header_size_bytes, const SerDe&
if (!is_empty()) {
if (is_estimation_mode()) {
ptr += copy_to_mem(n_, ptr);
- ptr += sd.serialize(ptr, end_ptr - ptr, min_value_, 1);
- ptr += sd.serialize(ptr, end_ptr - ptr, max_value_, 1);
+ ptr += sd.serialize(ptr, end_ptr - ptr, min_item_, 1);
+ ptr += sd.serialize(ptr, end_ptr - ptr, max_item_, 1);
}
if (raw_items) {
ptr += sd.serialize(ptr, end_ptr - ptr, compactors_[0].begin(), num_raw_items);
@@ -475,14 +473,9 @@ auto req_sketch<T, C, S, A>::serialize(unsigned header_size_bytes, const SerDe&
return bytes;
}
-template<typename T, typename C, typename S, typename A>
-req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(std::istream& is, const A& allocator) {
- return deserialize(is, S(), allocator);
-}
-
-template<typename T, typename C, typename S, typename A>
+template<typename T, typename C, typename A>
template<typename SerDe>
-req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(std::istream& is, const SerDe& sd, const A& allocator) {
+req_sketch<T, C, A> req_sketch<T, C, A>::deserialize(std::istream& is, const SerDe& sd, const A& allocator) {
const auto preamble_ints = read<uint8_t>(is);
const auto serial_version = read<uint8_t>(is);
const auto family_id = read<uint8_t>(is);
@@ -502,10 +495,10 @@ req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(std::istream& is, con
A alloc(allocator);
auto item_buffer_deleter = [&alloc](T* ptr) { alloc.deallocate(ptr, 1); };
- std::unique_ptr<T, decltype(item_buffer_deleter)> min_value_buffer(alloc.allocate(1), item_buffer_deleter);
- std::unique_ptr<T, decltype(item_buffer_deleter)> max_value_buffer(alloc.allocate(1), item_buffer_deleter);
- std::unique_ptr<T, item_deleter> min_value(nullptr, item_deleter(allocator));
- std::unique_ptr<T, item_deleter> max_value(nullptr, item_deleter(allocator));
+ std::unique_ptr<T, decltype(item_buffer_deleter)> min_item_buffer(alloc.allocate(1), item_buffer_deleter);
+ std::unique_ptr<T, decltype(item_buffer_deleter)> max_item_buffer(alloc.allocate(1), item_buffer_deleter);
+ std::unique_ptr<T, item_deleter> min_item(nullptr, item_deleter(allocator));
+ std::unique_ptr<T, item_deleter> max_item(nullptr, item_deleter(allocator));
const bool raw_items = flags_byte & (1 << flags::RAW_ITEMS);
const bool is_level_0_sorted = flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED);
@@ -514,12 +507,12 @@ req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(std::istream& is, con
uint64_t n = 1;
if (num_levels > 1) {
n = read<uint64_t>(is);
- sd.deserialize(is, min_value_buffer.get(), 1);
+ sd.deserialize(is, min_item_buffer.get(), 1);
// serde call did not throw, repackage with destructor
- min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
- sd.deserialize(is, max_value_buffer.get(), 1);
+ min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
+ sd.deserialize(is, max_item_buffer.get(), 1);
// serde call did not throw, repackage with destructor
- max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
+ max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
}
if (raw_items) {
@@ -539,26 +532,21 @@ req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(std::istream& is, con
if (C()(*it, *min_it)) min_it = it;
if (C()(*max_it, *it)) max_it = it;
}
- new (min_value_buffer.get()) T(*min_it);
+ new (min_item_buffer.get()) T(*min_it);
// copy did not throw, repackage with destructor
- min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
- new (max_value_buffer.get()) T(*max_it);
+ min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
+ new (max_item_buffer.get()) T(*max_it);
// copy did not throw, repackage with destructor
- max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
+ max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
}
if (!is.good()) throw std::runtime_error("error reading from std::istream");
- return req_sketch(k, hra, n, std::move(min_value), std::move(max_value), std::move(compactors));
+ return req_sketch(k, hra, n, std::move(min_item), std::move(max_item), std::move(compactors));
}
-template<typename T, typename C, typename S, typename A>
-req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(const void* bytes, size_t size, const A& allocator) {
- return deserialize(bytes, size, S(), allocator);
-}
-
-template<typename T, typename C, typename S, typename A>
+template<typename T, typename C, typename A>
template<typename SerDe>
-req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(const void* bytes, size_t size, const SerDe& sd, const A& allocator) {
+req_sketch<T, C, A> req_sketch<T, C, A>::deserialize(const void* bytes, size_t size, const SerDe& sd, const A& allocator) {
ensure_minimum_memory(size, 8);
const char* ptr = static_cast<const char*>(bytes);
const char* end_ptr = static_cast<const char*>(bytes) + size;
@@ -588,10 +576,10 @@ req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(const void* bytes, si
A alloc(allocator);
auto item_buffer_deleter = [&alloc](T* ptr) { alloc.deallocate(ptr, 1); };
- std::unique_ptr<T, decltype(item_buffer_deleter)> min_value_buffer(alloc.allocate(1), item_buffer_deleter);
- std::unique_ptr<T, decltype(item_buffer_deleter)> max_value_buffer(alloc.allocate(1), item_buffer_deleter);
- std::unique_ptr<T, item_deleter> min_value(nullptr, item_deleter(allocator));
- std::unique_ptr<T, item_deleter> max_value(nullptr, item_deleter(allocator));
+ std::unique_ptr<T, decltype(item_buffer_deleter)> min_item_buffer(alloc.allocate(1), item_buffer_deleter);
+ std::unique_ptr<T, decltype(item_buffer_deleter)> max_item_buffer(alloc.allocate(1), item_buffer_deleter);
+ std::unique_ptr<T, item_deleter> min_item(nullptr, item_deleter(allocator));
+ std::unique_ptr<T, item_deleter> max_item(nullptr, item_deleter(allocator));
const bool raw_items = flags_byte & (1 << flags::RAW_ITEMS);
const bool is_level_0_sorted = flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED);
@@ -601,12 +589,12 @@ req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(const void* bytes, si
if (num_levels > 1) {
ensure_minimum_memory(end_ptr - ptr, sizeof(n));
ptr += copy_from_mem(ptr, n);
- ptr += sd.deserialize(ptr, end_ptr - ptr, min_value_buffer.get(), 1);
+ ptr += sd.deserialize(ptr, end_ptr - ptr, min_item_buffer.get(), 1);
// serde call did not throw, repackage with destructor
- min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
- ptr += sd.deserialize(ptr, end_ptr - ptr, max_value_buffer.get(), 1);
+ min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
+ ptr += sd.deserialize(ptr, end_ptr - ptr, max_item_buffer.get(), 1);
// serde call did not throw, repackage with destructor
- max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
+ max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
}
if (raw_items) {
@@ -630,43 +618,43 @@ req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(const void* bytes, si
if (C()(*it, *min_it)) min_it = it;
if (C()(*max_it, *it)) max_it = it;
}
- new (min_value_buffer.get()) T(*min_it);
+ new (min_item_buffer.get()) T(*min_it);
// copy did not throw, repackage with destructor
- min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
- new (max_value_buffer.get()) T(*max_it);
+ min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
+ new (max_item_buffer.get()) T(*max_it);
// copy did not throw, repackage with destructor
- max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
+ max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
}
- return req_sketch(k, hra, n, std::move(min_value), std::move(max_value), std::move(compactors));
+ return req_sketch(k, hra, n, std::move(min_item), std::move(max_item), std::move(compactors));
}
-template<typename T, typename C, typename S, typename A>
-void req_sketch<T, C, S, A>::grow() {
+template<typename T, typename C, typename A>
+void req_sketch<T, C, A>::grow() {
const uint8_t lg_weight = get_num_levels();
compactors_.push_back(Compactor(hra_, lg_weight, k_, allocator_));
update_max_nom_size();
}
-template<typename T, typename C, typename S, typename A>
-uint8_t req_sketch<T, C, S, A>::get_num_levels() const {
+template<typename T, typename C, typename A>
+uint8_t req_sketch<T, C, A>::get_num_levels() const {
return static_cast<uint8_t>(compactors_.size());
}
-template<typename T, typename C, typename S, typename A>
-void req_sketch<T, C, S, A>::update_max_nom_size() {
+template<typename T, typename C, typename A>
+void req_sketch<T, C, A>::update_max_nom_size() {
max_nom_size_ = 0;
for (const auto& compactor: compactors_) max_nom_size_ += compactor.get_nom_capacity();
}
-template<typename T, typename C, typename S, typename A>
-void req_sketch<T, C, S, A>::update_num_retained() {
+template<typename T, typename C, typename A>
+void req_sketch<T, C, A>::update_num_retained() {
num_retained_ = 0;
for (const auto& compactor: compactors_) num_retained_ += compactor.get_num_items();
}
-template<typename T, typename C, typename S, typename A>
-void req_sketch<T, C, S, A>::compress() {
+template<typename T, typename C, typename A>
+void req_sketch<T, C, A>::compress() {
for (size_t h = 0; h < compactors_.size(); ++h) {
if (compactors_[h].get_num_items() >= compactors_[h].get_nom_capacity()) {
if (h == 0) compactors_[0].sort();
@@ -681,8 +669,8 @@ void req_sketch<T, C, S, A>::compress() {
}
}
-template<typename T, typename C, typename S, typename A>
-string<A> req_sketch<T, C, S, A>::to_string(bool print_levels, bool print_items) const {
+template<typename T, typename C, typename A>
+string<A> req_sketch<T, C, A>::to_string(bool print_levels, bool print_items) const {
// Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
// The stream does not support passing an allocator instance, and alternatives are complicated.
std::ostringstream os;
@@ -697,8 +685,8 @@ string<A> req_sketch<T, C, S, A>::to_string(bool print_levels, bool print_items)
os << " Retained items : " << num_retained_ << std::endl;
os << " Capacity items : " << max_nom_size_ << std::endl;
if (!is_empty()) {
- os << " Min value : " << *min_value_ << std::endl;
- os << " Max value : " << *max_value_ << std::endl;
+ os << " Min item : " << *min_item_ << std::endl;
+ os << " Max item : " << *max_item_ << std::endl;
}
os << "### End sketch summary" << std::endl;
@@ -728,8 +716,8 @@ string<A> req_sketch<T, C, S, A>::to_string(bool print_levels, bool print_items)
return string<A>(os.str().c_str(), allocator_);
}
-template<typename T, typename C, typename S, typename A>
-class req_sketch<T, C, S, A>::item_deleter {
+template<typename T, typename C, typename A>
+class req_sketch<T, C, A>::item_deleter {
public:
item_deleter(const A& allocator): allocator_(allocator) {}
void operator() (T* ptr) {
@@ -742,8 +730,8 @@ class req_sketch<T, C, S, A>::item_deleter {
A allocator_;
};
-template<typename T, typename C, typename S, typename A>
-req_sketch<T, C, S, A>::req_sketch(uint16_t k, bool hra, uint64_t n, std::unique_ptr<T, item_deleter> min_value, std::unique_ptr<T, item_deleter> max_value, std::vector<Compactor, AllocCompactor>&& compactors):
+template<typename T, typename C, typename A>
+req_sketch<T, C, A>::req_sketch(uint16_t k, bool hra, uint64_t n, std::unique_ptr<T, item_deleter> min_item, std::unique_ptr<T, item_deleter> max_item, std::vector<Compactor, AllocCompactor>&& compactors):
allocator_(compactors.get_allocator()),
k_(k),
hra_(hra),
@@ -751,15 +739,16 @@ max_nom_size_(0),
num_retained_(0),
n_(n),
compactors_(std::move(compactors)),
-min_value_(min_value.release()),
-max_value_(max_value.release())
+min_item_(min_item.release()),
+max_item_(max_item.release()),
+sorted_view_(nullptr)
{
update_max_nom_size();
update_num_retained();
}
-template<typename T, typename C, typename S, typename A>
-void req_sketch<T, C, S, A>::check_preamble_ints(uint8_t preamble_ints, uint8_t num_levels) {
+template<typename T, typename C, typename A>
+void req_sketch<T, C, A>::check_preamble_ints(uint8_t preamble_ints, uint8_t num_levels) {
const uint8_t expected_preamble_ints = num_levels > 1 ? 4 : 2;
if (preamble_ints != expected_preamble_ints) {
throw std::invalid_argument("Possible corruption: preamble ints must be "
@@ -767,8 +756,8 @@ void req_sketch<T, C, S, A>::check_preamble_ints(uint8_t preamble_ints, uint8_t
}
}
-template<typename T, typename C, typename S, typename A>
-void req_sketch<T, C, S, A>::check_serial_version(uint8_t serial_version) {
+template<typename T, typename C, typename A>
+void req_sketch<T, C, A>::check_serial_version(uint8_t serial_version) {
if (serial_version != SERIAL_VERSION) {
throw std::invalid_argument("Possible corruption: serial version mismatch: expected "
+ std::to_string(SERIAL_VERSION)
@@ -776,35 +765,53 @@ void req_sketch<T, C, S, A>::check_serial_version(uint8_t serial_version) {
}
}
-template<typename T, typename C, typename S, typename A>
-void req_sketch<T, C, S, A>::check_family_id(uint8_t family_id) {
+template<typename T, typename C, typename A>
+void req_sketch<T, C, A>::check_family_id(uint8_t family_id) {
if (family_id != FAMILY) {
throw std::invalid_argument("Possible corruption: family mismatch: expected "
+ std::to_string(FAMILY) + ", got " + std::to_string(family_id));
}
}
-template<typename T, typename C, typename S, typename A>
-auto req_sketch<T, C, S, A>::begin() const -> const_iterator {
+template<typename T, typename C, typename A>
+auto req_sketch<T, C, A>::begin() const -> const_iterator {
return const_iterator(compactors_.begin(), compactors_.end());
}
-template<typename T, typename C, typename S, typename A>
-auto req_sketch<T, C, S, A>::end() const -> const_iterator {
+template<typename T, typename C, typename A>
+auto req_sketch<T, C, A>::end() const -> const_iterator {
return const_iterator(compactors_.end(), compactors_.end());
}
+template<typename T, typename C, typename A>
+void req_sketch<T, C, A>::setup_sorted_view() const {
+ if (sorted_view_ == nullptr) {
+ using AllocSortedView = typename std::allocator_traits<A>::template rebind_alloc<quantile_sketch_sorted_view<T, C, A>>;
+ sorted_view_ = new (AllocSortedView(allocator_).allocate(1)) quantile_sketch_sorted_view<T, C, A>(get_sorted_view());
+ }
+}
+
+template<typename T, typename C, typename A>
+void req_sketch<T, C, A>::reset_sorted_view() {
+ if (sorted_view_ != nullptr) {
+ sorted_view_->~quantile_sketch_sorted_view();
+ using AllocSortedView = typename std::allocator_traits<A>::template rebind_alloc<quantile_sketch_sorted_view<T, C, A>>;
+ AllocSortedView(allocator_).deallocate(sorted_view_, 1);
+ sorted_view_ = nullptr;
+ }
+}
+
// iterator
-template<typename T, typename C, typename S, typename A>
-req_sketch<T, C, S, A>::const_iterator::const_iterator(LevelsIterator begin, LevelsIterator end):
+template<typename T, typename C, typename A>
+req_sketch<T, C, A>::const_iterator::const_iterator(LevelsIterator begin, LevelsIterator end):
levels_it_(begin),
levels_end_(end),
compactor_it_(begin == end ? nullptr : (*levels_it_).begin())
{}
-template<typename T, typename C, typename S, typename A>
-auto req_sketch<T, C, S, A>::const_iterator::operator++() -> const_iterator& {
+template<typename T, typename C, typename A>
+auto req_sketch<T, C, A>::const_iterator::operator++() -> const_iterator& {
++compactor_it_;
if (compactor_it_ == (*levels_it_).end()) {
++levels_it_;
@@ -813,27 +820,27 @@ auto req_sketch<T, C, S, A>::const_iterator::operator++() -> const_iterator& {
return *this;
}
-template<typename T, typename C, typename S, typename A>
-auto req_sketch<T, C, S, A>::const_iterator::operator++(int) -> const_iterator& {
+template<typename T, typename C, typename A>
+auto req_sketch<T, C, A>::const_iterator::operator++(int) -> const_iterator& {
const_iterator tmp(*this);
operator++();
return tmp;
}
-template<typename T, typename C, typename S, typename A>
-bool req_sketch<T, C, S, A>::const_iterator::operator==(const const_iterator& other) const {
+template<typename T, typename C, typename A>
+bool req_sketch<T, C, A>::const_iterator::operator==(const const_iterator& other) const {
if (levels_it_ != other.levels_it_) return false;
if (levels_it_ == levels_end_) return true;
return compactor_it_ == other.compactor_it_;
}
-template<typename T, typename C, typename S, typename A>
-bool req_sketch<T, C, S, A>::const_iterator::operator!=(const const_iterator& other) const {
+template<typename T, typename C, typename A>
+bool req_sketch<T, C, A>::const_iterator::operator!=(const const_iterator& other) const {
return !operator==(other);
}
-template<typename T, typename C, typename S, typename A>
-std::pair<const T&, const uint64_t> req_sketch<T, C, S, A>::const_iterator::operator*() const {
+template<typename T, typename C, typename A>
+std::pair<const T&, const uint64_t> req_sketch<T, C, A>::const_iterator::operator*() const {
return std::pair<const T&, const uint64_t>(*compactor_it_, 1ULL << (*levels_it_).get_lg_weight());
}
diff --git a/req/test/CMakeLists.txt b/req/test/CMakeLists.txt
index a509068..9afde9c 100755
--- a/req/test/CMakeLists.txt
+++ b/req/test/CMakeLists.txt
@@ -17,7 +17,7 @@
add_executable(req_test)
-target_link_libraries(req_test req common_test)
+target_link_libraries(req_test req common_test_lib)
set_target_properties(req_test PROPERTIES
CXX_STANDARD 11
diff --git a/req/test/req_sketch_custom_type_test.cpp b/req/test/req_sketch_custom_type_test.cpp
index 2cd2174..c36892f 100644
--- a/req/test/req_sketch_custom_type_test.cpp
+++ b/req/test/req_sketch_custom_type_test.cpp
@@ -26,7 +26,7 @@
namespace datasketches {
-using req_test_type_sketch = req_sketch<test_type, test_type_less, test_type_serde, test_allocator<test_type>>;
+using req_test_type_sketch = req_sketch<test_type, test_type_less, test_allocator<test_type>>;
using alloc = test_allocator<test_type>;
TEST_CASE("req sketch custom type", "[req_sketch]") {
@@ -37,17 +37,17 @@ TEST_CASE("req sketch custom type", "[req_sketch]") {
SECTION("compact level zero") {
req_test_type_sketch sketch(4, true, 0);
REQUIRE_THROWS_AS(sketch.get_quantile(0), std::runtime_error);
- REQUIRE_THROWS_AS(sketch.get_min_value(), std::runtime_error);
- REQUIRE_THROWS_AS(sketch.get_max_value(), std::runtime_error);
- REQUIRE(sketch.get_serialized_size_bytes() == 8);
+ REQUIRE_THROWS_AS(sketch.get_min_item(), std::runtime_error);
+ REQUIRE_THROWS_AS(sketch.get_max_item(), std::runtime_error);
+ REQUIRE(sketch.get_serialized_size_bytes(test_type_serde()) == 8);
for (int i = 0; i < 24; ++i) sketch.update(i);
//std::cout << sketch.to_string(true);
REQUIRE(sketch.is_estimation_mode());
REQUIRE(sketch.get_n() > sketch.get_num_retained());
- REQUIRE(sketch.get_min_value().get_value() == 0);
- REQUIRE(sketch.get_max_value().get_value() == 23);
+ REQUIRE(sketch.get_min_item().get_value() == 0);
+ REQUIRE(sketch.get_max_item().get_value() == 23);
}
SECTION("merge small") {
@@ -63,8 +63,8 @@ TEST_CASE("req sketch custom type", "[req_sketch]") {
REQUIRE_FALSE(sketch2.is_estimation_mode());
REQUIRE(sketch2.get_num_retained() == sketch2.get_n());
- REQUIRE(sketch2.get_min_value().get_value() == 1);
- REQUIRE(sketch2.get_max_value().get_value() == 2);
+ REQUIRE(sketch2.get_min_item().get_value() == 1);
+ REQUIRE(sketch2.get_max_item().get_value() == 2);
}
SECTION("merge higher levels") {
@@ -80,8 +80,8 @@ TEST_CASE("req sketch custom type", "[req_sketch]") {
REQUIRE(sketch2.is_estimation_mode());
REQUIRE(sketch2.get_n() > sketch2.get_num_retained());
- REQUIRE(sketch2.get_min_value().get_value() == 0);
- REQUIRE(sketch2.get_max_value().get_value() == 23);
+ REQUIRE(sketch2.get_min_item().get_value() == 0);
+ REQUIRE(sketch2.get_max_item().get_value() == 23);
}
SECTION("serialize deserialize") {
@@ -91,17 +91,17 @@ TEST_CASE("req sketch custom type", "[req_sketch]") {
for (int i = 0; i < n; i++) sketch1.update(i);
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
- sketch1.serialize(s);
- REQUIRE((size_t) s.tellp() == sketch1.get_serialized_size_bytes());
- auto sketch2 = req_test_type_sketch::deserialize(s, alloc(0));
- REQUIRE((size_t) s.tellg() == sketch2.get_serialized_size_bytes());
+ sketch1.serialize(s, test_type_serde());
+ REQUIRE((size_t) s.tellp() == sketch1.get_serialized_size_bytes(test_type_serde()));
+ auto sketch2 = req_test_type_sketch::deserialize(s, test_type_serde(), alloc(0));
+ REQUIRE((size_t) s.tellg() == sketch2.get_serialized_size_bytes(test_type_serde()));
REQUIRE(s.tellg() == s.tellp());
REQUIRE(sketch2.is_empty() == sketch1.is_empty());
REQUIRE(sketch2.is_estimation_mode() == sketch1.is_estimation_mode());
REQUIRE(sketch2.get_n() == sketch1.get_n());
REQUIRE(sketch2.get_num_retained() == sketch1.get_num_retained());
- REQUIRE(sketch2.get_min_value().get_value() == sketch1.get_min_value().get_value());
- REQUIRE(sketch2.get_max_value().get_value() == sketch1.get_max_value().get_value());
+ REQUIRE(sketch2.get_min_item().get_value() == sketch1.get_min_item().get_value());
+ REQUIRE(sketch2.get_max_item().get_value() == sketch1.get_max_item().get_value());
REQUIRE(sketch2.get_quantile(0.5).get_value() == sketch1.get_quantile(0.5).get_value());
REQUIRE(sketch2.get_rank(0) == sketch1.get_rank(0));
REQUIRE(sketch2.get_rank(n) == sketch1.get_rank(n));
@@ -114,8 +114,8 @@ TEST_CASE("req sketch custom type", "[req_sketch]") {
req_test_type_sketch sketch2(4, true, 0);
sketch2.update(10);
sketch2.merge(std::move(sketch1));
- REQUIRE(sketch2.get_min_value().get_value() == 0);
- REQUIRE(sketch2.get_max_value().get_value() == 10);
+ REQUIRE(sketch2.get_min_item().get_value() == 0);
+ REQUIRE(sketch2.get_max_item().get_value() == 10);
REQUIRE(sketch2.get_n() == 11);
}
diff --git a/req/test/req_sketch_test.cpp b/req/test/req_sketch_test.cpp
index 257ba4c..abe4979 100755
--- a/req/test/req_sketch_test.cpp
+++ b/req/test/req_sketch_test.cpp
@@ -45,8 +45,8 @@ TEST_CASE("req sketch: empty", "[req_sketch]") {
REQUIRE(sketch.get_num_retained() == 0);
REQUIRE(std::isnan(sketch.get_rank(0)));
REQUIRE(std::isnan(sketch.get_rank(std::numeric_limits<float>::infinity())));
- REQUIRE(std::isnan(sketch.get_min_value()));
- REQUIRE(std::isnan(sketch.get_max_value()));
+ REQUIRE(std::isnan(sketch.get_min_item()));
+ REQUIRE(std::isnan(sketch.get_max_item()));
REQUIRE(std::isnan(sketch.get_quantile(0)));
REQUIRE(std::isnan(sketch.get_quantile(0.5)));
REQUIRE(std::isnan(sketch.get_quantile(1)));
@@ -66,16 +66,16 @@ TEST_CASE("req sketch: single value, lra", "[req_sketch]") {
REQUIRE_FALSE(sketch.is_estimation_mode());
REQUIRE(sketch.get_n() == 1);
REQUIRE(sketch.get_num_retained() == 1);
- REQUIRE(sketch.get_rank(1.0f) == 0);
- REQUIRE(sketch.get_rank<true>(1.0f) == 1);
- REQUIRE(sketch.get_rank(1.1f) == 1);
+ REQUIRE(sketch.get_rank(1.0f, false) == 0);
+ REQUIRE(sketch.get_rank(1.0f) == 1);
+ REQUIRE(sketch.get_rank(1.1f, false) == 1);
REQUIRE(sketch.get_rank(std::numeric_limits<float>::infinity()) == 1);
- REQUIRE(sketch.get_quantile(0) == 1);
- REQUIRE(sketch.get_quantile(0.5) == 1);
- REQUIRE(sketch.get_quantile(1) == 1);
+ REQUIRE(sketch.get_quantile(0, false) == 1);
+ REQUIRE(sketch.get_quantile(0.5, false) == 1);
+ REQUIRE(sketch.get_quantile(1, false) == 1);
const double ranks[3] {0, 0.5, 1};
- auto quantiles = sketch.get_quantiles(ranks, 3);
+ auto quantiles = sketch.get_quantiles(ranks, 3, false);
REQUIRE(quantiles.size() == 3);
REQUIRE(quantiles[0] == 1);
REQUIRE(quantiles[1] == 1);
@@ -101,10 +101,10 @@ TEST_CASE("req sketch: repeated values", "[req_sketch]") {
REQUIRE_FALSE(sketch.is_estimation_mode());
REQUIRE(sketch.get_n() == 6);
REQUIRE(sketch.get_num_retained() == 6);
- REQUIRE(sketch.get_rank(1.0f) == 0);
- REQUIRE(sketch.get_rank<true>(1.0f) == 0.5);
- REQUIRE(sketch.get_rank(2.0f) == 0.5);
- REQUIRE(sketch.get_rank<true>(2.0f) == 1);
+ REQUIRE(sketch.get_rank(1.0f, false) == 0);
+ REQUIRE(sketch.get_rank(1.0f) == 0.5);
+ REQUIRE(sketch.get_rank(2.0f, false) == 0.5);
+ REQUIRE(sketch.get_rank(2.0f) == 1);
}
TEST_CASE("req sketch: exact mode", "[req_sketch]") {
@@ -115,48 +115,48 @@ TEST_CASE("req sketch: exact mode", "[req_sketch]") {
REQUIRE(sketch.get_n() == 10);
REQUIRE(sketch.get_num_retained() == 10);
- // like KLL
- REQUIRE(sketch.get_rank(1.0f) == 0);
- REQUIRE(sketch.get_rank(2.0f) == 0.1);
- REQUIRE(sketch.get_rank(6.0f) == 0.5);
- REQUIRE(sketch.get_rank(9.0f) == 0.8);
- REQUIRE(sketch.get_rank(10.0f) == 0.9);
+ // exclusive
+ REQUIRE(sketch.get_rank(1.0f, false) == 0);
+ REQUIRE(sketch.get_rank(2.0f, false) == 0.1);
+ REQUIRE(sketch.get_rank(6.0f, false) == 0.5);
+ REQUIRE(sketch.get_rank(9.0f, false) == 0.8);
+ REQUIRE(sketch.get_rank(10.0f, false) == 0.9);
// inclusive
- REQUIRE(sketch.get_rank<true>(1.0f) == 0.1);
- REQUIRE(sketch.get_rank<true>(2.0f) == 0.2);
- REQUIRE(sketch.get_rank<true>(5.0f) == 0.5);
- REQUIRE(sketch.get_rank<true>(9.0f) == 0.9);
- REQUIRE(sketch.get_rank<true>(10.0f) == 1);
+ REQUIRE(sketch.get_rank(1.0f) == 0.1);
+ REQUIRE(sketch.get_rank(2.0f) == 0.2);
+ REQUIRE(sketch.get_rank(5.0f) == 0.5);
+ REQUIRE(sketch.get_rank(9.0f) == 0.9);
+ REQUIRE(sketch.get_rank(10.0f) == 1);
+
+ // exclusive
+ REQUIRE(sketch.get_quantile(0, false) == 1);
+ REQUIRE(sketch.get_quantile(0.1, false) == 2);
+ REQUIRE(sketch.get_quantile(0.5, false) == 6);
+ REQUIRE(sketch.get_quantile(0.9, false) == 10);
+ REQUIRE(sketch.get_quantile(1, false) == 10);
- // like KLL
+ // inclusive
REQUIRE(sketch.get_quantile(0) == 1);
- REQUIRE(sketch.get_quantile(0.1) == 2);
- REQUIRE(sketch.get_quantile(0.5) == 6);
- REQUIRE(sketch.get_quantile(0.9) == 10);
+ REQUIRE(sketch.get_quantile(0.1) == 1);
+ REQUIRE(sketch.get_quantile(0.5) == 5);
+ REQUIRE(sketch.get_quantile(0.9) == 9);
REQUIRE(sketch.get_quantile(1) == 10);
- // inclusive
- REQUIRE(sketch.get_quantile<true>(0) == 1);
- REQUIRE(sketch.get_quantile<true>(0.1) == 1);
- REQUIRE(sketch.get_quantile<true>(0.5) == 5);
- REQUIRE(sketch.get_quantile<true>(0.9) == 9);
- REQUIRE(sketch.get_quantile<true>(1) == 10);
-
const double ranks[3] {0, 0.5, 1};
- auto quantiles = sketch.get_quantiles(ranks, 3);
+ auto quantiles = sketch.get_quantiles(ranks, 3, false);
REQUIRE(quantiles.size() == 3);
REQUIRE(quantiles[0] == 1);
REQUIRE(quantiles[1] == 6);
REQUIRE(quantiles[2] == 10);
const float splits[3] {2, 6, 9};
- auto cdf = sketch.get_CDF(splits, 3);
+ auto cdf = sketch.get_CDF(splits, 3, false);
REQUIRE(cdf[0] == 0.1);
REQUIRE(cdf[1] == 0.5);
REQUIRE(cdf[2] == 0.8);
REQUIRE(cdf[3] == 1);
- auto pmf = sketch.get_PMF(splits, 3);
+ auto pmf = sketch.get_PMF(splits, 3, false);
REQUIRE(pmf[0] == Approx(0.1).margin(1e-8));
REQUIRE(pmf[1] == Approx(0.4).margin(1e-8));
REQUIRE(pmf[2] == Approx(0.3).margin(1e-8));
@@ -175,12 +175,12 @@ TEST_CASE("req sketch: estimation mode", "[req_sketch]") {
REQUIRE(sketch.get_n() == n);
// std::cout << sketch.to_string(true);
REQUIRE(sketch.get_num_retained() < n);
- REQUIRE(sketch.get_rank(0) == 0);
- REQUIRE(sketch.get_rank(static_cast<float>(n)) == 1);
- REQUIRE(sketch.get_rank(n / 2.0f) == Approx(0.5).margin(0.01));
- REQUIRE(sketch.get_rank(n - 1.0f) == Approx(1).margin(0.01));
- REQUIRE(sketch.get_min_value() == 0);
- REQUIRE(sketch.get_max_value() == n - 1);
+ REQUIRE(sketch.get_rank(0, false) == 0);
+ REQUIRE(sketch.get_rank(static_cast<float>(n), false) == 1);
+ REQUIRE(sketch.get_rank(n / 2.0f, false) == Approx(0.5).margin(0.01));
+ REQUIRE(sketch.get_rank(n - 1.0f, false) == Approx(1).margin(0.01));
+ REQUIRE(sketch.get_min_item() == 0);
+ REQUIRE(sketch.get_max_item() == n - 1);
REQUIRE(sketch.get_rank_lower_bound(0.5, 1) < 0.5);
REQUIRE(sketch.get_rank_upper_bound(0.5, 1) > 0.5);
@@ -203,8 +203,8 @@ TEST_CASE("req sketch: stream serialize-deserialize empty", "[req_sketch]") {
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
REQUIRE(sketch2.get_n() == sketch.get_n());
- REQUIRE(std::isnan(sketch2.get_min_value()));
- REQUIRE(std::isnan(sketch2.get_max_value()));
+ REQUIRE(std::isnan(sketch2.get_min_item()));
+ REQUIRE(std::isnan(sketch2.get_max_item()));
}
TEST_CASE("req sketch: byte serialize-deserialize empty", "[req_sketch]") {
@@ -218,8 +218,8 @@ TEST_CASE("req sketch: byte serialize-deserialize empty", "[req_sketch]") {
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
REQUIRE(sketch2.get_n() == sketch.get_n());
- REQUIRE(std::isnan(sketch2.get_min_value()));
- REQUIRE(std::isnan(sketch2.get_max_value()));
+ REQUIRE(std::isnan(sketch2.get_min_item()));
+ REQUIRE(std::isnan(sketch2.get_max_item()));
}
TEST_CASE("req sketch: stream serialize-deserialize single item", "[req_sketch]") {
@@ -234,8 +234,8 @@ TEST_CASE("req sketch: stream serialize-deserialize single item", "[req_sketch]"
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
REQUIRE(sketch2.get_n() == sketch.get_n());
- REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
- REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
+ REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
+ REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
}
TEST_CASE("req sketch: byte serialize-deserialize single item", "[req_sketch]") {
@@ -251,8 +251,8 @@ TEST_CASE("req sketch: byte serialize-deserialize single item", "[req_sketch]")
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
REQUIRE(sketch2.get_n() == sketch.get_n());
- REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
- REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
+ REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
+ REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
}
TEST_CASE("req sketch: stream serialize-deserialize exact mode", "[req_sketch]") {
@@ -269,8 +269,8 @@ TEST_CASE("req sketch: stream serialize-deserialize exact mode", "[req_sketch]")
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
REQUIRE(sketch2.get_n() == sketch.get_n());
- REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
- REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
+ REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
+ REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
}
TEST_CASE("req sketch: byte serialize-deserialize exact mode", "[req_sketch]") {
@@ -288,8 +288,8 @@ TEST_CASE("req sketch: byte serialize-deserialize exact mode", "[req_sketch]") {
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
REQUIRE(sketch2.get_n() == sketch.get_n());
- REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
- REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
+ REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
+ REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
}
TEST_CASE("req sketch: stream serialize-deserialize estimation mode", "[req_sketch]") {
@@ -306,8 +306,8 @@ TEST_CASE("req sketch: stream serialize-deserialize estimation mode", "[req_sket
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
REQUIRE(sketch2.get_n() == sketch.get_n());
- REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
- REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
+ REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
+ REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
}
TEST_CASE("req sketch: byte serialize-deserialize estimation mode", "[req_sketch]") {
@@ -324,8 +324,8 @@ TEST_CASE("req sketch: byte serialize-deserialize estimation mode", "[req_sketch
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
REQUIRE(sketch2.get_n() == sketch.get_n());
- REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
- REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
+ REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
+ REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
}
TEST_CASE("req sketch: serialize deserialize stream and bytes equivalence", "[req_sketch]") {
@@ -350,8 +350,8 @@ TEST_CASE("req sketch: serialize deserialize stream and bytes equivalence", "[re
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
REQUIRE(sketch2.get_n() == sketch.get_n());
- REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
- REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
+ REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
+ REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
}
TEST_CASE("req sketch: stream deserialize from Java - empty", "[req_sketch]") {
@@ -363,8 +363,8 @@ TEST_CASE("req sketch: stream deserialize from Java - empty", "[req_sketch]") {
REQUIRE_FALSE(sketch.is_estimation_mode());
REQUIRE(sketch.get_n() == 0);
REQUIRE(sketch.get_num_retained() == 0);
- REQUIRE(std::isnan(sketch.get_min_value()));
- REQUIRE(std::isnan(sketch.get_max_value()));
+ REQUIRE(std::isnan(sketch.get_min_item()));
+ REQUIRE(std::isnan(sketch.get_max_item()));
}
TEST_CASE("req sketch: stream deserialize from Java - single item", "[req_sketch]") {
@@ -376,10 +376,10 @@ TEST_CASE("req sketch: stream deserialize from Java - single item", "[req_sketch
REQUIRE_FALSE(sketch.is_estimation_mode());
REQUIRE(sketch.get_n() == 1);
REQUIRE(sketch.get_num_retained() == 1);
- REQUIRE(sketch.get_min_value() == 1);
- REQUIRE(sketch.get_max_value() == 1);
- REQUIRE(sketch.get_rank(1.0f) == 0);
- REQUIRE(sketch.get_rank<true>(1.0f) == 1);
+ REQUIRE(sketch.get_min_item() == 1);
+ REQUIRE(sketch.get_max_item() == 1);
+ REQUIRE(sketch.get_rank(1.0f, false) == 0);
+ REQUIRE(sketch.get_rank(1.0f) == 1);
}
TEST_CASE("req sketch: stream deserialize from Java - raw items", "[req_sketch]") {
@@ -391,9 +391,9 @@ TEST_CASE("req sketch: stream deserialize from Java - raw items", "[req_sketch]"
REQUIRE_FALSE(sketch.is_estimation_mode());
REQUIRE(sketch.get_n() == 4);
REQUIRE(sketch.get_num_retained() == 4);
- REQUIRE(sketch.get_min_value() == 0);
- REQUIRE(sketch.get_max_value() == 3);
- REQUIRE(sketch.get_rank(2.0f) == 0.5);
+ REQUIRE(sketch.get_min_item() == 0);
+ REQUIRE(sketch.get_max_item() == 3);
+ REQUIRE(sketch.get_rank(2.0f, false) == 0.5);
}
TEST_CASE("req sketch: stream deserialize from Java - exact mode", "[req_sketch]") {
@@ -405,9 +405,9 @@ TEST_CASE("req sketch: stream deserialize from Java - exact mode", "[req_sketch]
REQUIRE_FALSE(sketch.is_estimation_mode());
REQUIRE(sketch.get_n() == 100);
REQUIRE(sketch.get_num_retained() == 100);
- REQUIRE(sketch.get_min_value() == 0);
- REQUIRE(sketch.get_max_value() == 99);
- REQUIRE(sketch.get_rank(50.0f) == 0.5);
+ REQUIRE(sketch.get_min_item() == 0);
+ REQUIRE(sketch.get_max_item() == 99);
+ REQUIRE(sketch.get_rank(50.0f, false) == 0.5);
}
TEST_CASE("req sketch: stream deserialize from Java - estimation mode", "[req_sketch]") {
@@ -419,9 +419,9 @@ TEST_CASE("req sketch: stream deserialize from Java - estimation mode", "[req_sk
REQUIRE(sketch.is_estimation_mode());
REQUIRE(sketch.get_n() == 10000);
REQUIRE(sketch.get_num_retained() == 2942);
- REQUIRE(sketch.get_min_value() == 0);
- REQUIRE(sketch.get_max_value() == 9999);
- REQUIRE(sketch.get_rank(5000.0f) == 0.5);
+ REQUIRE(sketch.get_min_item() == 0);
+ REQUIRE(sketch.get_max_item() == 9999);
+ REQUIRE(sketch.get_rank(5000.0f, false) == 0.5);
}
TEST_CASE("req sketch: merge into empty", "[req_sketch]") {
@@ -431,11 +431,11 @@ TEST_CASE("req sketch: merge into empty", "[req_sketch]") {
for (size_t i = 0; i < 1000; ++i) sketch2.update(static_cast<float>(i));
sketch1.merge(sketch2);
- REQUIRE(sketch1.get_min_value() == 0);
- REQUIRE(sketch1.get_max_value() == 999);
- REQUIRE(sketch1.get_quantile(0.25) == Approx(250).margin(3));
- REQUIRE(sketch1.get_quantile(0.5) == Approx(500).margin(3));
- REQUIRE(sketch1.get_quantile(0.75) == Approx(750).margin(3));
+ REQUIRE(sketch1.get_min_item() == 0);
+ REQUIRE(sketch1.get_max_item() == 999);
+ REQUIRE(sketch1.get_quantile(0.25) == Approx(250).epsilon(0.01));
+ REQUIRE(sketch1.get_quantile(0.5) == Approx(500).epsilon(0.01));
+ REQUIRE(sketch1.get_quantile(0.75) == Approx(750).epsilon(0.01));
REQUIRE(sketch1.get_rank(500.0f) == Approx(0.5).margin(0.01));
}
@@ -447,11 +447,11 @@ TEST_CASE("req sketch: merge", "[req_sketch]") {
for (size_t i = 1000; i < 2000; ++i) sketch2.update(static_cast<float>(i));
sketch1.merge(sketch2);
- REQUIRE(sketch1.get_min_value() == 0);
- REQUIRE(sketch1.get_max_value() == 1999);
- REQUIRE(sketch1.get_quantile(0.25) == Approx(500).margin(3));
- REQUIRE(sketch1.get_quantile(0.5) == Approx(1000).margin(1));
- REQUIRE(sketch1.get_quantile(0.75) == Approx(1500).margin(1));
+ REQUIRE(sketch1.get_min_item() == 0);
+ REQUIRE(sketch1.get_max_item() == 1999);
+ REQUIRE(sketch1.get_quantile(0.25) == Approx(500).epsilon(0.01));
+ REQUIRE(sketch1.get_quantile(0.5) == Approx(1000).epsilon(0.01));
+ REQUIRE(sketch1.get_quantile(0.75) == Approx(1500).epsilon(0.01));
REQUIRE(sketch1.get_rank(1000.0f) == Approx(0.5).margin(0.01));
}
@@ -469,9 +469,9 @@ TEST_CASE("req sketch: merge multiple", "[req_sketch]") {
sketch.merge(sketch1);
sketch.merge(sketch2);
sketch.merge(sketch3);
- REQUIRE(sketch.get_min_value() == 0);
- REQUIRE(sketch.get_max_value() == 119);
- REQUIRE(sketch.get_quantile(0.5) == Approx(60).margin(3));
+ REQUIRE(sketch.get_min_item() == 0);
+ REQUIRE(sketch.get_max_item() == 119);
+ REQUIRE(sketch.get_quantile(0.5) == Approx(60).epsilon(0.02));
REQUIRE(sketch.get_rank(60.0f) == Approx(0.5).margin(0.01));
}
@@ -503,8 +503,8 @@ TEST_CASE("req sketch: type conversion - several levels", "[req_sketch]") {
REQUIRE(req_float.get_n() == req_double.get_n());
REQUIRE(req_float.get_num_retained() == req_double.get_num_retained());
- auto sv_float = req_float.get_sorted_view(false);
- auto sv_double = req_double.get_sorted_view(false);
+ auto sv_float = req_float.get_sorted_view();
+ auto sv_double = req_double.get_sorted_view();
auto sv_float_it = sv_float.begin();
auto sv_double_it = sv_double.begin();
while (sv_float_it != sv_float.end()) {
@@ -551,6 +551,17 @@ TEST_CASE("req sketch: type conversion - custom types") {
REQUIRE(sb.get_n() == 3);
}
+TEST_CASE("get_rank equivalence") {
+ req_sketch<int> sketch(12);
+ const size_t n = 1000;
+ for (size_t i = 0; i < n; ++i) sketch.update(i);
+ REQUIRE(sketch.get_n() == n);
+ auto view = sketch.get_sorted_view();
+ for (size_t i = 0; i < n; ++i) {
+ REQUIRE(sketch.get_rank(i) == view.get_rank(i));
+ }
+}
+
//TEST_CASE("for manual comparison with Java") {
// req_sketch<float> sketch(12, false);
// for (size_t i = 0; i < 100000; ++i) sketch.update(i);
diff --git a/sampling/test/CMakeLists.txt b/sampling/test/CMakeLists.txt
index c6c7d83..98fbfff 100644
--- a/sampling/test/CMakeLists.txt
+++ b/sampling/test/CMakeLists.txt
@@ -17,7 +17,7 @@
add_executable(sampling_test)
-target_link_libraries(sampling_test sampling common_test)
+target_link_libraries(sampling_test sampling common_test_lib)
set_target_properties(sampling_test PROPERTIES
CXX_STANDARD 11
diff --git a/theta/test/CMakeLists.txt b/theta/test/CMakeLists.txt
index 147708f..7b1f0de 100644
--- a/theta/test/CMakeLists.txt
+++ b/theta/test/CMakeLists.txt
@@ -17,7 +17,7 @@
add_executable(theta_test)
-target_link_libraries(theta_test theta common_test)
+target_link_libraries(theta_test theta common_test_lib)
set_target_properties(theta_test PROPERTIES
CXX_STANDARD 11
diff --git a/tuple/test/CMakeLists.txt b/tuple/test/CMakeLists.txt
index 452c766..f03fd4c 100644
--- a/tuple/test/CMakeLists.txt
+++ b/tuple/test/CMakeLists.txt
@@ -17,7 +17,7 @@
add_executable(tuple_test)
-target_link_libraries(tuple_test tuple common_test)
+target_link_libraries(tuple_test tuple common_test_lib)
set_target_properties(tuple_test PROPERTIES
CXX_STANDARD 11
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org