You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by jm...@apache.org on 2022/01/15 03:13:40 UTC
[datasketches-cpp] branch quantiles updated: fix backwards logic on a comparator, simplify to reduce redundancy, add additional asserts for debugging
This is an automated email from the ASF dual-hosted git repository.
jmalkin pushed a commit to branch quantiles
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git
The following commit(s) were added to refs/heads/quantiles by this push:
new 8c1b24c fix backwards logic on a comparator, simplify to reduce redundancy, add additional asserts for debugging
8c1b24c is described below
commit 8c1b24c89488e8071bf3ee1966f80d888d161947
Author: Jon Malkin <jm...@users.noreply.github.com>
AuthorDate: Fri Jan 14 19:13:28 2022 -0800
fix backwards logic on a comparator, simplify to reduce redundancy, add additional asserts for debugging
---
quantiles/include/quantiles_sketch_impl.hpp | 45 +++++++++--------------------
quantiles/test/quantiles_sketch_test.cpp | 5 ++--
2 files changed, 16 insertions(+), 34 deletions(-)
diff --git a/quantiles/include/quantiles_sketch_impl.hpp b/quantiles/include/quantiles_sketch_impl.hpp
index 06d7df9..5d06348 100644
--- a/quantiles/include/quantiles_sketch_impl.hpp
+++ b/quantiles/include/quantiles_sketch_impl.hpp
@@ -358,38 +358,20 @@ double quantiles_sketch<T, C, S, A>::get_rank(const T& value) const {
if (is_empty()) return std::numeric_limits<double>::quiet_NaN();
uint64_t weight = 1;
uint64_t total = 0;
- if (inclusive) {
- for (const T &item: base_buffer_) {
- if (!C()(value, item)) {
- total += weight;
- }
- }
- } else {
- for (const T &item: base_buffer_) {
- if (C()(item, value)) {
- total += weight;
- }
- }
+ for (const T &item: base_buffer_) {
+ if (inclusive ? !C()(value, item) : C()(item, value))
+ total += weight;
}
weight *= 2;
for (uint8_t level = 0; level < levels_.size(); ++level, weight *= 2) {
if (levels_[level].empty()) { continue; }
const T* data = levels_[level].data();
- if (inclusive) {
- for (uint16_t i = 0; i < k_; ++i) {
- if (C()(value, data[i]))
- break; // levels are sorted, no point comparing further
- else
- total += weight;
- }
- } else {
- for (uint16_t i = 0; i < k_; ++i) {
- if (C()(data[i], value))
- total += weight;
- else
- break; // levels are sorted, no point comparing further
- }
+ for (uint16_t i = 0; i < k_; ++i) {
+ if (inclusive ? !C()(value, data[i]) : C()(data[i], value))
+ total += weight;
+ else
+ break; // levels are sorted, no point comparing further
}
}
return (double) total / n_;
@@ -477,7 +459,7 @@ void quantiles_sketch<T, C, S, A>::process_full_base_buffer() {
std::sort(base_buffer_.begin(), base_buffer_.end(), C());
in_place_propagate_carry(0,
- levels_[0], // unused here, but 0 is guaranted to exist
+ levels_[0], // unused here, but 0 is guaranteed to exist
base_buffer_,
true, *this);
base_buffer_.clear();
@@ -528,9 +510,6 @@ void quantiles_sketch<T, C, S, A>::in_place_propagate_carry(uint8_t starting_lev
sketch.levels_[lvl].clear();
sketch.levels_[ending_level].clear();
zip_buffer(buf_size_2k, sketch.levels_[ending_level]);
- // to release the discarded objects
- //Arrays.fill(levelsArr, (2 + lvl) * k, (2 + lvl + 1) * k, null);
- sketch.levels_[lvl].clear();
} // end of loop over lower levels
// update bit pattern with binary-arithmetic ripple carry
@@ -547,6 +526,7 @@ void quantiles_sketch<T, C, S, A>::zip_buffer(Level& buf_in, Level& buf_out) {
uint32_t rand_offset = random_bit();
#endif
assert(buf_in.size() == 2 * buf_out.capacity());
+ assert(buf_out.size() == 0);
size_t k = buf_out.capacity();
for (uint32_t i = rand_offset, o = 0; o < k; i += 2, ++o) {
buf_out.push_back(std::move(buf_in[i]));
@@ -558,6 +538,7 @@ template<typename T, typename C, typename S, typename A>
void quantiles_sketch<T, C, S, A>::merge_two_size_k_buffers(Level& src_1, Level& src_2, Level& dst) {
assert(src_1.size() == src_2.size());
assert(src_1.size() * 2 == dst.capacity());
+ assert(dst.size() == 0);
auto end1 = src_1.end(), end2 = src_2.end();
auto it1 = src_1.begin(), it2 = src_2.begin();
@@ -565,9 +546,9 @@ void quantiles_sketch<T, C, S, A>::merge_two_size_k_buffers(Level& src_1, Level&
// TODO: probably actually doing copies given Level&?
while (it1 != end1 && it2 != end2) {
if (C()(*it1, *it2)) {
- dst.push_back(std::move(*it2++));
- } else {
dst.push_back(std::move(*it1++));
+ } else {
+ dst.push_back(std::move(*it2++));
}
}
diff --git a/quantiles/test/quantiles_sketch_test.cpp b/quantiles/test/quantiles_sketch_test.cpp
index c660ef0..7f89da2 100644
--- a/quantiles/test/quantiles_sketch_test.cpp
+++ b/quantiles/test/quantiles_sketch_test.cpp
@@ -215,8 +215,9 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
// test rank
for (int i = 0; i < n; i++) {
- const double trueRank = (double) i / n;
- REQUIRE(sketch.get_rank(static_cast<float>(i)) == Approx(trueRank).margin(RANK_EPS_FOR_K_128));
+ const double trueRank = static_cast<float>(i) / n;
+ const double sketchRank = sketch.get_rank(static_cast<float>(i));
+ REQUIRE(sketchRank == Approx(trueRank).margin(RANK_EPS_FOR_K_128));
}
// test quantiles at every 0.1 percentage point
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org