You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2020/02/13 21:27:48 UTC
[incubator-datasketches-cpp] branch hll_performance updated: move
and some more optimization
This is an automated email from the ASF dual-hosted git repository.
alsay pushed a commit to branch hll_performance
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git
The following commit(s) were added to refs/heads/hll_performance by this push:
new 235187f move and some more optimization
235187f is described below
commit 235187ff7f9c85211c84900ee61c3eb46ea271df
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Thu Feb 13 13:27:33 2020 -0800
move and some more optimization
---
hll/include/Hll8Array-internal.hpp | 36 +++++++++---------
hll/include/HllUnion-internal.hpp | 76 +++++++++++++++-----------------------
hll/include/hll.hpp | 8 +++-
hll/test/HllUnionTest.cpp | 3 +-
hll/test/ToFromByteArrayTest.cpp | 2 +-
5 files changed, 57 insertions(+), 68 deletions(-)
diff --git a/hll/include/Hll8Array-internal.hpp b/hll/include/Hll8Array-internal.hpp
index ce93ff5..cb14a0f 100644
--- a/hll/include/Hll8Array-internal.hpp
+++ b/hll/include/Hll8Array-internal.hpp
@@ -113,39 +113,39 @@ void Hll8Array<A>::mergeHll(const HllArray<A>& src) {
// duplication below is to avoid a virtual method call in a loop
if (src.getTgtHllType() == target_hll_type::HLL_8) {
for (int i = 0; i < src_k; i++) {
- const uint8_t src_v = static_cast<const Hll8Array<A>&>(src).getSlot(i);
+ const uint8_t new_v = static_cast<const Hll8Array<A>&>(src).getSlot(i);
const int j = i & dst_mask;
- const uint8_t dst_v = this->hllByteArr[j];
- this->hllByteArr[j] = src_v > dst_v ? src_v : dst_v;
- if (src_v > dst_v) {
- this->hipAndKxQIncrementalUpdate(dst_v, src_v);
- if (dst_v == 0) {
+ const uint8_t old_v = this->hllByteArr[j];
+ if (new_v > old_v) {
+ this->hllByteArr[j] = new_v;
+ this->hipAndKxQIncrementalUpdate(old_v, new_v);
+ if (old_v == 0) {
this->numAtCurMin--;
}
}
}
} else if (src.getTgtHllType() == target_hll_type::HLL_6) {
for (int i = 0; i < src_k; i++) {
- const uint8_t src_v = static_cast<const Hll6Array<A>&>(src).getSlot(i);
+ const uint8_t new_v = static_cast<const Hll6Array<A>&>(src).getSlot(i);
const int j = i & dst_mask;
- const uint8_t dst_v = this->hllByteArr[j];
- this->hllByteArr[j] = src_v > dst_v ? src_v : dst_v;
- if (src_v > dst_v) {
- this->hipAndKxQIncrementalUpdate(dst_v, src_v);
- if (dst_v == 0) {
+ const uint8_t old_v = this->hllByteArr[j];
+ if (new_v > old_v) {
+ this->hllByteArr[j] = new_v;
+ this->hipAndKxQIncrementalUpdate(old_v, new_v);
+ if (old_v == 0) {
this->numAtCurMin--;
}
}
}
} else { // HLL_4
for (int i = 0; i < src_k; i++) {
- const uint8_t src_v = static_cast<const Hll4Array<A>&>(src).get_value(i);
+ const uint8_t new_v = static_cast<const Hll4Array<A>&>(src).get_value(i);
const int j = i & dst_mask;
- const uint8_t dst_v = this->hllByteArr[j];
- this->hllByteArr[j] = src_v > dst_v ? src_v : dst_v;
- if (src_v > dst_v) {
- this->hipAndKxQIncrementalUpdate(dst_v, src_v);
- if (dst_v == 0) {
+ const uint8_t old_v = this->hllByteArr[j];
+ if (new_v > old_v) {
+ this->hllByteArr[j] = new_v;
+ this->hipAndKxQIncrementalUpdate(old_v, new_v);
+ if (old_v == 0) {
this->numAtCurMin--;
}
}
diff --git a/hll/include/HllUnion-internal.hpp b/hll/include/HllUnion-internal.hpp
index ff4c262..7735cf7 100644
--- a/hll/include/HllUnion-internal.hpp
+++ b/hll/include/HllUnion-internal.hpp
@@ -77,6 +77,18 @@ hll_sketch_alloc<A> hll_union_alloc<A>::get_result(target_hll_type target_type)
template<typename A>
void hll_union_alloc<A>::update(const hll_sketch_alloc<A>& sketch) {
+ if (sketch.is_empty()) return;
+ union_impl(sketch, lg_max_k);
+}
+
+template<typename A>
+void hll_union_alloc<A>::update(hll_sketch_alloc<A>&& sketch) {
+ if (sketch.is_empty()) return;
+ if (gadget.is_empty() and sketch.get_target_type() == HLL_8 and sketch.get_lg_config_k() <= lg_max_k) {
+ if (sketch.get_current_mode() == HLL or sketch.get_lg_config_k() == lg_max_k) {
+ gadget = std::move(sketch);
+ }
+ }
union_impl(sketch, lg_max_k);
}
@@ -276,11 +288,10 @@ HllSketchImpl<A>* hll_union_alloc<A>::copy_or_downsample(const HllSketchImpl<A>*
const HllArray<A>* src = static_cast<const HllArray<A>*>(src_impl);
const int src_lg_k = src->getLgConfigK();
if (src_lg_k <= tgt_lg_k) {
- return src->copyAs(target_hll_type::HLL_8);
+ return src->copyAs(HLL_8);
}
- const int minLgK = ((src_lg_k < tgt_lg_k) ? src_lg_k : tgt_lg_k);
typedef typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>> hll8Alloc;
- Hll8Array<A>* tgtHllArr = new (hll8Alloc().allocate(1)) Hll8Array<A>(minLgK, false);
+ Hll8Array<A>* tgtHllArr = new (hll8Alloc().allocate(1)) Hll8Array<A>(tgt_lg_k, false);
tgtHllArr->mergeHll(*src);
//both of these are required for isomorphism
tgtHllArr->putHipAccum(src->getHipAccum());
@@ -299,64 +310,37 @@ inline HllSketchImpl<A>* hll_union_alloc<A>::leak_free_coupon_update(HllSketchIm
template<typename A>
void hll_union_alloc<A>::union_impl(const hll_sketch_alloc<A>& sketch, const int lg_max_k) {
- if (sketch.is_empty()) return;
const HllSketchImpl<A>* src_impl = sketch.sketch_impl; //default
HllSketchImpl<A>* dst_impl = gadget.sketch_impl; //default
-
- const int lo2bits = src_impl->getCurMode();
- const int hi2bits = (dst_impl->isEmpty()) ? 3 : dst_impl->getCurMode();
-
- const int sw = (hi2bits << 2) | lo2bits;
- switch (sw) {
- case 0: // src: LIST, gadget: LIST
- case 1: // src: SET, gadget: LIST
- case 4: // src: LIST, gadget: SET
- case 5: // src: SET, gadget: SET
- case 8: // src: LIST, gadget: HLL
- case 9: // src: SET, gadget: HLL
- case 12: // src: LIST, gadget: empty
- case 13: // src: SET, gadget: empty
- {
- if (dst_impl->isEmpty() and src_impl->getLgConfigK() == dst_impl->getLgConfigK()) {
- dst_impl = src_impl->copyAs(target_hll_type::HLL_8);
- gadget.sketch_impl->get_deleter()(gadget.sketch_impl); // gadget replaced
- }
+ if (src_impl->getCurMode() == LIST or src_impl->getCurMode() == SET) {
+ if (dst_impl->isEmpty() and src_impl->getLgConfigK() == dst_impl->getLgConfigK()) {
+ dst_impl = src_impl->copyAs(HLL_8);
+ gadget.sketch_impl->get_deleter()(gadget.sketch_impl); // gadget replaced
+ } else {
const CouponList<A>* src = static_cast<const CouponList<A>*>(src_impl);
for (auto coupon: *src) {
dst_impl = leak_free_coupon_update(dst_impl, coupon); //assignment required
}
- break;
}
- case 2: // src: HLL, gadget: LIST
- case 6: // src: HLL, gadget: SET
- {
+ } else if (!dst_impl->isEmpty()) { // src is HLL
+ if (dst_impl->getCurMode() == LIST or dst_impl->getCurMode() == SET) {
// swap so that src is LIST or SET, tgt is HLL
// use lg_max_k because LIST has effective K of 2^26
+ const CouponList<A>* src = static_cast<const CouponList<A>*>(dst_impl);
dst_impl = copy_or_downsample(src_impl, lg_max_k);
- src_impl = gadget.sketch_impl;
- const CouponList<A>* src = static_cast<const CouponList<A>*>(src_impl);
- Hll8Array<A>* dst = static_cast<Hll8Array<A>*>(dst_impl);
- dst->mergeList(*src);
+ static_cast<Hll8Array<A>*>(dst_impl)->mergeList(*src);
gadget.sketch_impl->get_deleter()(gadget.sketch_impl); // gadget replaced
- break;
- }
- case 10: { // src: HLL, gadget: HLL
- const int src_lg_k = src_impl->getLgConfigK();
- const int dst_lg_k = dst_impl->getLgConfigK();
- if (src_lg_k < dst_lg_k) {
- dst_impl = copy_or_downsample(dst_impl, src_lg_k);
+ } else { // gadget is HLL
+ if (src_impl->getLgConfigK() < dst_impl->getLgConfigK()) {
+ dst_impl = copy_or_downsample(dst_impl, sketch.get_lg_config_k());
gadget.sketch_impl->get_deleter()(gadget.sketch_impl); // gadget replaced
}
const HllArray<A>* src = static_cast<const HllArray<A>*>(src_impl);
- Hll8Array<A>* dst = static_cast<Hll8Array<A>*>(dst_impl);
- dst->mergeHll(*src);
- break;
- }
- case 14: { //src: HLL, gadget: empty
- dst_impl = copy_or_downsample(src_impl, lg_max_k);
- gadget.sketch_impl->get_deleter()(gadget.sketch_impl); // gadget replaced
- break;
+ static_cast<Hll8Array<A>*>(dst_impl)->mergeHll(*src);
}
+ } else { // src is HLL, gadget is empty
+ dst_impl = copy_or_downsample(src_impl, lg_max_k);
+ gadget.sketch_impl->get_deleter()(gadget.sketch_impl); // gadget replaced
}
dst_impl->putOutOfOrderFlag(dst_impl->isOutOfOrderFlag() | src_impl->isOutOfOrderFlag());
gadget.sketch_impl = dst_impl;
diff --git a/hll/include/hll.hpp b/hll/include/hll.hpp
index aeb9146..31db794 100644
--- a/hll/include/hll.hpp
+++ b/hll/include/hll.hpp
@@ -605,10 +605,16 @@ class hll_union_alloc {
bool all = false) const;
/**
- * Update this union operator iwth the given sketch.
+ * Update this union operator with the given sketch.
* @param The given sketch.
*/
void update(const hll_sketch_alloc<A>& sketch);
+
+ /**
+ * Update this union operator with the given temporary sketch.
+ * @param The given sketch.
+ */
+ void update(hll_sketch_alloc<A>&& sketch);
/**
* Present the given std::string as a potential unique item.
diff --git a/hll/test/HllUnionTest.cpp b/hll/test/HllUnionTest.cpp
index 47a1170..38edc1f 100644
--- a/hll/test/HllUnionTest.cpp
+++ b/hll/test/HllUnionTest.cpp
@@ -35,7 +35,6 @@ class HllUnionTest : public CppUnit::TestFixture {
CPPUNIT_TEST(checkCompositeEstimate);
CPPUNIT_TEST(checkConfigKLimits);
CPPUNIT_TEST(checkUbLb);
- //CPPUNIT_TEST(checkEmptyCoupon);
CPPUNIT_TEST(checkConversions);
CPPUNIT_TEST(checkMisc);
CPPUNIT_TEST(checkInputTypes);
@@ -167,7 +166,7 @@ class HllUnionTest : public CppUnit::TestFixture {
v += n2;
hll_union u(lgMaxK);
- u.update(h1);
+ u.update(std::move(h1));
u.update(h2);
hll_sketch result = u.get_result(resultType);
diff --git a/hll/test/ToFromByteArrayTest.cpp b/hll/test/ToFromByteArrayTest.cpp
index 62c8ce9..5837ffa 100644
--- a/hll/test/ToFromByteArrayTest.cpp
+++ b/hll/test/ToFromByteArrayTest.cpp
@@ -32,7 +32,7 @@ class ToFromByteArrayTest : public CppUnit::TestFixture {
CPPUNIT_TEST_SUITE(ToFromByteArrayTest);
CPPUNIT_TEST(deserializeFromJava);
CPPUNIT_TEST(toFromSketch);
- //CPPUNIT_TEST(doubleSerialize);
+ CPPUNIT_TEST(doubleSerialize);
CPPUNIT_TEST_SUITE_END();
void doubleSerialize() {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org