You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2020/02/13 21:27:48 UTC

[incubator-datasketches-cpp] branch hll_performance updated: move and some more optimization

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch hll_performance
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git


The following commit(s) were added to refs/heads/hll_performance by this push:
     new 235187f  move and some more optimization
235187f is described below

commit 235187ff7f9c85211c84900ee61c3eb46ea271df
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Thu Feb 13 13:27:33 2020 -0800

    move and some more optimization
---
 hll/include/Hll8Array-internal.hpp | 36 +++++++++---------
 hll/include/HllUnion-internal.hpp  | 76 +++++++++++++++-----------------------
 hll/include/hll.hpp                |  8 +++-
 hll/test/HllUnionTest.cpp          |  3 +-
 hll/test/ToFromByteArrayTest.cpp   |  2 +-
 5 files changed, 57 insertions(+), 68 deletions(-)

diff --git a/hll/include/Hll8Array-internal.hpp b/hll/include/Hll8Array-internal.hpp
index ce93ff5..cb14a0f 100644
--- a/hll/include/Hll8Array-internal.hpp
+++ b/hll/include/Hll8Array-internal.hpp
@@ -113,39 +113,39 @@ void Hll8Array<A>::mergeHll(const HllArray<A>& src) {
   // duplication below is to avoid a virtual method call in a loop
   if (src.getTgtHllType() == target_hll_type::HLL_8) {
     for (int i = 0; i < src_k; i++) {
-      const uint8_t src_v = static_cast<const Hll8Array<A>&>(src).getSlot(i);
+      const uint8_t new_v = static_cast<const Hll8Array<A>&>(src).getSlot(i);
       const int j = i & dst_mask;
-      const uint8_t dst_v = this->hllByteArr[j];
-      this->hllByteArr[j] = src_v > dst_v ? src_v : dst_v;
-      if (src_v > dst_v) {
-        this->hipAndKxQIncrementalUpdate(dst_v, src_v);
-        if (dst_v == 0) {
+      const uint8_t old_v = this->hllByteArr[j];
+      if (new_v > old_v) {
+        this->hllByteArr[j] = new_v;
+        this->hipAndKxQIncrementalUpdate(old_v, new_v);
+        if (old_v == 0) {
           this->numAtCurMin--;
         }
       }
     }
   } else if (src.getTgtHllType() == target_hll_type::HLL_6) {
     for (int i = 0; i < src_k; i++) {
-      const uint8_t src_v = static_cast<const Hll6Array<A>&>(src).getSlot(i);
+      const uint8_t new_v = static_cast<const Hll6Array<A>&>(src).getSlot(i);
       const int j = i & dst_mask;
-      const uint8_t dst_v = this->hllByteArr[j];
-      this->hllByteArr[j] = src_v > dst_v ? src_v : dst_v;
-      if (src_v > dst_v) {
-        this->hipAndKxQIncrementalUpdate(dst_v, src_v);
-        if (dst_v == 0) {
+      const uint8_t old_v = this->hllByteArr[j];
+      if (new_v > old_v) {
+        this->hllByteArr[j] = new_v;
+        this->hipAndKxQIncrementalUpdate(old_v, new_v);
+        if (old_v == 0) {
           this->numAtCurMin--;
         }
       }
     }
   } else { // HLL_4
     for (int i = 0; i < src_k; i++) {
-      const uint8_t src_v = static_cast<const Hll4Array<A>&>(src).get_value(i);
+      const uint8_t new_v = static_cast<const Hll4Array<A>&>(src).get_value(i);
       const int j = i & dst_mask;
-      const uint8_t dst_v = this->hllByteArr[j];
-      this->hllByteArr[j] = src_v > dst_v ? src_v : dst_v;
-      if (src_v > dst_v) {
-        this->hipAndKxQIncrementalUpdate(dst_v, src_v);
-        if (dst_v == 0) {
+      const uint8_t old_v = this->hllByteArr[j];
+      if (new_v > old_v) {
+        this->hllByteArr[j] = new_v;
+        this->hipAndKxQIncrementalUpdate(old_v, new_v);
+        if (old_v == 0) {
           this->numAtCurMin--;
         }
       }
diff --git a/hll/include/HllUnion-internal.hpp b/hll/include/HllUnion-internal.hpp
index ff4c262..7735cf7 100644
--- a/hll/include/HllUnion-internal.hpp
+++ b/hll/include/HllUnion-internal.hpp
@@ -77,6 +77,18 @@ hll_sketch_alloc<A> hll_union_alloc<A>::get_result(target_hll_type target_type)
 
 template<typename A>
 void hll_union_alloc<A>::update(const hll_sketch_alloc<A>& sketch) {
+  if (sketch.is_empty()) return;
+  union_impl(sketch, lg_max_k);
+}
+
+template<typename A>
+void hll_union_alloc<A>::update(hll_sketch_alloc<A>&& sketch) {
+  if (sketch.is_empty()) return;
+  if (gadget.is_empty() and sketch.get_target_type() == HLL_8 and sketch.get_lg_config_k() <= lg_max_k) {
+    if (sketch.get_current_mode() == HLL or sketch.get_lg_config_k() == lg_max_k) {
+      gadget = std::move(sketch);
+    }
+  }
   union_impl(sketch, lg_max_k);
 }
 
@@ -276,11 +288,10 @@ HllSketchImpl<A>* hll_union_alloc<A>::copy_or_downsample(const HllSketchImpl<A>*
   const HllArray<A>* src = static_cast<const HllArray<A>*>(src_impl);
   const int src_lg_k = src->getLgConfigK();
   if (src_lg_k <= tgt_lg_k) {
-    return src->copyAs(target_hll_type::HLL_8);
+    return src->copyAs(HLL_8);
   }
-  const int minLgK = ((src_lg_k < tgt_lg_k) ? src_lg_k : tgt_lg_k);
   typedef typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>> hll8Alloc;
-  Hll8Array<A>* tgtHllArr = new (hll8Alloc().allocate(1)) Hll8Array<A>(minLgK, false);
+  Hll8Array<A>* tgtHllArr = new (hll8Alloc().allocate(1)) Hll8Array<A>(tgt_lg_k, false);
   tgtHllArr->mergeHll(*src);
   //both of these are required for isomorphism
   tgtHllArr->putHipAccum(src->getHipAccum());
@@ -299,64 +310,37 @@ inline HllSketchImpl<A>* hll_union_alloc<A>::leak_free_coupon_update(HllSketchIm
 
 template<typename A>
 void hll_union_alloc<A>::union_impl(const hll_sketch_alloc<A>& sketch, const int lg_max_k) {
-  if (sketch.is_empty()) return;
   const HllSketchImpl<A>* src_impl = sketch.sketch_impl; //default
   HllSketchImpl<A>* dst_impl = gadget.sketch_impl; //default
-
-  const int lo2bits = src_impl->getCurMode();
-  const int hi2bits = (dst_impl->isEmpty()) ? 3 : dst_impl->getCurMode();
-
-  const int sw = (hi2bits << 2) | lo2bits;
-  switch (sw) {
-    case 0: // src: LIST, gadget: LIST
-    case 1: // src: SET, gadget: LIST
-    case 4: // src: LIST, gadget: SET
-    case 5: // src: SET, gadget: SET
-    case 8: // src: LIST, gadget: HLL
-    case 9: // src: SET, gadget: HLL
-    case 12: // src: LIST, gadget: empty
-    case 13: // src: SET, gadget: empty
-    {
-      if (dst_impl->isEmpty() and src_impl->getLgConfigK() == dst_impl->getLgConfigK()) {
-        dst_impl = src_impl->copyAs(target_hll_type::HLL_8);
-        gadget.sketch_impl->get_deleter()(gadget.sketch_impl); // gadget replaced
-      }
+  if (src_impl->getCurMode() == LIST or src_impl->getCurMode() == SET) {
+    if (dst_impl->isEmpty() and src_impl->getLgConfigK() == dst_impl->getLgConfigK()) {
+      dst_impl = src_impl->copyAs(HLL_8);
+      gadget.sketch_impl->get_deleter()(gadget.sketch_impl); // gadget replaced
+    } else {
       const CouponList<A>* src = static_cast<const CouponList<A>*>(src_impl);
       for (auto coupon: *src) {
         dst_impl = leak_free_coupon_update(dst_impl, coupon); //assignment required
       }
-      break;
     }
-    case 2: // src: HLL, gadget: LIST
-    case 6: // src: HLL, gadget: SET
-    {
+  } else if (!dst_impl->isEmpty()) { // src is HLL
+    if (dst_impl->getCurMode() == LIST or dst_impl->getCurMode() == SET) {
       // swap so that src is LIST or SET, tgt is HLL
       // use lg_max_k because LIST has effective K of 2^26
+      const CouponList<A>* src = static_cast<const CouponList<A>*>(dst_impl);
       dst_impl = copy_or_downsample(src_impl, lg_max_k);
-      src_impl = gadget.sketch_impl;
-      const CouponList<A>* src = static_cast<const CouponList<A>*>(src_impl);
-      Hll8Array<A>* dst = static_cast<Hll8Array<A>*>(dst_impl);
-      dst->mergeList(*src);
+      static_cast<Hll8Array<A>*>(dst_impl)->mergeList(*src);
       gadget.sketch_impl->get_deleter()(gadget.sketch_impl); // gadget replaced
-      break;
-    }
-    case 10: { // src: HLL, gadget: HLL
-      const int src_lg_k = src_impl->getLgConfigK();
-      const int dst_lg_k = dst_impl->getLgConfigK();
-      if (src_lg_k < dst_lg_k) {
-        dst_impl = copy_or_downsample(dst_impl, src_lg_k);
+    } else { // gadget is HLL
+      if (src_impl->getLgConfigK() < dst_impl->getLgConfigK()) {
+        dst_impl = copy_or_downsample(dst_impl, sketch.get_lg_config_k());
         gadget.sketch_impl->get_deleter()(gadget.sketch_impl); // gadget replaced
       }
       const HllArray<A>* src = static_cast<const HllArray<A>*>(src_impl);
-      Hll8Array<A>* dst = static_cast<Hll8Array<A>*>(dst_impl);
-      dst->mergeHll(*src);
-      break;
-    }
-    case 14: { //src: HLL, gadget: empty
-      dst_impl = copy_or_downsample(src_impl, lg_max_k);
-      gadget.sketch_impl->get_deleter()(gadget.sketch_impl); // gadget replaced
-      break;
+      static_cast<Hll8Array<A>*>(dst_impl)->mergeHll(*src);
     }
+  } else { // src is HLL, gadget is empty
+    dst_impl = copy_or_downsample(src_impl, lg_max_k);
+    gadget.sketch_impl->get_deleter()(gadget.sketch_impl); // gadget replaced
   }
   dst_impl->putOutOfOrderFlag(dst_impl->isOutOfOrderFlag() | src_impl->isOutOfOrderFlag());
   gadget.sketch_impl = dst_impl;
diff --git a/hll/include/hll.hpp b/hll/include/hll.hpp
index aeb9146..31db794 100644
--- a/hll/include/hll.hpp
+++ b/hll/include/hll.hpp
@@ -605,10 +605,16 @@ class hll_union_alloc {
                           bool all = false) const;
 
     /**
-     * Update this union operator iwth the given sketch.
+     * Update this union operator with the given sketch.
      * @param The given sketch.
      */
     void update(const hll_sketch_alloc<A>& sketch);
+
+    /**
+     * Update this union operator with the given temporary sketch.
+     * @param The given sketch.
+     */
+    void update(hll_sketch_alloc<A>&& sketch);
   
     /**
      * Present the given std::string as a potential unique item.
diff --git a/hll/test/HllUnionTest.cpp b/hll/test/HllUnionTest.cpp
index 47a1170..38edc1f 100644
--- a/hll/test/HllUnionTest.cpp
+++ b/hll/test/HllUnionTest.cpp
@@ -35,7 +35,6 @@ class HllUnionTest : public CppUnit::TestFixture {
   CPPUNIT_TEST(checkCompositeEstimate);
   CPPUNIT_TEST(checkConfigKLimits);
   CPPUNIT_TEST(checkUbLb);
-  //CPPUNIT_TEST(checkEmptyCoupon);
   CPPUNIT_TEST(checkConversions);
   CPPUNIT_TEST(checkMisc);
   CPPUNIT_TEST(checkInputTypes);
@@ -167,7 +166,7 @@ class HllUnionTest : public CppUnit::TestFixture {
     v += n2;
 
     hll_union u(lgMaxK);
-    u.update(h1);
+    u.update(std::move(h1));
     u.update(h2);
 
     hll_sketch result = u.get_result(resultType);
diff --git a/hll/test/ToFromByteArrayTest.cpp b/hll/test/ToFromByteArrayTest.cpp
index 62c8ce9..5837ffa 100644
--- a/hll/test/ToFromByteArrayTest.cpp
+++ b/hll/test/ToFromByteArrayTest.cpp
@@ -32,7 +32,7 @@ class ToFromByteArrayTest : public CppUnit::TestFixture {
   CPPUNIT_TEST_SUITE(ToFromByteArrayTest);
   CPPUNIT_TEST(deserializeFromJava);
   CPPUNIT_TEST(toFromSketch);
-  //CPPUNIT_TEST(doubleSerialize);
+  CPPUNIT_TEST(doubleSerialize);
   CPPUNIT_TEST_SUITE_END();
 
   void doubleSerialize() {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org