You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by GitBox <gi...@apache.org> on 2020/02/18 20:40:35 UTC

[GitHub] [incubator-datasketches-cpp] AlexanderSaydakov commented on a change in pull request #85: HLL union performance improvement

AlexanderSaydakov commented on a change in pull request #85: HLL union performance improvement
URL: https://github.com/apache/incubator-datasketches-cpp/pull/85#discussion_r380922233
 
 

 ##########
 File path: hll/include/HllUnion-internal.hpp
 ##########
 @@ -301,169 +309,41 @@ inline HllSketchImpl<A>* hll_union_alloc<A>::leak_free_coupon_update(HllSketchIm
 }
 
 template<typename A>
-void hll_union_alloc<A>::union_impl(HllSketchImpl<A>* incoming_impl, const int lg_max_k) {
-  if (gadget.sketch_impl->getTgtHllType() != target_hll_type::HLL_8) {
-    throw std::logic_error("Must call unionImpl() with HLL_8 input");
-  }
-  HllSketchImpl<A>* src_impl = incoming_impl; //default
-  HllSketchImpl<A>* dstImpl = gadget.sketch_impl; //default
-  if ((incoming_impl == nullptr) || incoming_impl->isEmpty()) {
-    return; // gadget.sketch_impl;
-  }
-
-  const int hi2bits = (gadget.sketch_impl->isEmpty()) ? 3 : gadget.sketch_impl->getCurMode();
-  const int lo2bits = incoming_impl->getCurMode();
-
-  const int sw = (hi2bits << 2) | lo2bits;
-  switch (sw) {
-    case 0: { //src: LIST, gadget: LIST
-      pair_iterator_with_deleter<A> srcItr = src_impl->getIterator(); //LIST
-      while (srcItr->nextValid()) {
-        dstImpl = leak_free_coupon_update(dstImpl, srcItr->getPair()); //assignment required
-      }
-      //whichever is True wins:
-      dstImpl->putOutOfOrderFlag(dstImpl->isOutOfOrderFlag() | src_impl->isOutOfOrderFlag());
-      // gadget: cleanly updated as needed
-      break;
-    }
-    case 1: { //src: SET, gadget: LIST
-      //consider a swap here
-      pair_iterator_with_deleter<A> srcItr = src_impl->getIterator(); //SET
-      while (srcItr->nextValid()) {
-        dstImpl = leak_free_coupon_update(dstImpl, srcItr->getPair()); //assignment required
-      }
-      dstImpl->putOutOfOrderFlag(true); //SET oooFlag is always true
-      // gadget: cleanly updated as needed
-      break;
-    }
-    case 2: { //src: HLL, gadget: LIST
-      //swap so that src is gadget-LIST, tgt is HLL
-      //use lg_max_k because LIST has effective K of 2^26
-      src_impl = gadget.sketch_impl;
-      dstImpl = copy_or_downsample(incoming_impl, lg_max_k);
-      pair_iterator_with_deleter<A> srcItr = src_impl->getIterator();
-      while (srcItr->nextValid()) {
-        dstImpl = leak_free_coupon_update(dstImpl, srcItr->getPair()); //assignment required
-      }
-      //whichever is True wins:
-      dstImpl->putOutOfOrderFlag(src_impl->isOutOfOrderFlag() | dstImpl->isOutOfOrderFlag());
-      // gadget: swapped, replacing with new impl
-      gadget.sketch_impl->get_deleter()(gadget.sketch_impl);
-      break;
-    }
-    case 4: { //src: LIST, gadget: SET
-      pair_iterator_with_deleter<A> srcItr = src_impl->getIterator(); //LIST
-      while (srcItr->nextValid()) {
-        dstImpl = leak_free_coupon_update(dstImpl, srcItr->getPair()); //assignment required
+void hll_union_alloc<A>::union_impl(const hll_sketch_alloc<A>& sketch, const int lg_max_k) {
+  const HllSketchImpl<A>* src_impl = sketch.sketch_impl; //default
+  HllSketchImpl<A>* dst_impl = gadget.sketch_impl; //default
+  if (src_impl->getCurMode() == LIST or src_impl->getCurMode() == SET) {
 
 Review comment:
   Agreed. Perhaps we should replace it everywhere as a separate task.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org