You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2023/02/23 21:36:07 UTC

[datasketches-cpp] branch hll_rel_err_fix created (now f0824b2)

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a change to branch hll_rel_err_fix
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git


      at f0824b2  check lgk range, handle > 12

This branch includes the following new commits:

     new f0824b2  check lgk range, handle > 12

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org


[datasketches-cpp] 01/01: check lgk range, handle > 12

Posted by al...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch hll_rel_err_fix
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git

commit f0824b2d6b7ced2de1c9fb291d29af5ae43adc54
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Thu Feb 23 13:36:00 2023 -0800

    check lgk range, handle > 12
---
 hll/include/HllArray-internal.hpp | 41 ++++-----------------------------------
 hll/include/HllUtil.hpp           | 19 ++++++++++++------
 2 files changed, 17 insertions(+), 43 deletions(-)

diff --git a/hll/include/HllArray-internal.hpp b/hll/include/HllArray-internal.hpp
index b80e120..1652912 100644
--- a/hll/include/HllArray-internal.hpp
+++ b/hll/include/HllArray-internal.hpp
@@ -322,48 +322,15 @@ double HllArray<A>::getLowerBound(uint8_t numStdDev) const {
   HllUtil<A>::checkNumStdDev(numStdDev);
   const uint32_t configK = 1 << this->lgConfigK_;
   const double numNonZeros = ((curMin_ == 0) ? (configK - numAtCurMin_) : configK);
-
-  double estimate;
-  double rseFactor;
-  if (oooFlag_) {
-    estimate = getCompositeEstimate();
-    rseFactor = hll_constants::HLL_NON_HIP_RSE_FACTOR;
-  } else {
-    estimate = hipAccum_;
-    rseFactor = hll_constants::HLL_HIP_RSE_FACTOR;
-  }
-
-  double relErr;
-  if (this->lgConfigK_ > 12) {
-    relErr = (numStdDev * rseFactor) / sqrt(configK);
-  } else {
-    relErr = HllUtil<A>::getRelErr(false, oooFlag_, this->lgConfigK_, numStdDev);
-  }
-  return fmax(estimate / (1.0 + relErr), numNonZeros);
+  const double relErr = HllUtil<A>::getRelErr(false, this->oooFlag_, this->lgConfigK_, numStdDev);
+  return fmax(getEstimate() / (1.0 + relErr), numNonZeros);
 }
 
 template<typename A>
 double HllArray<A>::getUpperBound(uint8_t numStdDev) const {
   HllUtil<A>::checkNumStdDev(numStdDev);
-  const uint32_t configK = 1 << this->lgConfigK_;
-
-  double estimate;
-  double rseFactor;
-  if (oooFlag_) {
-    estimate = getCompositeEstimate();
-    rseFactor = hll_constants::HLL_NON_HIP_RSE_FACTOR;
-  } else {
-    estimate = hipAccum_;
-    rseFactor = hll_constants::HLL_HIP_RSE_FACTOR;
-  }
-
-  double relErr;
-  if (this->lgConfigK_ > 12) {
-    relErr = (-1.0) * (numStdDev * rseFactor) / sqrt(configK);
-  } else {
-    relErr = HllUtil<A>::getRelErr(true, oooFlag_, this->lgConfigK_, numStdDev);
-  }
-  return estimate / (1.0 + relErr);
+  const double relErr = HllUtil<A>::getRelErr(true, this->oooFlag_, this->lgConfigK_, numStdDev);
+  return getEstimate() / (1.0 + relErr);
 }
 
 /**
diff --git a/hll/include/HllUtil.hpp b/hll/include/HllUtil.hpp
index a78dd83..2ac6f12 100644
--- a/hll/include/HllUtil.hpp
+++ b/hll/include/HllUtil.hpp
@@ -152,12 +152,6 @@ inline void HllUtil<A>::hash(const void* key, size_t keyLen, uint64_t seed, Hash
   MurmurHash3_x64_128(key, keyLen, seed, result);
 }
 
-template<typename A>
-inline double HllUtil<A>::getRelErr(bool upperBound, bool unioned,
-                                    uint8_t lgConfigK, uint8_t numStdDev) {
-  return RelativeErrorTables<A>::getRelErr(upperBound, unioned, lgConfigK, numStdDev);
-}
-
 template<typename A>
 inline uint8_t HllUtil<A>::checkLgK(uint8_t lgK) {
   if ((lgK >= hll_constants::MIN_LOG_K) && (lgK <= hll_constants::MAX_LOG_K)) {
@@ -167,6 +161,19 @@ inline uint8_t HllUtil<A>::checkLgK(uint8_t lgK) {
   }
 }
 
+template<typename A>
+inline double HllUtil<A>::getRelErr(bool upperBound, bool unioned, // signed relative error; negative when upperBound
+                                    uint8_t lgConfigK, uint8_t numStdDev) {
+  checkLgK(lgConfigK); // range-check lgConfigK against MIN_LOG_K / MAX_LOG_K before using it
+  if (lgConfigK > 12) { // closed form: numStdDev * rseFactor / sqrt(K), where K = 2^lgConfigK
+    const double rseFactor = unioned ? // unioned selects the non-HIP factor, otherwise the HIP factor
+        hll_constants::HLL_NON_HIP_RSE_FACTOR : hll_constants::HLL_HIP_RSE_FACTOR;
+    return (upperBound ? -1 : 1) * (numStdDev * rseFactor) / sqrt(static_cast<double>(1 << lgConfigK));
+  } else { // lgConfigK <= 12: delegate to RelativeErrorTables
+    return RelativeErrorTables<A>::getRelErr(upperBound, unioned, lgConfigK, numStdDev);
+  }
+}
+
 template<typename A>
 inline void HllUtil<A>::checkMemSize(uint64_t minBytes, uint64_t capBytes) {
   if (capBytes < minBytes) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org