You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2023/02/23 21:36:08 UTC

[datasketches-cpp] 01/01: check lgk range, handle > 12

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch hll_rel_err_fix
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git

commit f0824b2d6b7ced2de1c9fb291d29af5ae43adc54
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Thu Feb 23 13:36:00 2023 -0800

    check lgk range, handle > 12
---
 hll/include/HllArray-internal.hpp | 41 ++++-----------------------------------
 hll/include/HllUtil.hpp           | 19 ++++++++++++------
 2 files changed, 17 insertions(+), 43 deletions(-)

diff --git a/hll/include/HllArray-internal.hpp b/hll/include/HllArray-internal.hpp
index b80e120..1652912 100644
--- a/hll/include/HllArray-internal.hpp
+++ b/hll/include/HllArray-internal.hpp
@@ -322,48 +322,15 @@ double HllArray<A>::getLowerBound(uint8_t numStdDev) const {
   HllUtil<A>::checkNumStdDev(numStdDev);
   const uint32_t configK = 1 << this->lgConfigK_;
   const double numNonZeros = ((curMin_ == 0) ? (configK - numAtCurMin_) : configK);
-
-  double estimate;
-  double rseFactor;
-  if (oooFlag_) {
-    estimate = getCompositeEstimate();
-    rseFactor = hll_constants::HLL_NON_HIP_RSE_FACTOR;
-  } else {
-    estimate = hipAccum_;
-    rseFactor = hll_constants::HLL_HIP_RSE_FACTOR;
-  }
-
-  double relErr;
-  if (this->lgConfigK_ > 12) {
-    relErr = (numStdDev * rseFactor) / sqrt(configK);
-  } else {
-    relErr = HllUtil<A>::getRelErr(false, oooFlag_, this->lgConfigK_, numStdDev);
-  }
-  return fmax(estimate / (1.0 + relErr), numNonZeros);
+  const double relErr = HllUtil<A>::getRelErr(false, this->oooFlag_, this->lgConfigK_, numStdDev);
+  return fmax(getEstimate() / (1.0 + relErr), numNonZeros);
 }
 
 template<typename A>
 double HllArray<A>::getUpperBound(uint8_t numStdDev) const {
   HllUtil<A>::checkNumStdDev(numStdDev);
-  const uint32_t configK = 1 << this->lgConfigK_;
-
-  double estimate;
-  double rseFactor;
-  if (oooFlag_) {
-    estimate = getCompositeEstimate();
-    rseFactor = hll_constants::HLL_NON_HIP_RSE_FACTOR;
-  } else {
-    estimate = hipAccum_;
-    rseFactor = hll_constants::HLL_HIP_RSE_FACTOR;
-  }
-
-  double relErr;
-  if (this->lgConfigK_ > 12) {
-    relErr = (-1.0) * (numStdDev * rseFactor) / sqrt(configK);
-  } else {
-    relErr = HllUtil<A>::getRelErr(true, oooFlag_, this->lgConfigK_, numStdDev);
-  }
-  return estimate / (1.0 + relErr);
+  const double relErr = HllUtil<A>::getRelErr(true, this->oooFlag_, this->lgConfigK_, numStdDev);
+  return getEstimate() / (1.0 + relErr);
 }
 
 /**
diff --git a/hll/include/HllUtil.hpp b/hll/include/HllUtil.hpp
index a78dd83..2ac6f12 100644
--- a/hll/include/HllUtil.hpp
+++ b/hll/include/HllUtil.hpp
@@ -152,12 +152,6 @@ inline void HllUtil<A>::hash(const void* key, size_t keyLen, uint64_t seed, Hash
   MurmurHash3_x64_128(key, keyLen, seed, result);
 }
 
-template<typename A>
-inline double HllUtil<A>::getRelErr(bool upperBound, bool unioned,
-                                    uint8_t lgConfigK, uint8_t numStdDev) {
-  return RelativeErrorTables<A>::getRelErr(upperBound, unioned, lgConfigK, numStdDev);
-}
-
 template<typename A>
 inline uint8_t HllUtil<A>::checkLgK(uint8_t lgK) {
   if ((lgK >= hll_constants::MIN_LOG_K) && (lgK <= hll_constants::MAX_LOG_K)) {
@@ -167,6 +161,19 @@ inline uint8_t HllUtil<A>::checkLgK(uint8_t lgK) {
   }
 }
 
+template<typename A> // relative error used for bounds: closed form when lgConfigK > 12, lookup tables otherwise
+inline double HllUtil<A>::getRelErr(bool upperBound, bool unioned,
+                                    uint8_t lgConfigK, uint8_t numStdDev) {
+  checkLgK(lgConfigK); // range check against MIN_LOG_K..MAX_LOG_K before lgConfigK is used as a shift count
+  if (lgConfigK > 12) {
+    const double rseFactor = unioned ? // unioned selects the non-HIP factor, otherwise the HIP factor
+        hll_constants::HLL_NON_HIP_RSE_FACTOR : hll_constants::HLL_HIP_RSE_FACTOR;
+    return (upperBound ? -1 : 1) * (numStdDev * rseFactor) / sqrt(static_cast<double>(1u << lgConfigK)); // divide by sqrt(K), K = 2^lgConfigK (not sqrt of the exponent); negative for the upper bound
+  } else {
+    return RelativeErrorTables<A>::getRelErr(upperBound, unioned, lgConfigK, numStdDev);
+  }
+}
+
 template<typename A>
 inline void HllUtil<A>::checkMemSize(uint64_t minBytes, uint64_t capBytes) {
   if (capBytes < minBytes) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org