You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2020/02/12 21:00:22 UTC
[incubator-datasketches-cpp] 01/01: HLL union speed improvement
This is an automated email from the ASF dual-hosted git repository.
alsay pushed a commit to branch hll_performance
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git
commit 9aa730853ef5a3c9958b8f5870af14c7ad58aa63
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Wed Feb 12 13:00:02 2020 -0800
HLL union speed improvement
---
common/CMakeLists.txt | 1 +
.../include/inv_pow2_table.hpp | 0
cpc/CMakeLists.txt | 2 -
cpc/include/cpc_sketch_impl.hpp | 2 +-
hll/CMakeLists.txt | 17 +-
hll/include/AuxHashMap-internal.hpp | 32 ++--
hll/include/AuxHashMap.hpp | 14 +-
hll/include/CouponHashSet-internal.hpp | 1 -
hll/include/CouponList-internal.hpp | 49 +++--
hll/include/CouponList.hpp | 8 +-
hll/include/Hll4Array-internal.hpp | 127 +++++--------
hll/include/Hll4Array.hpp | 30 +--
hll/include/Hll6Array-internal.hpp | 66 ++-----
hll/include/Hll6Array.hpp | 23 +--
hll/include/Hll8Array-internal.hpp | 108 ++++++-----
hll/include/Hll8Array.hpp | 23 +--
hll/include/HllArray-internal.hpp | 132 ++++++++-----
hll/include/HllArray.hpp | 54 ++++--
hll/include/HllPairIterator-internal.hpp | 99 ----------
hll/include/HllSketch-internal.hpp | 88 +++++----
hll/include/HllSketchImpl.hpp | 5 +-
hll/include/HllSketchImplFactory.hpp | 93 +---------
hll/include/HllUnion-internal.hpp | 206 +++++----------------
hll/include/IntArrayPairIterator-internal.hpp | 110 -----------
hll/include/PairIterator.hpp | 53 ------
...irIterator.hpp => coupon_iterator-internal.hpp} | 55 +++---
...ntArrayPairIterator.hpp => coupon_iterator.hpp} | 40 ++--
hll/include/hll.hpp | 23 +--
hll/include/hll.private.hpp | 7 +-
hll/test/AuxHashMapTest.cpp | 7 +-
hll/test/CMakeLists.txt | 7 +-
hll/test/CouponListTest.cpp | 24 +--
hll/test/CrossCountingTest.cpp | 59 +++---
hll/test/IsomorphicTest.cpp | 153 +++++++++++++++
34 files changed, 696 insertions(+), 1022 deletions(-)
diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt
index d5d37af..9f8484b 100644
--- a/common/CMakeLists.txt
+++ b/common/CMakeLists.txt
@@ -31,5 +31,6 @@ target_sources(common
${CMAKE_CURRENT_SOURCE_DIR}/include/MurmurHash3.h
${CMAKE_CURRENT_SOURCE_DIR}/include/serde.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/CommonUtil.hpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/include/inv_pow2_table.hpp
)
diff --git a/cpc/include/inv_pow_2_tab.hpp b/common/include/inv_pow2_table.hpp
similarity index 100%
rename from cpc/include/inv_pow_2_tab.hpp
rename to common/include/inv_pow2_table.hpp
diff --git a/cpc/CMakeLists.txt b/cpc/CMakeLists.txt
index 16c22b3..3fc0c3e 100644
--- a/cpc/CMakeLists.txt
+++ b/cpc/CMakeLists.txt
@@ -47,7 +47,6 @@ list(APPEND cpc_HEADERS "include/cpc_union.hpp")
list(APPEND cpc_HEADERS "include/cpc_union_impl.hpp")
list(APPEND cpc_HEADERS "include/cpc_util.hpp")
list(APPEND cpc_HEADERS "include/icon_estimator.hpp")
-list(APPEND cpc_HEADERS "include/inv_pow_2_tab.hpp")
list(APPEND cpc_HEADERS "include/kxp_byte_lookup.hpp")
list(APPEND cpc_HEADERS "include/u32_table.hpp")
list(APPEND cpc_HEADERS "include/u32_table_impl.hpp")
@@ -73,7 +72,6 @@ target_sources(cpc
${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_union_impl.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_util.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/icon_estimator.hpp
- ${CMAKE_CURRENT_SOURCE_DIR}/include/inv_pow_2_tab.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/kxp_byte_lookup.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/u32_table.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/u32_table_impl.hpp
diff --git a/cpc/include/cpc_sketch_impl.hpp b/cpc/include/cpc_sketch_impl.hpp
index 58eb108..db7250e 100644
--- a/cpc/include/cpc_sketch_impl.hpp
+++ b/cpc/include/cpc_sketch_impl.hpp
@@ -26,7 +26,7 @@
#include "cpc_confidence.hpp"
#include "kxp_byte_lookup.hpp"
-#include "inv_pow_2_tab.hpp"
+#include "inv_pow2_table.hpp"
#include "cpc_util.hpp"
#include "icon_estimator.hpp"
#include "serde.hpp"
diff --git a/hll/CMakeLists.txt b/hll/CMakeLists.txt
index b171b62..ca2c262 100644
--- a/hll/CMakeLists.txt
+++ b/hll/CMakeLists.txt
@@ -40,17 +40,17 @@ list(APPEND hll_HEADERS "include/hll.hpp;include/AuxHashMap.hpp;include/Composit
list(APPEND hll_HEADERS "include/CouponHashSet.hpp;include/CouponList.hpp")
list(APPEND hll_HEADERS "include/CubicInterpolation.hpp;include/HarmonicNumbers.hpp;include/Hll4Array.hpp")
list(APPEND hll_HEADERS "include/Hll6Array.hpp;include/Hll8Array.hpp;include/HllArray.hpp")
-list(APPEND hll_HEADERS "include/HllPairIterator.hpp;include/HllSketchImpl.hpp")
-list(APPEND hll_HEADERS "include/HllUtil.hpp;include/IntArrayPairIterator.hpp")
-list(APPEND hll_HEADERS "include/PairIterator.hpp;include/RelativeErrorTables.hpp;include/AuxHashMap-internal.hpp")
+list(APPEND hll_HEADERS "include/HllSketchImpl.hpp")
+list(APPEND hll_HEADERS "include/HllUtil.hpp;include/coupon_iterator.hpp")
+list(APPEND hll_HEADERS "include/RelativeErrorTables.hpp;include/AuxHashMap-internal.hpp")
list(APPEND hll_HEADERS "include/CompositeInterpolationXTable-internal.hpp")
list(APPEND hll_HEADERS "include/CouponHashSet-internal.hpp;include/CouponList-internal.hpp")
list(APPEND hll_HEADERS "include/CubicInterpolation-internal.hpp;include/HarmonicNumbers-internal.hpp")
list(APPEND hll_HEADERS "include/Hll4Array-internal.hpp;include/Hll6Array-internal.hpp")
list(APPEND hll_HEADERS "include/Hll8Array-internal.hpp;include/HllArray-internal.hpp")
-list(APPEND hll_HEADERS "include/HllPairIterator-internal.hpp;include/HllSketch-internal.hpp")
+list(APPEND hll_HEADERS "include/HllSketch-internal.hpp")
list(APPEND hll_HEADERS "include/HllSketchImpl-internal.hpp;include/HllUnion-internal.hpp")
-list(APPEND hll_HEADERS "include/IntArrayPairIterator-internal.hpp;include/RelativeErrorTables-internal.hpp")
+list(APPEND hll_HEADERS "include/coupon_iterator-internal.hpp;include/RelativeErrorTables-internal.hpp")
install(TARGETS hll
EXPORT ${PROJECT_NAME}
@@ -72,12 +72,10 @@ target_sources(hll
${CMAKE_CURRENT_SOURCE_DIR}/include/Hll6Array.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/Hll8Array.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/HllArray.hpp
- ${CMAKE_CURRENT_SOURCE_DIR}/include/HllPairIterator.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/HllSketchImpl.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/HllUtil.hpp
- ${CMAKE_CURRENT_SOURCE_DIR}/include/IntArrayPairIterator.hpp
- ${CMAKE_CURRENT_SOURCE_DIR}/include/PairIterator.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/RelativeErrorTables.hpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/include/coupon_iterator.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/AuxHashMap-internal.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/CompositeInterpolationXTable-internal.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/CouponHashSet-internal.hpp
@@ -88,10 +86,9 @@ target_sources(hll
${CMAKE_CURRENT_SOURCE_DIR}/include/Hll6Array-internal.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/Hll8Array-internal.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/HllArray-internal.hpp
- ${CMAKE_CURRENT_SOURCE_DIR}/include/HllPairIterator-internal.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/HllSketch-internal.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/HllSketchImpl-internal.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/HllUnion-internal.hpp
- ${CMAKE_CURRENT_SOURCE_DIR}/include/IntArrayPairIterator-internal.hpp
${CMAKE_CURRENT_SOURCE_DIR}/include/RelativeErrorTables-internal.hpp
+ ${CMAKE_CURRENT_SOURCE_DIR}/include/coupon_iterator-internal.hpp
)
diff --git a/hll/include/AuxHashMap-internal.hpp b/hll/include/AuxHashMap-internal.hpp
index 6f483fd..0d7db7a 100644
--- a/hll/include/AuxHashMap-internal.hpp
+++ b/hll/include/AuxHashMap-internal.hpp
@@ -22,12 +22,6 @@
#include "HllUtil.hpp"
#include "AuxHashMap.hpp"
-#include "PairIterator.hpp"
-#include "IntArrayPairIterator.hpp"
-
-#include <cstring>
-#include <sstream>
-#include <memory>
namespace datasketches {
@@ -202,20 +196,6 @@ int AuxHashMap<A>::getUpdatableSizeBytes() const {
}
template<typename A>
-std::unique_ptr<PairIterator<A>, std::function<void(PairIterator<A>*)>> AuxHashMap<A>::getIterator() const {
- typedef typename std::allocator_traits<A>::template rebind_alloc<IntArrayPairIterator<A>> iapiAlloc;
- IntArrayPairIterator<A>* itr = new (iapiAlloc().allocate(1)) IntArrayPairIterator<A>(auxIntArr, 1 << lgAuxArrInts, lgConfigK);
- return std::unique_ptr<PairIterator<A>, std::function<void(PairIterator<A>*)>>(
- itr,
- [](PairIterator<A>* ptr) {
- IntArrayPairIterator<A>* itr = static_cast<IntArrayPairIterator<A>*>(ptr);
- itr->~IntArrayPairIterator();
- iapiAlloc().deallocate(itr, 1);
- }
- );
-}
-
-template<typename A>
void AuxHashMap<A>::mustAdd(const int slotNo, const int value) {
const int index = find(auxIntArr, lgAuxArrInts, lgConfigK, slotNo);
const int entry_pair = HllUtil<A>::pair(slotNo, value);
@@ -231,7 +211,7 @@ void AuxHashMap<A>::mustAdd(const int slotNo, const int value) {
}
template<typename A>
-int AuxHashMap<A>::mustFindValueFor(const int slotNo) {
+int AuxHashMap<A>::mustFindValueFor(const int slotNo) const {
const int index = find(auxIntArr, lgAuxArrInts, lgConfigK, slotNo);
if (index >= 0) {
return HllUtil<A>::getValue(auxIntArr[index]);
@@ -306,6 +286,16 @@ int AuxHashMap<A>::find(const int* auxArr, const int lgAuxArrInts, const int lgC
throw std::runtime_error("Key not found and no empty slots!");
}
+template<typename A>
+coupon_iterator<A> AuxHashMap<A>::begin(bool all) const {
+ return coupon_iterator<A>(auxIntArr, 1 << lgAuxArrInts, 0, all);
+}
+
+template<typename A>
+coupon_iterator<A> AuxHashMap<A>::end() const {
+ return coupon_iterator<A>(auxIntArr, 1 << lgAuxArrInts, 1 << lgAuxArrInts, false);
+}
+
}
#endif // _AUXHASHMAP_INTERNAL_HPP_
diff --git a/hll/include/AuxHashMap.hpp b/hll/include/AuxHashMap.hpp
index e5a6a29..b37e85c 100644
--- a/hll/include/AuxHashMap.hpp
+++ b/hll/include/AuxHashMap.hpp
@@ -20,9 +20,11 @@
#ifndef _AUXHASHMAP_HPP_
#define _AUXHASHMAP_HPP_
-#include "PairIterator.hpp"
-
+#include <iostream>
#include <memory>
+#include <functional>
+
+#include "coupon_iterator.hpp"
namespace datasketches {
@@ -51,10 +53,12 @@ class AuxHashMap final {
int getAuxCount() const;
int* getAuxIntArr();
int getLgAuxArrInts() const;
- pair_iterator_with_deleter<A> getIterator() const;
+
+ coupon_iterator<A> begin(bool all = false) const;
+ coupon_iterator<A> end() const;
void mustAdd(int slotNo, int value);
- int mustFindValueFor(int slotNo);
+ int mustFindValueFor(int slotNo) const;
void mustReplace(int slotNo, int value);
private:
@@ -76,4 +80,4 @@ class AuxHashMap final {
#include "AuxHashMap-internal.hpp"
-#endif /* _AUXHASHMAP_HPP_ */
\ No newline at end of file
+#endif /* _AUXHASHMAP_HPP_ */
diff --git a/hll/include/CouponHashSet-internal.hpp b/hll/include/CouponHashSet-internal.hpp
index 1586a57..84c8199 100644
--- a/hll/include/CouponHashSet-internal.hpp
+++ b/hll/include/CouponHashSet-internal.hpp
@@ -22,7 +22,6 @@
#include "CouponHashSet.hpp"
-#include <string>
#include <cstring>
#include <exception>
diff --git a/hll/include/CouponList-internal.hpp b/hll/include/CouponList-internal.hpp
index ed9cde8..dc61645 100644
--- a/hll/include/CouponList-internal.hpp
+++ b/hll/include/CouponList-internal.hpp
@@ -23,11 +23,7 @@
#include "CouponList.hpp"
#include "CubicInterpolation.hpp"
#include "HllUtil.hpp"
-#include "IntArrayPairIterator.hpp"
-#include <iostream>
-#include <cstring>
-#include <cmath>
#include <algorithm>
namespace datasketches {
@@ -220,12 +216,10 @@ vector_u8<A> CouponList<A>::serialize(bool compact, unsigned header_size_bytes)
break;
}
case 1: { // src updatable, dst compact
- pair_iterator_with_deleter<A> itr = getIterator();
- bytes += getMemDataStart(); // reusing ponter for incremental writes
- while (itr->nextValid()) {
- const int pairValue = itr->getPair();
- std::memcpy(bytes, &pairValue, sizeof(pairValue));
- bytes += sizeof(pairValue);
+ bytes += getMemDataStart(); // reusing pointer for incremental writes
+ for (uint32_t coupon: *this) {
+ std::memcpy(bytes, &coupon, sizeof(coupon));
+ bytes += sizeof(coupon);
}
break;
}
@@ -278,10 +272,13 @@ void CouponList<A>::serialize(std::ostream& os, const bool compact) const {
break;
}
case 1: { // src updatable, dst compact
- pair_iterator_with_deleter<A> itr = getIterator();
- while (itr->nextValid()) {
- const int pairValue = itr->getPair();
- os.write((char*)&pairValue, sizeof(pairValue));
+// pair_iterator_with_deleter<A> itr = getIterator();
+// while (itr->nextValid()) {
+// const int pairValue = itr->getPair();
+// os.write((char*)&pairValue, sizeof(pairValue));
+// }
+ for (uint32_t coupon: *this) {
+ os.write((char*)&coupon, sizeof(coupon));
}
break;
}
@@ -396,20 +393,6 @@ int* CouponList<A>::getCouponIntArr() const {
}
template<typename A>
-pair_iterator_with_deleter<A> CouponList<A>::getIterator() const {
- typedef typename std::allocator_traits<A>::template rebind_alloc<IntArrayPairIterator<A>> iapiAlloc;
- IntArrayPairIterator<A>* itr = new (iapiAlloc().allocate(1)) IntArrayPairIterator<A>(couponIntArr, 1 << lgCouponArrInts, this->lgConfigK);
- return pair_iterator_with_deleter<A>(
- itr,
- [](PairIterator<A>* ptr) {
- IntArrayPairIterator<A>* iapi = static_cast<IntArrayPairIterator<A>*>(ptr);
- iapi->~IntArrayPairIterator();
- iapiAlloc().deallocate(iapi, 1);
- }
- );
-}
-
-template<typename A>
HllSketchImpl<A>* CouponList<A>::promoteHeapListToSet(CouponList& list) {
return HllSketchImplFactory<A>::promoteListToSet(list);
}
@@ -419,6 +402,16 @@ HllSketchImpl<A>* CouponList<A>::promoteHeapListOrSetToHll(CouponList& src) {
return HllSketchImplFactory<A>::promoteListOrSetToHll(src);
}
+template<typename A>
+coupon_iterator<A> CouponList<A>::begin(bool all) const {
+ return coupon_iterator<A>(couponIntArr, 1 << lgCouponArrInts, 0, all);
+}
+
+template<typename A>
+coupon_iterator<A> CouponList<A>::end() const {
+ return coupon_iterator<A>(couponIntArr, 1 << lgCouponArrInts, 1 << lgCouponArrInts, false);
+}
+
}
#endif // _COUPONLIST_INTERNAL_HPP_
diff --git a/hll/include/CouponList.hpp b/hll/include/CouponList.hpp
index bcdfedf..063805b 100644
--- a/hll/include/CouponList.hpp
+++ b/hll/include/CouponList.hpp
@@ -21,7 +21,9 @@
#define _COUPONLIST_HPP_
#include "HllSketchImpl.hpp"
-#include "PairIterator.hpp"
+#include "coupon_iterator.hpp"
+
+#include <iostream>
namespace datasketches {
@@ -55,7 +57,9 @@ class CouponList : public HllSketchImpl<A> {
virtual bool isEmpty() const;
virtual int getCouponCount() const;
- virtual pair_iterator_with_deleter<A> getIterator() const;
+
+ coupon_iterator<A> begin(bool all = false) const;
+ coupon_iterator<A> end() const;
protected:
typedef typename std::allocator_traits<A>::template rebind_alloc<CouponList<A>> clAlloc;
diff --git a/hll/include/Hll4Array-internal.hpp b/hll/include/Hll4Array-internal.hpp
index a0897f1..4f47817 100644
--- a/hll/include/Hll4Array-internal.hpp
+++ b/hll/include/Hll4Array-internal.hpp
@@ -30,26 +30,6 @@
namespace datasketches {
template<typename A>
-Hll4Iterator<A>::Hll4Iterator(const Hll4Array<A>& hllArray, const int lengthPairs)
- : HllPairIterator<A>(lengthPairs),
- hllArray(hllArray)
-{}
-
-template<typename A>
-Hll4Iterator<A>::~Hll4Iterator() { }
-
-template<typename A>
-int Hll4Iterator<A>::value() {
- const int nib = hllArray.getSlot(this->index);
- if (nib == HllUtil<A>::AUX_TOKEN) {
- // auxHashMap cannot be null here
- return hllArray.getAuxHashMap()->mustFindValueFor(this->index);
- } else {
- return nib + hllArray.getCurMin();
- }
-}
-
-template<typename A>
Hll4Array<A>::Hll4Array(const int lgConfigK, const bool startFullSize) :
HllArray<A>(lgConfigK, target_hll_type::HLL_4, startFullSize) {
const int numBytes = this->hll4ArrBytes(lgConfigK);
@@ -97,28 +77,6 @@ Hll4Array<A>* Hll4Array<A>::copy() const {
}
template<typename A>
-pair_iterator_with_deleter<A> Hll4Array<A>::getIterator() const {
- typedef typename std::allocator_traits<A>::template rebind_alloc<Hll4Iterator<A>> itrAlloc;
- Hll4Iterator<A>* itr = new (itrAlloc().allocate(1)) Hll4Iterator<A>(*this, 1 << this->lgConfigK);
- return pair_iterator_with_deleter<A>(
- itr,
- [](PairIterator<A>* ptr) {
- Hll4Iterator<A>* hll = static_cast<Hll4Iterator<A>*>(ptr);
- hll->~Hll4Iterator();
- itrAlloc().deallocate(hll, 1);
- }
- );
-}
-
-template<typename A>
-pair_iterator_with_deleter<A> Hll4Array<A>::getAuxIterator() const {
- if (auxHashMap != nullptr) {
- return auxHashMap->getIterator();
- }
- return nullptr;
-}
-
-template<typename A>
int Hll4Array<A>::getUpdatableSerializationBytes() const {
AuxHashMap<A>* auxHashMap = getAuxHashMap();
int auxBytes;
@@ -146,71 +104,68 @@ void Hll4Array<A>::putAuxHashMap(AuxHashMap<A>* auxHashMap) {
}
template<typename A>
-int Hll4Array<A>::getSlot(const int slotNo) const {
- int theByte = this->hllByteArr[slotNo >> 1];
+uint8_t Hll4Array<A>::getSlot(int slotNo) const {
+ const uint8_t byte = this->hllByteArr[slotNo >> 1];
if ((slotNo & 1) > 0) { // odd?
- theByte >>= 4;
+ return byte >> 4;
}
- return theByte & HllUtil<A>::loNibbleMask;
+ return byte & HllUtil<A>::loNibbleMask;
+}
+
+template<typename A>
+uint8_t Hll4Array<A>::get_value(uint32_t index) const {
+ const uint8_t value = getSlot(index);
+ if (value != HllUtil<A>::AUX_TOKEN) return value;
+ return auxHashMap->mustFindValueFor(index);
}
template<typename A>
HllSketchImpl<A>* Hll4Array<A>::couponUpdate(const int coupon) {
- const int newValue = HllUtil<A>::getValue(coupon);
- if (newValue <= 0) {
- throw std::logic_error("newValue must be a posittive integer. Found: " + std::to_string(newValue));
- }
+ internalCouponUpdate(coupon);
+ return this;
+}
+template<typename A>
+void Hll4Array<A>::internalCouponUpdate(const int coupon) {
+ const int newValue = HllUtil<A>::getValue(coupon);
if (newValue <= this->curMin) {
- return this; // quick rejection, but only works for large N
+ return; // quick rejection, but only works for large N
}
-
const int configKmask = (1 << this->lgConfigK) - 1;
const int slotNo = HllUtil<A>::getLow26(coupon) & configKmask;
internalHll4Update(slotNo, newValue);
- return this;
}
template<typename A>
-void Hll4Array<A>::putSlot(const int slotNo, const int newValue) {
+void Hll4Array<A>::putSlot(int slotNo, uint8_t newValue) {
const int byteno = slotNo >> 1;
- const int oldValue = this->hllByteArr[byteno];
+ const uint8_t oldValue = this->hllByteArr[byteno];
if ((slotNo & 1) == 0) { // set low nibble
this->hllByteArr[byteno]
- = (uint8_t) ((oldValue & HllUtil<A>::hiNibbleMask) | (newValue & HllUtil<A>::loNibbleMask));
+ = ((oldValue & HllUtil<A>::hiNibbleMask) | (newValue & HllUtil<A>::loNibbleMask));
} else { // set high nibble
this->hllByteArr[byteno]
- = (uint8_t) ((oldValue & HllUtil<A>::loNibbleMask) | ((newValue << 4) & HllUtil<A>::hiNibbleMask));
+ = ((oldValue & HllUtil<A>::loNibbleMask) | ((newValue << 4) & HllUtil<A>::hiNibbleMask));
}
}
//In C: two-registers.c Line 836 in "hhb_abstract_set_slot_if_new_value_bigger" non-sparse
template<typename A>
void Hll4Array<A>::internalHll4Update(const int slotNo, const int newVal) {
- if ((slotNo < 0) || (slotNo >= (1 << this->lgConfigK))) {
- throw std::logic_error("slotNo must be between 0 and 1<<lgConfigK. Found: " + std::to_string(slotNo));
- }
- if (newVal <= 0) {
- throw std::logic_error("newVal must be a posittive integer. Found: " + std::to_string(newVal));
- }
const int rawStoredOldValue = getSlot(slotNo); // could be a 0
// this is provably a LB:
const int lbOnOldValue = rawStoredOldValue + this->curMin; // lower bound, could be 0
if (newVal > lbOnOldValue) { // 842
- // Note: if an AUX_TOKEN exists, then auxHashMap must alraedy exist
+ // Note: if an AUX_TOKEN exists, then auxHashMap must already exist
// 846: rawStoredOldValue == AUX_TOKEN
const int actualOldValue = (rawStoredOldValue < HllUtil<A>::AUX_TOKEN)
? (lbOnOldValue) : (auxHashMap->mustFindValueFor(slotNo));
if (newVal > actualOldValue) { // 848: actualOldValue could still be 0; newValue > 0
// we know that hte array will change, but we haven't actually updated yet
- this->hipAndKxQIncrementalUpdate(*this, actualOldValue, newVal);
-
- if (newVal < this->curMin) {
- throw std::logic_error("newVal cannot be less than curMin at this point");
- }
+ this->hipAndKxQIncrementalUpdate(actualOldValue, newVal);
// newVal >= curMin
@@ -232,10 +187,8 @@ void Hll4Array<A>::internalHll4Update(const int slotNo, const int newVal) {
else { // case 2: 885
// This is the hypothetical case where the old value is an exception and the new one is not,
// which is impossible given that curMin has not changed here and newVal > oldValue
- throw std::runtime_error("Impossible case");
}
- }
- else { // rawStoredOldValue != AUX_TOKEN
+ } else { // rawStoredOldValue != AUX_TOKEN
if (shiftedNewValue >= HllUtil<A>::AUX_TOKEN) { // case 3: 892
// This is the case where the old value is not an exception and the new value is.
// The AUX_TOKEN must be stored in the 4-bit array and the new value
@@ -255,9 +208,6 @@ void Hll4Array<A>::internalHll4Update(const int slotNo, const int newVal) {
// we just increased a pair value, so it might be time to change curMin
if (actualOldValue == this->curMin) { // 908
- if (this->numAtCurMin < 1) {
- throw std::logic_error("Invalid state with < 1 entry at curMin");
- }
this->decNumAtCurMin();
while (this->numAtCurMin == 0) {
shiftToBiggerCurMin(); // increases curMin by 1, builds a new aux table
@@ -312,10 +262,9 @@ void Hll4Array<A>::shiftToBiggerCurMin() {
int oldActualVal;
int newShiftedVal;
- pair_iterator_with_deleter<A> itr = auxHashMap->getIterator();
- while (itr->nextValid()) {
- slotNum = itr->getKey() & configKmask;
- oldActualVal = itr->getValue();
+ for (auto coupon: *auxHashMap) {
+ slotNum = HllUtil<A>::getLow26(coupon) & configKmask;
+ oldActualVal = HllUtil<A>::getValue(coupon);
newShiftedVal = oldActualVal - newCurMin;
if (newShiftedVal < 0) {
throw std::logic_error("oldActualVal < newCurMin when incrementing curMin");
@@ -333,8 +282,7 @@ void Hll4Array<A>::shiftToBiggerCurMin() {
// Correct the AUX_TOKEN value in the HLL array to the newShiftedVal (14).
putSlot(slotNum, newShiftedVal);
numAuxTokens--;
- }
- else { //newShiftedVal >= AUX_TOKEN
+ } else { //newShiftedVal >= AUX_TOKEN
// the former exception remains an exception, so must be added to the newAuxMap
if (newAuxMap == nullptr) {
newAuxMap = AuxHashMap<A>::newAuxHashMap(HllUtil<A>::LG_AUX_ARR_INTS[this->lgConfigK], this->lgConfigK);
@@ -365,6 +313,23 @@ void Hll4Array<A>::shiftToBiggerCurMin() {
this->numAtCurMin = numAtNewCurMin;
}
+template<typename A>
+typename HllArray<A>::const_iterator Hll4Array<A>::begin(bool all) const {
+ return typename HllArray<A>::const_iterator(this->hllByteArr, 1 << this->lgConfigK, 0, this->tgtHllType, auxHashMap, this->curMin, all);
+}
+
+template<typename A>
+typename HllArray<A>::const_iterator Hll4Array<A>::end() const {
+ return typename HllArray<A>::const_iterator(this->hllByteArr, 1 << this->lgConfigK, 1 << this->lgConfigK, this->tgtHllType, auxHashMap, this->curMin, false);
+}
+
+template<typename A>
+void Hll4Array<A>::mergeHll(const HllArray<A>& src) {
+ for (auto coupon: src) {
+ internalCouponUpdate(coupon);
+ }
+}
+
}
#endif // _HLL4ARRAY_INTERNAL_HPP_
diff --git a/hll/include/Hll4Array.hpp b/hll/include/Hll4Array.hpp
index e407d40..ff56c86 100644
--- a/hll/include/Hll4Array.hpp
+++ b/hll/include/Hll4Array.hpp
@@ -20,7 +20,6 @@
#ifndef _HLL4ARRAY_HPP_
#define _HLL4ARRAY_HPP_
-#include "HllPairIterator.hpp"
#include "AuxHashMap.hpp"
#include "HllArray.hpp"
@@ -40,40 +39,29 @@ class Hll4Array final : public HllArray<A> {
virtual Hll4Array* copy() const;
- virtual pair_iterator_with_deleter<A> getIterator() const;
- virtual pair_iterator_with_deleter<A> getAuxIterator() const;
-
- virtual int getSlot(int slotNo) const final;
- virtual void putSlot(int slotNo, int value) final;
+ inline uint8_t getSlot(int slotNo) const;
+ inline void putSlot(int slotNo, uint8_t value);
+ inline uint8_t get_value(uint32_t index) const;
virtual int getUpdatableSerializationBytes() const;
virtual int getHllByteArrBytes() const;
virtual HllSketchImpl<A>* couponUpdate(int coupon) final;
+ void mergeHll(const HllArray<A>& src);
virtual AuxHashMap<A>* getAuxHashMap() const;
// does *not* delete old map if overwriting
void putAuxHashMap(AuxHashMap<A>* auxHashMap);
- protected:
+ virtual typename HllArray<A>::const_iterator begin(bool all = false) const;
+ virtual typename HllArray<A>::const_iterator end() const;
+
+ private:
+ void internalCouponUpdate(int coupon);
void internalHll4Update(int slotNo, int newVal);
void shiftToBiggerCurMin();
AuxHashMap<A>* auxHashMap;
-
- friend class Hll4Iterator<A>;
-};
-
-template<typename A>
-class Hll4Iterator : public HllPairIterator<A> {
- public:
- Hll4Iterator(const Hll4Array<A>& array, int lengthPairs);
- virtual int value();
-
- virtual ~Hll4Iterator();
-
- private:
- const Hll4Array<A>& hllArray;
};
}
diff --git a/hll/include/Hll6Array-internal.hpp b/hll/include/Hll6Array-internal.hpp
index 486e4f2..a318564 100644
--- a/hll/include/Hll6Array-internal.hpp
+++ b/hll/include/Hll6Array-internal.hpp
@@ -27,26 +27,6 @@
namespace datasketches {
template<typename A>
-Hll6Iterator<A>::Hll6Iterator(const Hll6Array<A>& hllArray, const int lengthPairs)
- : HllPairIterator<A>(lengthPairs),
- hllArray(hllArray),
- bitOffset(-6)
-{}
-
-template<typename A>
-Hll6Iterator<A>::~Hll6Iterator() { }
-
-template<typename A>
-int Hll6Iterator<A>::value() {
- bitOffset += 6;
- const int shift = bitOffset & 0x7;
- const int byteIdx = bitOffset >> 3;
- const uint16_t twoByteVal = (hllArray.hllByteArr[byteIdx + 1] << 8)
- | hllArray.hllByteArr[byteIdx];
- return (uint8_t) (twoByteVal >> shift) & 0x3F;
-}
-
-template<typename A>
Hll6Array<A>::Hll6Array(const int lgConfigK, const bool startFullSize) :
HllArray<A>(lgConfigK, target_hll_type::HLL_6, startFullSize) {
const int numBytes = this->hll6ArrBytes(lgConfigK);
@@ -84,37 +64,23 @@ Hll6Array<A>* Hll6Array<A>::copy() const {
}
template<typename A>
-pair_iterator_with_deleter<A> Hll6Array<A>::getIterator() const {
- typedef typename std::allocator_traits<A>::template rebind_alloc<Hll6Iterator<A>> itrAlloc;
- Hll6Iterator<A>* itr = new (itrAlloc().allocate(1)) Hll6Iterator<A>(*this, 1 << this->lgConfigK);
- return pair_iterator_with_deleter<A>(
- itr,
- [](PairIterator<A>* ptr) {
- Hll6Iterator<A>* hll = static_cast<Hll6Iterator<A>*>(ptr);
- hll->~Hll6Iterator();
- itrAlloc().deallocate(hll, 1);
- }
- );
-}
-
-template<typename A>
-int Hll6Array<A>::getSlot(const int slotNo) const {
+uint8_t Hll6Array<A>::getSlot(int slotNo) const {
const int startBit = slotNo * 6;
const int shift = startBit & 0x7;
const int byteIdx = startBit >> 3;
const uint16_t twoByteVal = (this->hllByteArr[byteIdx + 1] << 8) | this->hllByteArr[byteIdx];
- return (uint8_t) (twoByteVal >> shift) & 0x3F;
+ return (twoByteVal >> shift) & HllUtil<A>::VAL_MASK_6;
}
template<typename A>
-void Hll6Array<A>::putSlot(const int slotNo, const int value) {
+void Hll6Array<A>::putSlot(int slotNo, uint8_t value) {
const int startBit = slotNo * 6;
const int shift = startBit & 0x7;
const int byteIdx = startBit >> 3;
const uint16_t valShifted = (value & 0x3F) << shift;
uint16_t curMasked = (this->hllByteArr[byteIdx + 1] << 8) | this->hllByteArr[byteIdx];
curMasked &= (~(HllUtil<A>::VAL_MASK_6 << shift));
- uint16_t insert = curMasked | valShifted;
+ const uint16_t insert = curMasked | valShifted;
this->hllByteArr[byteIdx] = insert & 0xFF;
this->hllByteArr[byteIdx + 1] = (insert & 0xFF00) >> 8;
}
@@ -126,25 +92,31 @@ int Hll6Array<A>::getHllByteArrBytes() const {
template<typename A>
HllSketchImpl<A>* Hll6Array<A>::couponUpdate(const int coupon) {
+ internalCouponUpdate(coupon);
+ return this;
+}
+
+template<typename A>
+void Hll6Array<A>::internalCouponUpdate(const int coupon) {
const int configKmask = (1 << this->lgConfigK) - 1;
const int slotNo = HllUtil<A>::getLow26(coupon) & configKmask;
const int newVal = HllUtil<A>::getValue(coupon);
- if (newVal <= 0) {
- throw std::logic_error("newVal must be a positive integer: " + std::to_string(newVal));
- }
const int curVal = getSlot(slotNo);
if (newVal > curVal) {
putSlot(slotNo, newVal);
- this->hipAndKxQIncrementalUpdate(*this, curVal, newVal);
+ this->hipAndKxQIncrementalUpdate(curVal, newVal);
if (curVal == 0) {
- this->decNumAtCurMin(); // interpret numAtCurMin as num zeros
- if (this->getNumAtCurMin() < 0) {
- throw std::logic_error("getNumAtCurMin() must return a nonnegative integer: " + std::to_string(this->getNumAtCurMin()));
- }
+ this->numAtCurMin--; // interpret numAtCurMin as num zeros
}
}
- return this;
+}
+
+template<typename A>
+void Hll6Array<A>::mergeHll(const HllArray<A>& src) {
+ for (auto coupon: src) {
+ internalCouponUpdate(coupon);
+ }
}
}
diff --git a/hll/include/Hll6Array.hpp b/hll/include/Hll6Array.hpp
index 5b468db..5178de8 100644
--- a/hll/include/Hll6Array.hpp
+++ b/hll/include/Hll6Array.hpp
@@ -21,7 +21,6 @@
#define _HLL6ARRAY_HPP_
#include "HllArray.hpp"
-#include "HllPairIterator.hpp"
namespace datasketches {
@@ -39,30 +38,16 @@ class Hll6Array final : public HllArray<A> {
virtual Hll6Array* copy() const;
- virtual pair_iterator_with_deleter<A> getIterator() const;
-
- virtual int getSlot(int slotNo) const final;
- virtual void putSlot(int slotNo, int value) final;
+ inline uint8_t getSlot(int slotNo) const;
+ inline void putSlot(int slotNo, uint8_t value);
virtual HllSketchImpl<A>* couponUpdate(int coupon) final;
+ void mergeHll(const HllArray<A>& src);
virtual int getHllByteArrBytes() const;
- protected:
- friend class Hll6Iterator<A>;
-};
-
-template<typename A>
-class Hll6Iterator : public HllPairIterator<A> {
- public:
- Hll6Iterator(const Hll6Array<A>& array, int lengthPairs);
- virtual int value();
-
- virtual ~Hll6Iterator();
-
private:
- const Hll6Array<A>& hllArray;
- int bitOffset;
+ void internalCouponUpdate(int coupon);
};
}
diff --git a/hll/include/Hll8Array-internal.hpp b/hll/include/Hll8Array-internal.hpp
index 2df99d9..ce93ff5 100644
--- a/hll/include/Hll8Array-internal.hpp
+++ b/hll/include/Hll8Array-internal.hpp
@@ -22,25 +22,9 @@
#include "Hll8Array.hpp"
-#include <cstring>
-
namespace datasketches {
template<typename A>
-Hll8Iterator<A>::Hll8Iterator(const Hll8Array<A>& hllArray, const int lengthPairs)
- : HllPairIterator<A>(lengthPairs),
- hllArray(hllArray)
-{}
-
-template<typename A>
-Hll8Iterator<A>::~Hll8Iterator() { }
-
-template<typename A>
-int Hll8Iterator<A>::value() {
- return hllArray.hllByteArr[this->index] & HllUtil<A>::VAL_MASK_6;
-}
-
-template<typename A>
Hll8Array<A>::Hll8Array(const int lgConfigK, const bool startFullSize) :
HllArray<A>(lgConfigK, target_hll_type::HLL_8, startFullSize) {
const int numBytes = this->hll8ArrBytes(lgConfigK);
@@ -78,55 +62,95 @@ Hll8Array<A>* Hll8Array<A>::copy() const {
}
template<typename A>
-pair_iterator_with_deleter<A> Hll8Array<A>::getIterator() const {
- typedef typename std::allocator_traits<A>::template rebind_alloc<Hll8Iterator<A>> itrAlloc;
- Hll8Iterator<A>* itr = new (itrAlloc().allocate(1)) Hll8Iterator<A>(*this, 1 << this->lgConfigK);
- return pair_iterator_with_deleter<A>(
- itr,
- [](PairIterator<A>* ptr) {
- Hll8Iterator<A>* hll = static_cast<Hll8Iterator<A>*>(ptr);
- hll->~Hll8Iterator();
- itrAlloc().deallocate(hll, 1);
- }
- );
+uint8_t Hll8Array<A>::getSlot(const int slotNo) const {
+ return this->hllByteArr[slotNo];
}
template<typename A>
-int Hll8Array<A>::getSlot(const int slotNo) const {
- return (int) this->hllByteArr[slotNo] & HllUtil<A>::VAL_MASK_6;
+void Hll8Array<A>::putSlot(const int slotNo, uint8_t value) {
+ this->hllByteArr[slotNo] = value;
}
template<typename A>
-void Hll8Array<A>::putSlot(const int slotNo, const int value) {
- this->hllByteArr[slotNo] = value & HllUtil<A>::VAL_MASK_6;
+int Hll8Array<A>::getHllByteArrBytes() const {
+ return this->hll8ArrBytes(this->lgConfigK);
}
template<typename A>
-int Hll8Array<A>::getHllByteArrBytes() const {
- return this->hll8ArrBytes(this->lgConfigK);
+HllSketchImpl<A>* Hll8Array<A>::couponUpdate(int coupon) {
+ internalCouponUpdate(coupon);
+ return this;
}
template<typename A>
-HllSketchImpl<A>* Hll8Array<A>::couponUpdate(const int coupon) { // used by HLL_8 and HLL_6
+void Hll8Array<A>::internalCouponUpdate(int coupon) {
const int configKmask = (1 << this->lgConfigK) - 1;
const int slotNo = HllUtil<A>::getLow26(coupon) & configKmask;
const int newVal = HllUtil<A>::getValue(coupon);
- if (newVal <= 0) {
- throw std::logic_error("newVal must be a positive integer: " + std::to_string(newVal));
- }
const int curVal = getSlot(slotNo);
if (newVal > curVal) {
putSlot(slotNo, newVal);
- this->hipAndKxQIncrementalUpdate(*this, curVal, newVal);
+ this->hipAndKxQIncrementalUpdate(curVal, newVal);
if (curVal == 0) {
- this->decNumAtCurMin(); // interpret numAtCurMin as num zeros
- if (this->getNumAtCurMin() < 0) {
- throw std::logic_error("getNumAtCurMin() must return a nonnegative integer: " + std::to_string(this->getNumAtCurMin()));
+ this->numAtCurMin--; // interpret numAtCurMin as num zeros
+ }
+ }
+}
+
+template<typename A>
+void Hll8Array<A>::mergeList(const CouponList<A>& src) {
+ for (auto coupon: src) {
+ internalCouponUpdate(coupon);
+ }
+}
+
+template<typename A>
+void Hll8Array<A>::mergeHll(const HllArray<A>& src) {
+ // at this point src_k >= dst_k
+ const int src_k = 1 << src.getLgConfigK();
+ const int dst_mask = (1 << this->getLgConfigK()) - 1;
+ // duplication below is to avoid a virtual method call in a loop
+ if (src.getTgtHllType() == target_hll_type::HLL_8) {
+ for (int i = 0; i < src_k; i++) {
+ const uint8_t src_v = static_cast<const Hll8Array<A>&>(src).getSlot(i);
+ const int j = i & dst_mask;
+ const uint8_t dst_v = this->hllByteArr[j];
+ this->hllByteArr[j] = src_v > dst_v ? src_v : dst_v;
+ if (src_v > dst_v) {
+ this->hipAndKxQIncrementalUpdate(dst_v, src_v);
+ if (dst_v == 0) {
+ this->numAtCurMin--;
+ }
+ }
+ }
+ } else if (src.getTgtHllType() == target_hll_type::HLL_6) {
+ for (int i = 0; i < src_k; i++) {
+ const uint8_t src_v = static_cast<const Hll6Array<A>&>(src).getSlot(i);
+ const int j = i & dst_mask;
+ const uint8_t dst_v = this->hllByteArr[j];
+ this->hllByteArr[j] = src_v > dst_v ? src_v : dst_v;
+ if (src_v > dst_v) {
+ this->hipAndKxQIncrementalUpdate(dst_v, src_v);
+ if (dst_v == 0) {
+ this->numAtCurMin--;
+ }
+ }
+ }
+ } else { // HLL_4
+ for (int i = 0; i < src_k; i++) {
+ const uint8_t src_v = static_cast<const Hll4Array<A>&>(src).get_value(i);
+ const int j = i & dst_mask;
+ const uint8_t dst_v = this->hllByteArr[j];
+ this->hllByteArr[j] = src_v > dst_v ? src_v : dst_v;
+ if (src_v > dst_v) {
+ this->hipAndKxQIncrementalUpdate(dst_v, src_v);
+ if (dst_v == 0) {
+ this->numAtCurMin--;
+ }
}
}
}
- return this;
}
}
diff --git a/hll/include/Hll8Array.hpp b/hll/include/Hll8Array.hpp
index 7a79b84..2b0aefc 100644
--- a/hll/include/Hll8Array.hpp
+++ b/hll/include/Hll8Array.hpp
@@ -21,7 +21,6 @@
#define _HLL8ARRAY_HPP_
#include "HllArray.hpp"
-#include "HllPairIterator.hpp"
namespace datasketches {
@@ -39,29 +38,17 @@ class Hll8Array final : public HllArray<A> {
virtual Hll8Array<A>* copy() const;
- virtual pair_iterator_with_deleter<A> getIterator() const;
-
- virtual int getSlot(int slotNo) const final;
- virtual void putSlot(int slotNo, int value) final;
+ inline uint8_t getSlot(int slotNo) const;
+ inline void putSlot(int slotNo, uint8_t value);
virtual HllSketchImpl<A>* couponUpdate(int coupon) final;
+ void mergeList(const CouponList<A>& src);
+ void mergeHll(const HllArray<A>& src);
virtual int getHllByteArrBytes() const;
- protected:
- friend class Hll8Iterator<A>;
-};
-
-template<typename A>
-class Hll8Iterator : public HllPairIterator<A> {
- public:
- Hll8Iterator(const Hll8Array<A>& array, int lengthPairs);
- virtual int value();
-
- virtual ~Hll8Iterator();
-
private:
- const Hll8Array<A>& hllArray;
+ inline void internalCouponUpdate(int coupon);
};
}
diff --git a/hll/include/HllArray-internal.hpp b/hll/include/HllArray-internal.hpp
index ee68371..f99180c 100644
--- a/hll/include/HllArray-internal.hpp
+++ b/hll/include/HllArray-internal.hpp
@@ -26,11 +26,11 @@
#include "CubicInterpolation.hpp"
#include "CompositeInterpolationXTable.hpp"
#include "CouponList.hpp"
-
#include <cstring>
#include <cmath>
#include <stdexcept>
#include <string>
+#include "../../common/include/inv_pow2_table.hpp"
namespace datasketches {
@@ -47,16 +47,16 @@ HllArray<A>::HllArray(const int lgConfigK, const target_hll_type tgtHllType, boo
}
template<typename A>
-HllArray<A>::HllArray(const HllArray<A>& that)
- : HllSketchImpl<A>(that.lgConfigK, that.tgtHllType, hll_mode::HLL, that.startFullSize) {
- hipAccum = that.getHipAccum();
- kxq0 = that.getKxQ0();
- kxq1 = that.getKxQ1();
- curMin = that.getCurMin();
- numAtCurMin = that.getNumAtCurMin();
- oooFlag = that.isOutOfOrderFlag();
-
- // can determine length, so allocate here
+HllArray<A>::HllArray(const HllArray<A>& that):
+HllSketchImpl<A>(that.lgConfigK, that.tgtHllType, hll_mode::HLL, that.startFullSize),
+hipAccum(that.hipAccum),
+kxq0(that.kxq0),
+kxq1(that.kxq1),
+hllByteArr(nullptr),
+curMin(that.curMin),
+numAtCurMin(that.numAtCurMin),
+oooFlag(that.oooFlag)
+{
const int arrayLen = that.getHllByteArrBytes();
typedef typename std::allocator_traits<A>::template rebind_alloc<uint8_t> uint8Alloc;
hllByteArr = uint8Alloc().allocate(arrayLen);
@@ -74,7 +74,6 @@ HllArray<A>::~HllArray() {
} else { // tgtHllType == HLL_8
hllArrBytes = hll8ArrBytes(this->lgConfigK);
}
-
typedef typename std::allocator_traits<A>::template rebind_alloc<uint8_t> uint8Alloc;
uint8Alloc().deallocate(hllByteArr, hllArrBytes);
}
@@ -249,11 +248,9 @@ vector_u8<A> HllArray<A>::serialize(bool compact, unsigned header_size_bytes) co
bytes += getMemDataStart() + hllByteArrBytes; // start of auxHashMap
if (auxHashMap != nullptr) {
if (compact) {
- pair_iterator_with_deleter<A> itr = auxHashMap->getIterator();
- while (itr->nextValid()) {
- const int pairValue = itr->getPair();
- std::memcpy(bytes, &pairValue, sizeof(pairValue));
- bytes += sizeof(pairValue);
+ for (uint32_t coupon: *auxHashMap) {
+ std::memcpy(bytes, &coupon, sizeof(coupon));
+ bytes += sizeof(coupon);
}
} else {
std::memcpy(bytes, auxHashMap->getAuxIntArr(), auxHashMap->getUpdatableSizeBytes());
@@ -310,10 +307,8 @@ void HllArray<A>::serialize(std::ostream& os, const bool compact) const {
if (this->tgtHllType == HLL_4) {
if (auxHashMap != nullptr) {
if (compact) {
- pair_iterator_with_deleter<A> itr = auxHashMap->getIterator();
- while (itr->nextValid()) {
- const int pairValue = itr->getPair();
- os.write((char*)&pairValue, sizeof(pairValue));
+ for (uint32_t coupon: *auxHashMap) {
+ os.write((char*)&coupon, sizeof(coupon));
}
} else {
os.write((char*)auxHashMap->getAuxIntArr(), auxHashMap->getUpdatableSizeBytes());
@@ -431,8 +426,6 @@ double HllArray<A>::getCompositeEstimate() const {
// Empirical evidence suggests that the threshold 3*k will keep us safe if 2^4 <= k <= 2^21.
if (adjEst > (3 << this->lgConfigK)) { return adjEst; }
- //Alternate call
- //if ((adjEst > (3 << this->lgConfigK)) || ((curMin != 0) || (numAtCurMin == 0)) ) { return adjEst; }
const double linEst =
getHllBitMapEstimate(this->lgConfigK, curMin, numAtCurMin);
@@ -587,32 +580,20 @@ int HllArray<A>::getPreInts() const {
}
template<typename A>
-pair_iterator_with_deleter<A> HllArray<A>::getAuxIterator() const {
- return nullptr;
-}
-
-template<typename A>
AuxHashMap<A>* HllArray<A>::getAuxHashMap() const {
return nullptr;
}
template<typename A>
-void HllArray<A>::hipAndKxQIncrementalUpdate(HllArray<A>& host, const int oldValue, const int newValue) {
- if (newValue <= oldValue) {
- throw std::invalid_argument("newValue must be greater than oldValue: " + std::to_string(newValue)
- + " vs " + std::to_string(oldValue));
- }
-
- const int configK = 1 << host.getLgConfigK();
- // update hipAccum BEFORE updating kxq0 and kxq1
- double kxq0 = host.getKxQ0();
- double kxq1 = host.getKxQ1();
- host.addToHipAccum(configK / (kxq0 + kxq1));
+void HllArray<A>::hipAndKxQIncrementalUpdate(uint8_t oldValue, uint8_t newValue) {
+ const int configK = 1 << this->getLgConfigK();
+ // update hip BEFORE updating kxq
+ hipAccum += configK / (kxq0 + kxq1);
// update kxq0 and kxq1; subtract first, then add
- if (oldValue < 32) { host.putKxQ0(kxq0 -= HllUtil<A>::invPow2(oldValue)); }
- else { host.putKxQ1(kxq1 -= HllUtil<A>::invPow2(oldValue)); }
- if (newValue < 32) { host.putKxQ0(kxq0 += HllUtil<A>::invPow2(newValue)); }
- else { host.putKxQ1(kxq1 += HllUtil<A>::invPow2(newValue)); }
+ if (oldValue < 32) { kxq0 -= INVERSE_POWERS_OF_2[oldValue]; }
+ else { kxq1 -= INVERSE_POWERS_OF_2[oldValue]; }
+ if (newValue < 32) { kxq0 += INVERSE_POWERS_OF_2[newValue]; }
+ else { kxq1 += INVERSE_POWERS_OF_2[newValue]; }
}
/**
@@ -648,6 +629,71 @@ double HllArray<A>::getHllRawEstimate(const int lgConfigK, const double kxqSum)
return hyperEst;
}
+template<typename A>
+typename HllArray<A>::const_iterator HllArray<A>::begin(bool all) const {
+ return const_iterator(hllByteArr, 1 << this->lgConfigK, 0, this->tgtHllType, nullptr, 0, all);
+}
+
+template<typename A>
+typename HllArray<A>::const_iterator HllArray<A>::end() const {
+ return const_iterator(hllByteArr, 1 << this->lgConfigK, 1 << this->lgConfigK, this->tgtHllType, nullptr, 0, false);
+}
+
+template<typename A>
+HllArray<A>::const_iterator::const_iterator(const uint8_t* array, size_t array_size, size_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset, bool all):
+array(array), array_size(array_size), index(index), hll_type(hll_type), exceptions(exceptions), offset(offset), all(all)
+{
+ while (this->index < array_size) {
+ value = get_value(array, this->index, hll_type);
+ if (all || value != HllUtil<A>::EMPTY) break;
+ this->index++;
+ }
+}
+
+template<typename A>
+typename HllArray<A>::const_iterator& HllArray<A>::const_iterator::operator++() {
+ while (++index < array_size) {
+ value = get_value(array, index, hll_type);
+ if (all || value != HllUtil<A>::EMPTY) break;
+ }
+ return *this;
+}
+
+template<typename A>
+bool HllArray<A>::const_iterator::operator!=(const const_iterator& other) const {
+ return index != other.index;
+}
+
+template<typename A>
+uint32_t HllArray<A>::const_iterator::operator*() const {
+ if (hll_type == target_hll_type::HLL_4) {
+ if (value == HllUtil<A>::AUX_TOKEN) { // exception
+ return HllUtil<A>::pair(index, exceptions->mustFindValueFor(index));
+ }
+ return HllUtil<A>::pair(index, value + offset);
+ }
+ return HllUtil<A>::pair(index, value);
+}
+
+template<typename A>
+uint8_t HllArray<A>::const_iterator::get_value(const uint8_t* array, size_t index, target_hll_type hll_type) {
+ if (hll_type == target_hll_type::HLL_4) {
+ const uint8_t value = array[index >> 1];
+ if ((index & 1) > 0) { // odd
+ return value >> 4;
+ }
+ return value & HllUtil<A>::loNibbleMask;
+ } else if (hll_type == target_hll_type::HLL_6) {
+ const int start_bit = index * 6;
+ const int shift = start_bit & 0x7;
+ const int byte_idx = start_bit >> 3;
+ const uint16_t two_byte_val = (array[byte_idx + 1] << 8) | array[byte_idx];
+ return (two_byte_val >> shift) & HllUtil<A>::VAL_MASK_6;
+ }
+ // HLL_8
+ return array[index];
+}
+
}
#endif // _HLLARRAY_INTERNAL_HPP_
diff --git a/hll/include/HllArray.hpp b/hll/include/HllArray.hpp
index 206869a..986587f 100644
--- a/hll/include/HllArray.hpp
+++ b/hll/include/HllArray.hpp
@@ -22,7 +22,6 @@
#include "HllSketchImpl.hpp"
#include "HllUtil.hpp"
-#include "PairIterator.hpp"
namespace datasketches {
@@ -54,19 +53,16 @@ class HllArray : public HllSketchImpl<A> {
virtual double getLowerBound(int numStdDev) const;
virtual double getUpperBound(int numStdDev) const;
- void addToHipAccum(double delta);
+ inline void addToHipAccum(double delta);
- void decNumAtCurMin();
+ inline void decNumAtCurMin();
- int getCurMin() const;
- int getNumAtCurMin() const;
- double getHipAccum() const;
+ inline int getCurMin() const;
+ inline int getNumAtCurMin() const;
+ inline double getHipAccum() const;
virtual int getHllByteArrBytes() const = 0;
- virtual pair_iterator_with_deleter<A> getIterator() const = 0;
- virtual pair_iterator_with_deleter<A> getAuxIterator() const;
-
virtual int getUpdatableSerializationBytes() const;
virtual int getCompactSerializationBytes() const;
@@ -76,19 +72,16 @@ class HllArray : public HllSketchImpl<A> {
virtual void putOutOfOrderFlag(bool flag);
- double getKxQ0() const;
- double getKxQ1() const;
+ inline double getKxQ0() const;
+ inline double getKxQ1() const;
virtual int getMemDataStart() const;
virtual int getPreInts() const;
- virtual int getSlot(int slotNo) const = 0;
- virtual void putSlot(int slotNo, int value) = 0;
-
void putCurMin(int curMin);
void putHipAccum(double hipAccum);
- void putKxQ0(double kxq0);
- void putKxQ1(double kxq1);
+ inline void putKxQ0(double kxq0);
+ inline void putKxQ1(double kxq1);
void putNumAtCurMin(int numAtCurMin);
static int hllArrBytes(target_hll_type tgtHllType, int lgConfigK);
@@ -98,9 +91,12 @@ class HllArray : public HllSketchImpl<A> {
virtual AuxHashMap<A>* getAuxHashMap() const;
+ class const_iterator;
+ virtual const_iterator begin(bool all = false) const;
+ virtual const_iterator end() const;
+
protected:
- // TODO: does this need to be static?
- static void hipAndKxQIncrementalUpdate(HllArray& host, int oldValue, int newValue);
+ void hipAndKxQIncrementalUpdate(uint8_t oldValue, uint8_t newValue);
double getHllBitMapEstimate(int lgConfigK, int curMin, int numAtCurMin) const;
double getHllRawEstimate(int lgConfigK, double kxqSum) const;
@@ -108,13 +104,33 @@ class HllArray : public HllSketchImpl<A> {
double kxq0;
double kxq1;
uint8_t* hllByteArr; //init by sub-classes
- int curMin; //always zero for Hll6 and Hll8, only used / tracked by Hll4Array
+ int curMin; //always zero for Hll6 and Hll8, only tracked by Hll4Array
int numAtCurMin; //interpreted as num zeros when curMin == 0
bool oooFlag; //Out-Of-Order Flag
friend class HllSketchImplFactory<A>;
};
+template<typename A>
+class HllArray<A>::const_iterator: public std::iterator<std::input_iterator_tag, uint32_t> {
+public:
+ const_iterator(const uint8_t* array, size_t array_slze, size_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset, bool all);
+ //const_iterator(const uint8_t* array, size_t array_slze, size_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset);
+ const_iterator& operator++();
+ bool operator!=(const const_iterator& other) const;
+ uint32_t operator*() const;
+private:
+ const uint8_t* array;
+ size_t array_size;
+ size_t index;
+ target_hll_type hll_type;
+ const AuxHashMap<A>* exceptions;
+ uint8_t offset;
+ bool all;
+ uint8_t value; // cached value to avoid computing in operator++ and in operator*()
+ static inline uint8_t get_value(const uint8_t* array, size_t index, target_hll_type hll_type);
+};
+
}
#endif /* _HLLARRAY_HPP_ */
diff --git a/hll/include/HllPairIterator-internal.hpp b/hll/include/HllPairIterator-internal.hpp
deleted file mode 100644
index 0efa900..0000000
--- a/hll/include/HllPairIterator-internal.hpp
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-#ifndef _HLLPAIRITERATOR_INTERNAL_HPP_
-#define _HLLPAIRITERATOR_INTERNAL_HPP_
-
-#include <sstream>
-#include <iomanip>
-
-#include "HllUtil.hpp"
-#include "HllPairIterator.hpp"
-
-namespace datasketches {
-
-template<typename A>
-HllPairIterator<A>::HllPairIterator(const int lengthPairs)
- : lengthPairs(lengthPairs),
- index(-1),
- val(-1)
-{ }
-
-template<typename A>
-std::string HllPairIterator<A>::getHeader() {
- std::ostringstream ss;
- ss << std::setw(10) << "Slot" << std::setw(6) << "Value";
- return ss.str();
-}
-
-template<typename A>
-int HllPairIterator<A>::getIndex() {
- return index;
-}
-
-template<typename A>
-int HllPairIterator<A>::getKey() {
- return index;
-}
-
-template<typename A>
-int HllPairIterator<A>::getSlot() {
- return index;
-}
-
-template<typename A>
-int HllPairIterator<A>::getPair() {
- return HllUtil<A>::pair(index, val);
-}
-
-template<typename A>
-int HllPairIterator<A>::getValue() {
- return val;
-}
-
-template<typename A>
-std::string HllPairIterator<A>::getString() {
- std::ostringstream ss;
- ss << std::setw(10) << getSlot() << std::setw(6) << getValue();
- return ss.str();
-}
-
-template<typename A>
-bool HllPairIterator<A>::nextAll() {
- if (++index < lengthPairs) {
- val = value();
- return true;
- }
- return false;
-}
-
-template<typename A>
-bool HllPairIterator<A>::nextValid() {
- while (++index < lengthPairs) {
- val = value();
- if (val != HllUtil<A>::EMPTY) {
- return true;
- }
- }
- return false;
-}
-
-}
-
-#endif // _HLLPAIRITERATOR_INTERNAL_HPP_
diff --git a/hll/include/HllSketch-internal.hpp b/hll/include/HllSketch-internal.hpp
index 5b86848..264b652 100644
--- a/hll/include/HllSketch-internal.hpp
+++ b/hll/include/HllSketch-internal.hpp
@@ -31,6 +31,7 @@
#include <string>
#include <iostream>
#include <sstream>
+#include <iomanip>
namespace datasketches {
@@ -270,49 +271,77 @@ std::ostream& hll_sketch_alloc<A>::to_string(std::ostream& os,
<< " LB : " << get_lower_bound(1) << std::endl
<< " Estimate : " << get_estimate() << std::endl
<< " UB : " << get_upper_bound(1) << std::endl
- << " OutOfOrder flag: " << is_out_of_order_flag() << std::endl;
+ << " OutOfOrder flag: " << (is_out_of_order_flag() ? "true" : "false") << std::endl;
if (get_current_mode() == HLL) {
HllArray<A>* hllArray = (HllArray<A>*) sketch_impl;
- os << " CurMin : " << hllArray->getCurMin() << std::endl
- << " NumAtCurMin : " << hllArray->getNumAtCurMin() << std::endl
- << " HipAccum : " << hllArray->getHipAccum() << std::endl
- << " KxQ0 : " << hllArray->getKxQ0() << std::endl
- << " KxQ1 : " << hllArray->getKxQ1() << std::endl;
+ os << " CurMin : " << hllArray->getCurMin() << std::endl
+ << " NumAtCurMin : " << hllArray->getNumAtCurMin() << std::endl
+ << " HipAccum : " << hllArray->getHipAccum() << std::endl
+ << " KxQ0 : " << hllArray->getKxQ0() << std::endl
+ << " KxQ1 : " << hllArray->getKxQ1() << std::endl;
} else {
- os << " Coupon count : "
+ os << " Coupon count : "
<< std::to_string(((CouponList<A>*) sketch_impl)->getCouponCount()) << std::endl;
}
}
if (detail) {
os << "### HLL SKETCH DATA DETAIL: " << std::endl;
- pair_iterator_with_deleter<A> pitr = get_iterator();
- os << pitr->getHeader() << std::endl;
- if (all) {
- while (pitr->nextAll()) {
- os << pitr->getString() << std::endl;
+ if (get_current_mode() == HLL) {
+ const HllArray<A>* hll_ptr = static_cast<const HllArray<A>*>(sketch_impl);
+ os << std::left << std::setw(10) << "Slot" << std::setw(6) << "Value" << std::endl;
+ auto it = hll_ptr->begin(all);
+ while (it != hll_ptr->end()) {
+ os << std::setw(10) << HllUtil<A>::getLow26(*it);
+ os << std::setw(6) << HllUtil<A>::getValue(*it);
+ os << std::endl;
+ ++it;
}
} else {
- while (pitr->nextValid()) {
- os << pitr->getString() << std::endl;
+ const CouponList<A>* list_ptr = static_cast<const CouponList<A>*>(sketch_impl);
+ os << std::left;
+ os << std::setw(10) << "Index";
+ os << std::setw(10) << "Key";
+ os << std::setw(10) << "Slot";
+ os << std::setw(6) << "Value";
+ os << std::endl;
+ auto it = list_ptr->begin(all);
+ int i = 0;
+ int mask = (1 << get_lg_config_k()) - 1;
+ while (it != list_ptr->end()) {
+ os << std::setw(10) << i;
+ os << std::setw(10) << HllUtil<A>::getLow26(*it);
+ os << std::setw(10) << (HllUtil<A>::getLow26(*it) & mask);
+ os << std::setw(6) << HllUtil<A>::getValue(*it);
+ os << std::endl;
+ ++it;
+ ++i;
}
}
}
if (aux_detail) {
if ((get_current_mode() == HLL) && (get_target_type() == HLL_4)) {
- HllArray<A>* hllArray = (HllArray<A>*) sketch_impl;
- pair_iterator_with_deleter<A> auxItr = hllArray->getAuxIterator();
- if (auxItr != nullptr) {
- os << "### HLL SKETCH AUX DETAIL: " << std::endl
- << auxItr->getHeader() << std::endl;
- if (all) {
- while (auxItr->nextAll()) {
- os << auxItr->getString() << std::endl;
- }
- } else {
- while (auxItr->nextValid()) {
- os << auxItr->getString() << std::endl;
- }
+ const Hll4Array<A>* hll4_ptr = static_cast<const Hll4Array<A>*>(sketch_impl);
+ const AuxHashMap<A>* aux_ptr = hll4_ptr->getAuxHashMap();
+ if (aux_ptr != nullptr) {
+ os << "### HLL SKETCH AUX DETAIL: " << std::endl;
+ os << std::left;
+ os << std::setw(10) << "Index";
+ os << std::setw(10) << "Key";
+ os << std::setw(10) << "Slot";
+ os << std::setw(6) << "Value";
+ os << std::endl;
+ auto it = aux_ptr->begin(all);
+ int i = 0;
+ int mask = (1 << get_lg_config_k()) - 1;
+ while (it != aux_ptr->end()) {
+ os << std::setw(10) << i;
+ os << std::setw(10) << HllUtil<A>::getLow26(*it);
+ os << std::setw(10) << (HllUtil<A>::getLow26(*it) & mask);
+ os << std::setw(6) << HllUtil<A>::getValue(*it);
+ os << std::endl;
+ ++it;
+ ++i;
}
}
}
@@ -387,11 +416,6 @@ bool hll_sketch_alloc<A>::is_empty() const {
}
template<typename A>
-pair_iterator_with_deleter<A> hll_sketch_alloc<A>::get_iterator() const {
- return sketch_impl->getIterator();
-}
-
-template<typename A>
std::string hll_sketch_alloc<A>::type_as_string() const {
switch (sketch_impl->getTgtHllType()) {
case target_hll_type::HLL_4:
diff --git a/hll/include/HllSketchImpl.hpp b/hll/include/HllSketchImpl.hpp
index 0e9f2b3..82180b4 100644
--- a/hll/include/HllSketchImpl.hpp
+++ b/hll/include/HllSketchImpl.hpp
@@ -22,7 +22,6 @@
#include "HllUtil.hpp"
#include "hll.hpp" // for TgtHllType
-#include "PairIterator.hpp"
#include <memory>
@@ -52,9 +51,7 @@ class HllSketchImpl {
virtual double getUpperBound(int numStdDev) const = 0;
virtual double getLowerBound(int numStdDev) const = 0;
- virtual pair_iterator_with_deleter<A> getIterator() const = 0;
-
- int getLgConfigK() const;
+ inline int getLgConfigK() const;
virtual int getMemDataStart() const = 0;
diff --git a/hll/include/HllSketchImplFactory.hpp b/hll/include/HllSketchImplFactory.hpp
index d881360..eae6f75 100644
--- a/hll/include/HllSketchImplFactory.hpp
+++ b/hll/include/HllSketchImplFactory.hpp
@@ -41,40 +41,33 @@ public:
static HllArray<A>* promoteListOrSetToHll(const CouponList<A>& list);
static HllArray<A>* newHll(int lgConfigK, target_hll_type tgtHllType, bool startFullSize = false);
- // resets the input impl, deleting the input pointert and returning a new pointer
+ // resets the input impl, deleting the input pointer and returning a new pointer
static HllSketchImpl<A>* reset(HllSketchImpl<A>* impl, bool startFullSize);
static Hll4Array<A>* convertToHll4(const HllArray<A>& srcHllArr);
static Hll6Array<A>* convertToHll6(const HllArray<A>& srcHllArr);
static Hll8Array<A>* convertToHll8(const HllArray<A>& srcHllArr);
-
-private:
- static int curMinAndNum(const HllArray<A>& hllArr);
};
template<typename A>
CouponHashSet<A>* HllSketchImplFactory<A>::promoteListToSet(const CouponList<A>& list) {
- pair_iterator_with_deleter<A> iter = list.getIterator();
-
typedef typename std::allocator_traits<A>::template rebind_alloc<CouponHashSet<A>> chsAlloc;
CouponHashSet<A>* chSet = new (chsAlloc().allocate(1)) CouponHashSet<A>(list.getLgConfigK(), list.getTgtHllType());
- while (iter->nextValid()) {
- chSet->couponUpdate(iter->getPair());
+ for (auto coupon: list) {
+ chSet->couponUpdate(coupon);
}
chSet->putOutOfOrderFlag(true);
-
return chSet;
}
template<typename A>
HllArray<A>* HllSketchImplFactory<A>::promoteListOrSetToHll(const CouponList<A>& src) {
HllArray<A>* tgtHllArr = HllSketchImplFactory<A>::newHll(src.getLgConfigK(), src.getTgtHllType());
- pair_iterator_with_deleter<A> srcItr = src.getIterator();
tgtHllArr->putKxQ0(1 << src.getLgConfigK());
- while (srcItr->nextValid()) {
- tgtHllArr->couponUpdate(srcItr->getPair());
- tgtHllArr->putHipAccum(src.getEstimate());
+ for (auto coupon: src) {
+ tgtHllArr->couponUpdate(coupon);
}
+ tgtHllArr->putHipAccum(src.getEstimate());
tgtHllArr->putOutOfOrderFlag(false);
return tgtHllArr;
}
@@ -146,76 +139,18 @@ Hll4Array<A>* HllSketchImplFactory<A>::convertToHll4(const HllArray<A>& srcHllAr
typedef typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>> hll4Alloc;
Hll4Array<A>* hll4Array = new (hll4Alloc().allocate(1)) Hll4Array<A>(lgConfigK, srcHllArr.isStartFullSize());
hll4Array->putOutOfOrderFlag(srcHllArr.isOutOfOrderFlag());
-
- // 1st pass: compute starting curMin and numAtCurMin
- int pairVals = curMinAndNum(srcHllArr);
- int curMin = HllUtil<A>::getValue(pairVals);
- int numAtCurMin = HllUtil<A>::getLow26(pairVals);
-
- // 2nd pass: must know curMin.
- // Populate KxQ registers, build AuxHashMap if needed
- pair_iterator_with_deleter<A> itr = srcHllArr.getIterator();
- // nothing allocated, may be null
- AuxHashMap<A>* auxHashMap = srcHllArr.getAuxHashMap();
-
- while (itr->nextValid()) {
- const int slotNo = itr->getIndex();
- const int actualValue = itr->getValue();
- HllArray<A>::hipAndKxQIncrementalUpdate(*hll4Array, 0, actualValue);
- if (actualValue >= (curMin + 15)) {
- hll4Array->putSlot(slotNo, HllUtil<A>::AUX_TOKEN);
- if (auxHashMap == nullptr) {
- auxHashMap = AuxHashMap<A>::newAuxHashMap(HllUtil<A>::LG_AUX_ARR_INTS[lgConfigK], lgConfigK);
- hll4Array->putAuxHashMap(auxHashMap);
- }
- auxHashMap->mustAdd(slotNo, actualValue);
- } else {
- hll4Array->putSlot(slotNo, actualValue - curMin);
- }
- }
-
- hll4Array->putCurMin(curMin);
- hll4Array->putNumAtCurMin(numAtCurMin);
+ hll4Array->mergeHll(srcHllArr);
hll4Array->putHipAccum(srcHllArr.getHipAccum());
-
return hll4Array;
}
template<typename A>
-int HllSketchImplFactory<A>::curMinAndNum(const HllArray<A>& hllArr) {
- int curMin = 64;
- int numAtCurMin = 0;
- pair_iterator_with_deleter<A> itr = hllArr.getIterator();
- while (itr->nextAll()) {
- int v = itr->getValue();
- if (v < curMin) {
- curMin = v;
- numAtCurMin = 1;
- } else if (v == curMin) {
- ++numAtCurMin;
- }
- }
-
- return HllUtil<A>::pair(numAtCurMin, curMin);
-}
-
-template<typename A>
Hll6Array<A>* HllSketchImplFactory<A>::convertToHll6(const HllArray<A>& srcHllArr) {
const int lgConfigK = srcHllArr.getLgConfigK();
typedef typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>> hll6Alloc;
Hll6Array<A>* hll6Array = new (hll6Alloc().allocate(1)) Hll6Array<A>(lgConfigK, srcHllArr.isStartFullSize());
hll6Array->putOutOfOrderFlag(srcHllArr.isOutOfOrderFlag());
-
- int numZeros = 1 << lgConfigK;
- pair_iterator_with_deleter<A> itr = srcHllArr.getIterator();
- while (itr->nextAll()) {
- if (itr->getValue() != HllUtil<A>::EMPTY) {
- --numZeros;
- hll6Array->couponUpdate(itr->getPair());
- }
- }
-
- hll6Array->putNumAtCurMin(numZeros);
+ hll6Array->mergeHll(srcHllArr);
hll6Array->putHipAccum(srcHllArr.getHipAccum());
return hll6Array;
}
@@ -226,17 +161,7 @@ Hll8Array<A>* HllSketchImplFactory<A>::convertToHll8(const HllArray<A>& srcHllAr
typedef typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>> hll8Alloc;
Hll8Array<A>* hll8Array = new (hll8Alloc().allocate(1)) Hll8Array<A>(lgConfigK, srcHllArr.isStartFullSize());
hll8Array->putOutOfOrderFlag(srcHllArr.isOutOfOrderFlag());
-
- int numZeros = 1 << lgConfigK;
- pair_iterator_with_deleter<A> itr = srcHllArr.getIterator();
- while (itr->nextAll()) {
- if (itr->getValue() != HllUtil<A>::EMPTY) {
- --numZeros;
- hll8Array->couponUpdate(itr->getPair());
- }
- }
-
- hll8Array->putNumAtCurMin(numZeros);
+ hll8Array->mergeHll(srcHllArr);
hll8Array->putHipAccum(srcHllArr.getHipAccum());
return hll8Array;
}
diff --git a/hll/include/HllUnion-internal.hpp b/hll/include/HllUnion-internal.hpp
index be6320a..ff4c262 100644
--- a/hll/include/HllUnion-internal.hpp
+++ b/hll/include/HllUnion-internal.hpp
@@ -77,7 +77,7 @@ hll_sketch_alloc<A> hll_union_alloc<A>::get_result(target_hll_type target_type)
template<typename A>
void hll_union_alloc<A>::update(const hll_sketch_alloc<A>& sketch) {
- union_impl(static_cast<const hll_sketch_alloc<A>&>(sketch).sketch_impl, lg_max_k);
+ union_impl(sketch, lg_max_k);
}
template<typename A>
@@ -269,25 +269,22 @@ double hll_union_alloc<A>::get_rel_err(const bool upper_bound, const bool unione
}
template<typename A>
-HllSketchImpl<A>* hll_union_alloc<A>::copy_or_downsample(HllSketchImpl<A>* src_impl, const int tgt_lg_k) {
+HllSketchImpl<A>* hll_union_alloc<A>::copy_or_downsample(const HllSketchImpl<A>* src_impl, const int tgt_lg_k) {
if (src_impl->getCurMode() != HLL) {
throw std::logic_error("Attempt to downsample non-HLL sketch");
}
- HllArray<A>* src = (HllArray<A>*) src_impl;
+ const HllArray<A>* src = static_cast<const HllArray<A>*>(src_impl);
const int src_lg_k = src->getLgConfigK();
- if ((src_lg_k <= tgt_lg_k) && (src->getTgtHllType() == target_hll_type::HLL_8)) {
- return src->copy();
+ if (src_lg_k <= tgt_lg_k) {
+ return src->copyAs(target_hll_type::HLL_8);
}
const int minLgK = ((src_lg_k < tgt_lg_k) ? src_lg_k : tgt_lg_k);
- HllArray<A>* tgtHllArr = HllSketchImplFactory<A>::newHll(minLgK, target_hll_type::HLL_8);
- pair_iterator_with_deleter<A> srcItr = src->getIterator();
- while (srcItr->nextValid()) {
- tgtHllArr->couponUpdate(srcItr->getPair());
- }
+ typedef typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>> hll8Alloc;
+ Hll8Array<A>* tgtHllArr = new (hll8Alloc().allocate(1)) Hll8Array<A>(minLgK, false);
+ tgtHllArr->mergeHll(*src);
//both of these are required for isomorphism
tgtHllArr->putHipAccum(src->getHipAccum());
tgtHllArr->putOutOfOrderFlag(src->isOutOfOrderFlag());
-
return tgtHllArr;
}
@@ -301,169 +298,68 @@ inline HllSketchImpl<A>* hll_union_alloc<A>::leak_free_coupon_update(HllSketchIm
}
template<typename A>
-void hll_union_alloc<A>::union_impl(HllSketchImpl<A>* incoming_impl, const int lg_max_k) {
- if (gadget.sketch_impl->getTgtHllType() != target_hll_type::HLL_8) {
- throw std::logic_error("Must call unionImpl() with HLL_8 input");
- }
- HllSketchImpl<A>* src_impl = incoming_impl; //default
- HllSketchImpl<A>* dstImpl = gadget.sketch_impl; //default
- if ((incoming_impl == nullptr) || incoming_impl->isEmpty()) {
- return; // gadget.sketch_impl;
- }
+void hll_union_alloc<A>::union_impl(const hll_sketch_alloc<A>& sketch, const int lg_max_k) {
+ if (sketch.is_empty()) return;
+ const HllSketchImpl<A>* src_impl = sketch.sketch_impl; //default
+ HllSketchImpl<A>* dst_impl = gadget.sketch_impl; //default
- const int hi2bits = (gadget.sketch_impl->isEmpty()) ? 3 : gadget.sketch_impl->getCurMode();
- const int lo2bits = incoming_impl->getCurMode();
+ const int lo2bits = src_impl->getCurMode();
+ const int hi2bits = (dst_impl->isEmpty()) ? 3 : dst_impl->getCurMode();
const int sw = (hi2bits << 2) | lo2bits;
switch (sw) {
- case 0: { //src: LIST, gadget: LIST
- pair_iterator_with_deleter<A> srcItr = src_impl->getIterator(); //LIST
- while (srcItr->nextValid()) {
- dstImpl = leak_free_coupon_update(dstImpl, srcItr->getPair()); //assignment required
- }
- //whichever is True wins:
- dstImpl->putOutOfOrderFlag(dstImpl->isOutOfOrderFlag() | src_impl->isOutOfOrderFlag());
- // gadget: cleanly updated as needed
- break;
- }
- case 1: { //src: SET, gadget: LIST
- //consider a swap here
- pair_iterator_with_deleter<A> srcItr = src_impl->getIterator(); //SET
- while (srcItr->nextValid()) {
- dstImpl = leak_free_coupon_update(dstImpl, srcItr->getPair()); //assignment required
- }
- dstImpl->putOutOfOrderFlag(true); //SET oooFlag is always true
- // gadget: cleanly updated as needed
- break;
- }
- case 2: { //src: HLL, gadget: LIST
- //swap so that src is gadget-LIST, tgt is HLL
- //use lg_max_k because LIST has effective K of 2^26
- src_impl = gadget.sketch_impl;
- dstImpl = copy_or_downsample(incoming_impl, lg_max_k);
- pair_iterator_with_deleter<A> srcItr = src_impl->getIterator();
- while (srcItr->nextValid()) {
- dstImpl = leak_free_coupon_update(dstImpl, srcItr->getPair()); //assignment required
- }
- //whichever is True wins:
- dstImpl->putOutOfOrderFlag(src_impl->isOutOfOrderFlag() | dstImpl->isOutOfOrderFlag());
- // gadget: swapped, replacing with new impl
- gadget.sketch_impl->get_deleter()(gadget.sketch_impl);
- break;
- }
- case 4: { //src: LIST, gadget: SET
- pair_iterator_with_deleter<A> srcItr = src_impl->getIterator(); //LIST
- while (srcItr->nextValid()) {
- dstImpl = leak_free_coupon_update(dstImpl, srcItr->getPair()); //assignment required
+ case 0: // src: LIST, gadget: LIST
+ case 1: // src: SET, gadget: LIST
+ case 4: // src: LIST, gadget: SET
+ case 5: // src: SET, gadget: SET
+ case 8: // src: LIST, gadget: HLL
+ case 9: // src: SET, gadget: HLL
+ case 12: // src: LIST, gadget: empty
+ case 13: // src: SET, gadget: empty
+ {
+ if (dst_impl->isEmpty() and src_impl->getLgConfigK() == dst_impl->getLgConfigK()) {
+ dst_impl = src_impl->copyAs(target_hll_type::HLL_8);
+ gadget.sketch_impl->get_deleter()(gadget.sketch_impl); // gadget replaced
}
- dstImpl->putOutOfOrderFlag(true); //SET oooFlag is always true
- // gadget: cleanly updated as needed
- break;
- }
- case 5: { //src: SET, gadget: SET
- pair_iterator_with_deleter<A> srcItr = src_impl->getIterator(); //SET
- while (srcItr->nextValid()) {
- dstImpl = leak_free_coupon_update(dstImpl, srcItr->getPair()); //assignment required
+ const CouponList<A>* src = static_cast<const CouponList<A>*>(src_impl);
+ for (auto coupon: *src) {
+ dst_impl = leak_free_coupon_update(dst_impl, coupon); //assignment required
}
- dstImpl->putOutOfOrderFlag(true); //SET oooFlag is always true
- // gadget: cleanly updated as needed
break;
}
- case 6: { //src: HLL, gadget: SET
- //swap so that src is gadget-SET, tgt is HLL
- //use lg_max_k because LIST has effective K of 2^26
+ case 2: // src: HLL, gadget: LIST
+ case 6: // src: HLL, gadget: SET
+ {
+ // swap so that src is LIST or SET, tgt is HLL
+ // use lg_max_k because LIST has effective K of 2^26
+ dst_impl = copy_or_downsample(src_impl, lg_max_k);
src_impl = gadget.sketch_impl;
- dstImpl = copy_or_downsample(incoming_impl, lg_max_k);
- pair_iterator_with_deleter<A> srcItr = src_impl->getIterator(); //LIST
- if (dstImpl->getCurMode() != HLL) {
- throw std::logic_error("dstImpl must be in HLL mode");
- }
- while (srcItr->nextValid()) {
- dstImpl = leak_free_coupon_update(dstImpl, srcItr->getPair()); //assignment required
- }
- dstImpl->putOutOfOrderFlag(true); //merging SET into non-empty HLL -> true
- // gadget: swapped, replacing with new impl
- gadget.sketch_impl->get_deleter()(gadget.sketch_impl);
- break;
- }
- case 8: { //src: LIST, gadget: HLL
- if (dstImpl->getCurMode() != HLL) {
- throw std::logic_error("dstImpl must be in HLL mode");
- }
- pair_iterator_with_deleter<A> srcItr = src_impl->getIterator(); //LIST
- while (srcItr->nextValid()) {
- dstImpl = leak_free_coupon_update(dstImpl, srcItr->getPair()); //assignment required
- }
- //whichever is True wins:
- dstImpl->putOutOfOrderFlag(dstImpl->isOutOfOrderFlag() | src_impl->isOutOfOrderFlag());
- // gadget: should remain unchanged
- if (dstImpl != gadget.sketch_impl) {
- // should not have changed from HLL
- throw std::logic_error("dstImpl unepxectedly changed from gadget");
- }
- break;
- }
- case 9: { //src: SET, gadget: HLL
- if (dstImpl->getCurMode() != HLL) {
- throw std::logic_error("dstImpl must be in HLL mode");
- }
- pair_iterator_with_deleter<A> srcItr = src_impl->getIterator(); //SET
- while (srcItr->nextValid()) {
- dstImpl = leak_free_coupon_update(dstImpl, srcItr->getPair()); //assignment required
- }
- dstImpl->putOutOfOrderFlag(true); //merging SET into existing HLL -> true
- // gadget: should remain unchanged
- if (dstImpl != gadget.sketch_impl) {
- // should not have changed from HLL
- throw std::logic_error("dstImpl unepxectedly changed from gadget");
- }
+ const CouponList<A>* src = static_cast<const CouponList<A>*>(src_impl);
+ Hll8Array<A>* dst = static_cast<Hll8Array<A>*>(dst_impl);
+ dst->mergeList(*src);
+ gadget.sketch_impl->get_deleter()(gadget.sketch_impl); // gadget replaced
break;
}
- case 10: { //src: HLL, gadget: HLL
+ case 10: { // src: HLL, gadget: HLL
const int src_lg_k = src_impl->getLgConfigK();
- const int dstLgK = dstImpl->getLgConfigK();
- const int minLgK = ((src_lg_k < dstLgK) ? src_lg_k : dstLgK);
- if ((src_lg_k < dstLgK) || (dstImpl->getTgtHllType() != HLL_8)) {
- dstImpl = copy_or_downsample(dstImpl, minLgK);
- // always replaces gadget
- gadget.sketch_impl->get_deleter()(gadget.sketch_impl);
- }
- pair_iterator_with_deleter<A> srcItr = src_impl->getIterator(); //HLL
- while (srcItr->nextValid()) {
- dstImpl = leak_free_coupon_update(dstImpl, srcItr->getPair()); //assignment required
- }
- dstImpl->putOutOfOrderFlag(true); //union of two HLL modes is always true
- // gadget: replaced if copied/downampled, otherwise should be unchanged
- break;
- }
- case 12: { //src: LIST, gadget: empty
- pair_iterator_with_deleter<A> srcItr = src_impl->getIterator(); //LIST
- while (srcItr->nextValid()) {
- dstImpl = leak_free_coupon_update(dstImpl, srcItr->getPair()); //assignment required
- }
- dstImpl->putOutOfOrderFlag(src_impl->isOutOfOrderFlag()); //whatever source is
- // gadget: cleanly updated as needed
- break;
- }
- case 13: { //src: SET, gadget: empty
- pair_iterator_with_deleter<A> srcItr = src_impl->getIterator(); //SET
- while (srcItr->nextValid()) {
- dstImpl = leak_free_coupon_update(dstImpl, srcItr->getPair()); //assignment required
+ const int dst_lg_k = dst_impl->getLgConfigK();
+ if (src_lg_k < dst_lg_k) {
+ dst_impl = copy_or_downsample(dst_impl, src_lg_k);
+ gadget.sketch_impl->get_deleter()(gadget.sketch_impl); // gadget replaced
}
- dstImpl->putOutOfOrderFlag(true); //SET oooFlag is always true
- // gadget: cleanly updated as needed
+ const HllArray<A>* src = static_cast<const HllArray<A>*>(src_impl);
+ Hll8Array<A>* dst = static_cast<Hll8Array<A>*>(dst_impl);
+ dst->mergeHll(*src);
break;
}
case 14: { //src: HLL, gadget: empty
- dstImpl = copy_or_downsample(src_impl, lg_max_k);
- dstImpl->putOutOfOrderFlag(src_impl->isOutOfOrderFlag()); //whatever source is.
- // gadget: always replaced with copied/downsampled sketch
- gadget.sketch_impl->get_deleter()(gadget.sketch_impl);
+ dst_impl = copy_or_downsample(src_impl, lg_max_k);
+ gadget.sketch_impl->get_deleter()(gadget.sketch_impl); // gadget replaced
break;
}
}
-
- gadget.sketch_impl = dstImpl;
+ dst_impl->putOutOfOrderFlag(dst_impl->isOutOfOrderFlag() | src_impl->isOutOfOrderFlag());
+ gadget.sketch_impl = dst_impl;
}
}
diff --git a/hll/include/IntArrayPairIterator-internal.hpp b/hll/include/IntArrayPairIterator-internal.hpp
deleted file mode 100644
index 842e660..0000000
--- a/hll/include/IntArrayPairIterator-internal.hpp
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-#ifndef _INTARRAYPAIRITERATOR_INTERNAL_HPP_
-#define _INTARRAYPAIRITERATOR_INTERNAL_HPP_
-
-#include <iomanip>
-#include <sstream>
-
-#include "HllUtil.hpp"
-#include "IntArrayPairIterator.hpp"
-
-namespace datasketches {
-
-template<typename A>
-IntArrayPairIterator<A>::IntArrayPairIterator(const int* array, const int len, const int lgConfigK)
- : array(array),
- slotMask((1 << lgConfigK) - 1),
- lengthPairs(len) {
- index = -1;
- pair = -1;
-}
-
-template<typename A>
-std::string IntArrayPairIterator<A>::getHeader() {
- std::ostringstream ss;
- ss << std::left
- << std::setw(10) << "Index"
- << std::setw(10) << "Key"
- << std::setw(10) << "Slot"
- << std::setw(6) << "Value";
- return ss.str();
-}
-
-template<typename A>
-std::string IntArrayPairIterator<A>::getString() {
- std::ostringstream ss;
- ss << std::left
- << std::setw(10) << getIndex()
- << std::setw(10) << getKey()
- << std::setw(10) << getSlot()
- << std::setw(6) << getValue();
- return ss.str();
-}
-
-template<typename A>
-int IntArrayPairIterator<A>::getIndex() {
- return index;
-}
-
-template<typename A>
-int IntArrayPairIterator<A>::getKey() {
- return HllUtil<A>::getLow26(pair);
-}
-
-template<typename A>
-int IntArrayPairIterator<A>::getPair() {
- return pair;
-}
-
-template<typename A>
-int IntArrayPairIterator<A>::getSlot() {
- return getKey() & slotMask;
-}
-
-template<typename A>
-int IntArrayPairIterator<A>::getValue() {
- return HllUtil<A>::getValue(pair);
-}
-
-template<typename A>
-bool IntArrayPairIterator<A>::nextAll() {
- if (++index < lengthPairs) {
- pair = array[index];
- return true;
- }
- return false;
-}
-
-template<typename A>
-bool IntArrayPairIterator<A>::nextValid() {
- while (++index < lengthPairs) {
- const int p = array[index];
- if (p != HllUtil<A>::EMPTY) {
- pair = p;
- return true;
- }
- }
- return false;
-}
-
-}
-
-#endif // _INTARRAYPAIRITERATOR_INTERNAL_HPP_
diff --git a/hll/include/PairIterator.hpp b/hll/include/PairIterator.hpp
deleted file mode 100644
index 14d20d8..0000000
--- a/hll/include/PairIterator.hpp
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-#ifndef _PAIRITERATOR_HPP_
-#define _PAIRITERATOR_HPP_
-
-#include <string>
-#include <functional>
-#include <memory>
-
-namespace datasketches {
-
-template<typename A = std::allocator<char>>
-class PairIterator {
- public:
- virtual std::string getHeader() = 0;
-
- virtual int getIndex() = 0;
- virtual int getKey() = 0;
- virtual int getPair() = 0;
- virtual int getSlot() = 0;
-
- virtual std::string getString() = 0;
-
- virtual int getValue() = 0;
- virtual bool nextAll() = 0;
- virtual bool nextValid() = 0;
-
- virtual ~PairIterator() = default;
-};
-
-template<typename A = std::allocator<char>>
-using pair_iterator_with_deleter = std::unique_ptr<PairIterator<A>, std::function<void(PairIterator<A>*)>>;
-
-}
-
-#endif /* _PAIRITERATOR_HPP_ */
diff --git a/hll/include/HllPairIterator.hpp b/hll/include/coupon_iterator-internal.hpp
similarity index 50%
rename from hll/include/HllPairIterator.hpp
rename to hll/include/coupon_iterator-internal.hpp
index 3633059..35d0e0b 100644
--- a/hll/include/HllPairIterator.hpp
+++ b/hll/include/coupon_iterator-internal.hpp
@@ -17,39 +17,40 @@
* under the License.
*/
-#ifndef _HLLPAIRITERATOR_HPP_
-#define _HLLPAIRITERATOR_HPP_
+#ifndef _INTARRAYPAIRITERATOR_INTERNAL_HPP_
+#define _INTARRAYPAIRITERATOR_INTERNAL_HPP_
-#include "PairIterator.hpp"
-#include "HllArray.hpp"
+#include "HllUtil.hpp"
namespace datasketches {
template<typename A>
-class HllPairIterator : public PairIterator<A> {
- public:
- HllPairIterator(const int lengthPairs);
- virtual ~HllPairIterator() = default;
- virtual std::string getHeader();
- virtual int getIndex();
- virtual int getKey();
- virtual int getPair();
- virtual int getSlot();
- virtual std::string getString();
- virtual int getValue();
- virtual bool nextAll();
- virtual bool nextValid();
-
- protected:
- virtual int value() = 0;
-
- const int lengthPairs;
- int index;
- int val;
-};
+coupon_iterator<A>::coupon_iterator(const int* array, size_t array_size, size_t index, bool all):
+array(array), array_size(array_size), index(index), all(all) {
+ while (this->index < array_size) {
+ if (all || array[this->index] != HllUtil<A>::EMPTY) break;
+ this->index++;
+ }
+}
+template<typename A>
+coupon_iterator<A>& coupon_iterator<A>::operator++() {
+ while (++index < array_size) {
+ if (all || array[index] != HllUtil<A>::EMPTY) break;
+ }
+ return *this;
+}
+
+template<typename A>
+bool coupon_iterator<A>::operator!=(const coupon_iterator& other) const {
+ return index != other.index;
}
-#include "HllPairIterator-internal.hpp"
+template<typename A>
+uint32_t coupon_iterator<A>::operator*() const {
+ return array[index];
+}
+
+}
-#endif /* _HLLPAIRITERATOR_HPP_ */
+#endif // _INTARRAYPAIRITERATOR_INTERNAL_HPP_
diff --git a/hll/include/IntArrayPairIterator.hpp b/hll/include/coupon_iterator.hpp
similarity index 59%
rename from hll/include/IntArrayPairIterator.hpp
rename to hll/include/coupon_iterator.hpp
index 9b647a2..9f373cf 100644
--- a/hll/include/IntArrayPairIterator.hpp
+++ b/hll/include/coupon_iterator.hpp
@@ -20,40 +20,24 @@
#ifndef _INTARRAYPAIRITERATOR_HPP_
#define _INTARRAYPAIRITERATOR_HPP_
-#include "PairIterator.hpp"
-
namespace datasketches {
template<typename A>
-class IntArrayPairIterator : public PairIterator<A> {
- public:
- explicit IntArrayPairIterator(const int* array, int len, int lgConfigK);
-
- virtual ~IntArrayPairIterator() = default;
-
- virtual std::string getHeader();
-
- virtual int getIndex();
- virtual int getKey();
- virtual int getPair();
- virtual int getSlot();
-
- virtual std::string getString();
-
- virtual int getValue();
- virtual bool nextAll();
- virtual bool nextValid();
-
- protected:
- const int* array;
- const int slotMask;
- const int lengthPairs;
- int index;
- int pair;
+class coupon_iterator: public std::iterator<std::input_iterator_tag, uint32_t> {
+public:
+ coupon_iterator(const int* array, size_t array_slze, size_t index, bool all);
+ coupon_iterator& operator++();
+ bool operator!=(const coupon_iterator& other) const;
+ uint32_t operator*() const;
+private:
+ const int* array;
+ size_t array_size;
+ size_t index;
+ bool all;
};
}
-#include "IntArrayPairIterator-internal.hpp"
+#include "coupon_iterator-internal.hpp"
#endif /* _INTARRAYPAIRITERATOR_HPP_ */
diff --git a/hll/include/hll.hpp b/hll/include/hll.hpp
index 19bf465..aeb9146 100644
--- a/hll/include/hll.hpp
+++ b/hll/include/hll.hpp
@@ -21,7 +21,6 @@
#define _HLL_HPP_
#include "HllUtil.hpp"
-#include "PairIterator.hpp"
#include <memory>
#include <iostream>
@@ -96,7 +95,7 @@ namespace datasketches {
enum target_hll_type {
HLL_4, ///< 4 bits per entry (most compact, size may vary)
HLL_6, ///< 6 bits per entry (fixed size)
- HLL_8 ///< 8 bits per entry (fastst, fixed size)
+ HLL_8 ///< 8 bits per entry (fastest, fixed size)
};
template<typename A>
@@ -115,7 +114,7 @@ class hll_sketch_alloc final {
* Constructs a new HLL sketch.
* @param lg_config_k Sketch can hold 2^lg_config_k rows
* @param tgt_type The HLL mode to use, if/when the sketch reaches that state
- * @param start_full_size Indicates whetehr to start in HLL mode,
+ * @param start_full_size Indicates whether to start in HLL mode,
* keeping memory use constant (if HLL_6 or HLL_8) at the cost of
* starting out using much more memory
*/
@@ -219,7 +218,7 @@ class hll_sketch_alloc final {
std::string to_string(bool summary = true,
bool detail = false,
bool aux_detail = false,
- bool all = false) const;
+ bool all = false) const;
/**
* Present the given std::string as a potential unique item.
@@ -393,9 +392,6 @@ class hll_sketch_alloc final {
static double get_rel_err(bool upper_bound, bool unioned,
int lg_config_k, int num_std_dev);
- //! Returns an iterator over the sketch, intended for debug use
- pair_iterator_with_deleter<A> get_iterator() const;
-
private:
explicit hll_sketch_alloc(HllSketchImpl<A>* that);
@@ -410,7 +406,6 @@ class hll_sketch_alloc final {
bool is_estimation_mode() const;
typedef typename std::allocator_traits<A>::template rebind_alloc<hll_sketch_alloc> AllocHllSketch;
- friend AllocHllSketch;
HllSketchImpl<A>* sketch_impl;
friend hll_union_alloc<A>;
@@ -454,14 +449,14 @@ class hll_union_alloc {
/**
* Construct an hll_union operator from the given std::istream, which
- * must be a avlid serialized image of an hll_union.
+ * must be a valid serialized image of an hll_union.
* @param is The input stream from which to read.
*/
static hll_union_alloc deserialize(std::istream& is);
/**
* Construct an hll_union operator from the given byte array, which
- * must be a avlid serialized image of an hll_union.
+ * must be a valid serialized image of an hll_union.
* @param bytes The byte array to read.
* @param len Byte array length in bytes.
*/
@@ -715,17 +710,17 @@ class hll_union_alloc {
private:
/**
- * Union the given source and destination sketches. This static method examines the state of
- * the current internal gadget and the incoming sketch and determines the optimum way to
+ * Union the given source and destination sketches. This method examines the state of
+ * the current internal gadget and the incoming sketch and determines the optimal way to
* perform the union. This may involve swapping, down-sampling, transforming, and / or
* copying one of the arguments and may completely replace the internals of the union.
*
* @param incoming_impl the given incoming sketch, which may not be modified.
* @param lg_max_k the maximum value of log2 K for this union.
*/
- void union_impl(HllSketchImpl<A>* incoming_impl, int lg_max_k);
+ inline void union_impl(const hll_sketch_alloc<A>& sketch, int lg_max_k);
- static HllSketchImpl<A>* copy_or_downsample(HllSketchImpl<A>* src_impl, int tgt_lg_k);
+ static HllSketchImpl<A>* copy_or_downsample(const HllSketchImpl<A>* src_impl, int tgt_lg_k);
void coupon_update(int coupon);
diff --git a/hll/include/hll.private.hpp b/hll/include/hll.private.hpp
index 0441949..980558e 100644
--- a/hll/include/hll.private.hpp
+++ b/hll/include/hll.private.hpp
@@ -11,25 +11,22 @@
#include "Hll6Array.hpp"
#include "Hll8Array.hpp"
#include "HllArray.hpp"
-#include "HllPairIterator.hpp"
#include "HllSketchImpl.hpp"
#include "HllSketchImplFactory.hpp"
#include "HllUtil.hpp"
-#include "IntArrayPairIterator.hpp"
-#include "PairIterator.hpp"
#include "RelativeErrorTables.hpp"
#include "AuxHashMap-internal.hpp"
+#include "coupon_iterator.hpp"
#include "CouponHashSet-internal.hpp"
#include "CouponList-internal.hpp"
#include "Hll4Array-internal.hpp"
#include "Hll6Array-internal.hpp"
#include "Hll8Array-internal.hpp"
#include "HllArray-internal.hpp"
-#include "HllPairIterator-internal.hpp"
#include "HllSketch-internal.hpp"
#include "HllSketchImpl-internal.hpp"
#include "HllUnion-internal.hpp"
-#include "IntArrayPairIterator-internal.hpp"
+#include "coupon_iterator-internal.hpp"
#endif // _HLL_PRIVATE_HPP_
diff --git a/hll/test/AuxHashMapTest.cpp b/hll/test/AuxHashMapTest.cpp
index 165fddb..2ab3fff 100644
--- a/hll/test/AuxHashMapTest.cpp
+++ b/hll/test/AuxHashMapTest.cpp
@@ -63,13 +63,14 @@ class AuxHashMapTest : public CppUnit::TestFixture {
map->mustAdd(i, i);
}
CPPUNIT_ASSERT_EQUAL(map->getLgAuxArrInts(), 4);
- std::unique_ptr<PairIterator<>, std::function<void(PairIterator<>*)>> itr = map->getIterator();
+ auto itr = map->begin(true);
int count1 = 0;
int count2 = 0;
- while (itr->nextAll()) {
+ while (itr != map->end()) {
++count2;
- int pair = itr->getPair();
+ int pair = *itr;
if (pair != 0) { ++count1; }
+ ++itr;
}
CPPUNIT_ASSERT_EQUAL(count1, 7);
CPPUNIT_ASSERT_EQUAL(count2, 16);
diff --git a/hll/test/CMakeLists.txt b/hll/test/CMakeLists.txt
index d47b685..5ca45f0 100644
--- a/hll/test/CMakeLists.txt
+++ b/hll/test/CMakeLists.txt
@@ -17,12 +17,6 @@
add_executable(hll_test)
-#target_include_directories(hll_test
-# PRIVATE
-# ${HLL_INCLUDE_DIR}
-# ${CPPUNIT_INCLUDE_DIR}
-#)
-
target_link_libraries(hll_test hll common_test)
set_target_properties(hll_test PROPERTIES
@@ -53,4 +47,5 @@ target_sources(hll_test
TablesTest.cpp
ToFromByteArrayTest.cpp
UnionCaseTest.cpp
+ IsomorphicTest.cpp
)
diff --git a/hll/test/CouponListTest.cpp b/hll/test/CouponListTest.cpp
index e970ffa..5284032 100644
--- a/hll/test/CouponListTest.cpp
+++ b/hll/test/CouponListTest.cpp
@@ -41,26 +41,28 @@ class CouponListTest : public CppUnit::TestFixture {
CPPUNIT_TEST_SUITE_END();
void println_string(std::string str) {
- //std::cout << str << "\n";
+ //std::cout << str << std::endl;
}
void checkIterator() {
int lgConfigK = 8;
- CouponList<>* sk = new CouponList<>(lgConfigK, HLL_4, LIST);
- for (int i = 0; i < 7; ++i) { sk->couponUpdate(i); } // not hashes but distinct values
- pair_iterator_with_deleter<> itr = sk->getIterator();
- println_string(itr->getHeader());
- while (itr->nextAll()) {
- int key = itr->getKey();
- int val = itr->getValue();
- int idx = itr->getIndex();
- int slot = itr->getSlot();
+ CouponList<> cl(lgConfigK, HLL_4, LIST);
+ for (int i = 1; i <= 8; ++i) { cl.couponUpdate(HllUtil<>::pair(i, i)); } // not hashes but distinct values
+ const int mask = (1 << lgConfigK) - 1;
+ int idx = 0;
+ auto itr = cl.begin(true);
+ while (itr != cl.end()) {
+ int key = HllUtil<>::getLow26(*itr);
+ int val = HllUtil<>::getValue(*itr);
+ int slot = HllUtil<>::getLow26(*itr) & mask;
std::ostringstream oss;
oss << "Idx: " << idx << ", Key: " << key << ", Val: " << val
<< ", Slot: " << slot;
println_string(oss.str());
+ CPPUNIT_ASSERT_EQUAL(val, idx + 1);
+ ++itr;
+ ++idx;
}
- delete sk;
}
void checkDuplicatesAndMisc() {
diff --git a/hll/test/CrossCountingTest.cpp b/hll/test/CrossCountingTest.cpp
index 9859d2d..c46beba 100644
--- a/hll/test/CrossCountingTest.cpp
+++ b/hll/test/CrossCountingTest.cpp
@@ -41,55 +41,52 @@ class CrossCountingTest : public CppUnit::TestFixture {
return sketch;
}
- int computeChecksum(const hll_sketch& sketch) {
- pair_iterator_with_deleter<> itr = sketch.get_iterator();
- int checksum = 0;
- int key;
- while(itr->nextAll()) {
- checksum += itr->getPair();
- key = itr->getKey(); // dummy
- }
- CPPUNIT_ASSERT(key >= 0); // avoids "set but unused" warning
- return checksum;
- }
-
void crossCountingCheck(const int lgK, const int n) {
hll_sketch sk4 = buildSketch(n, lgK, HLL_4);
- int s4csum = computeChecksum(sk4);
- int csum;
+ const double est = sk4.get_estimate();
+ const double lb = sk4.get_lower_bound(1);
+ const double ub = sk4.get_upper_bound(1);
hll_sketch sk6 = buildSketch(n, lgK, HLL_6);
- csum = computeChecksum(sk6);
- CPPUNIT_ASSERT_EQUAL(csum, s4csum);
+ CPPUNIT_ASSERT_EQUAL(sk6.get_estimate(), est);
+ CPPUNIT_ASSERT_EQUAL(sk6.get_lower_bound(1), lb);
+ CPPUNIT_ASSERT_EQUAL(sk6.get_upper_bound(1), ub);
hll_sketch sk8 = buildSketch(n, lgK, HLL_8);
- csum = computeChecksum(sk8);
- CPPUNIT_ASSERT_EQUAL(csum, s4csum);
-
+ CPPUNIT_ASSERT_EQUAL(sk8.get_estimate(), est);
+ CPPUNIT_ASSERT_EQUAL(sk8.get_lower_bound(1), lb);
+ CPPUNIT_ASSERT_EQUAL(sk8.get_upper_bound(1), ub);
+
// Conversions
hll_sketch sk4to6(sk4, HLL_6);
- csum = computeChecksum(sk4to6);
- CPPUNIT_ASSERT_EQUAL(csum, s4csum);
+ CPPUNIT_ASSERT_EQUAL(sk4to6.get_estimate(), est);
+ CPPUNIT_ASSERT_EQUAL(sk4to6.get_lower_bound(1), lb);
+ CPPUNIT_ASSERT_EQUAL(sk4to6.get_upper_bound(1), ub);
hll_sketch sk4to8(sk4, HLL_8);
- csum = computeChecksum(sk4to8);
- CPPUNIT_ASSERT_EQUAL(csum, s4csum);
+ CPPUNIT_ASSERT_EQUAL(sk4to8.get_estimate(), est);
+ CPPUNIT_ASSERT_EQUAL(sk4to8.get_lower_bound(1), lb);
+ CPPUNIT_ASSERT_EQUAL(sk4to8.get_upper_bound(1), ub);
hll_sketch sk6to4(sk6, HLL_4);
- csum = computeChecksum(sk6to4);
- CPPUNIT_ASSERT_EQUAL(csum, s4csum);
+ CPPUNIT_ASSERT_EQUAL(sk6to4.get_estimate(), est);
+ CPPUNIT_ASSERT_EQUAL(sk6to4.get_lower_bound(1), lb);
+ CPPUNIT_ASSERT_EQUAL(sk6to4.get_upper_bound(1), ub);
hll_sketch sk6to8(sk6, HLL_8);
- csum = computeChecksum(sk6to8);
- CPPUNIT_ASSERT_EQUAL(csum, s4csum);
+ CPPUNIT_ASSERT_EQUAL(sk6to8.get_estimate(), est);
+ CPPUNIT_ASSERT_EQUAL(sk6to8.get_lower_bound(1), lb);
+ CPPUNIT_ASSERT_EQUAL(sk6to8.get_upper_bound(1), ub);
hll_sketch sk8to4(sk8, HLL_4);
- csum = computeChecksum(sk8to4);
- CPPUNIT_ASSERT_EQUAL(csum, s4csum);
+ CPPUNIT_ASSERT_EQUAL(sk8to4.get_estimate(), est);
+ CPPUNIT_ASSERT_EQUAL(sk8to4.get_lower_bound(1), lb);
+ CPPUNIT_ASSERT_EQUAL(sk8to4.get_upper_bound(1), ub);
hll_sketch sk8to6(sk8, HLL_6);
- csum = computeChecksum(sk8to6);
- CPPUNIT_ASSERT_EQUAL(csum, s4csum);
+ CPPUNIT_ASSERT_EQUAL(sk8to6.get_estimate(), est);
+ CPPUNIT_ASSERT_EQUAL(sk8to6.get_lower_bound(1), lb);
+ CPPUNIT_ASSERT_EQUAL(sk8to6.get_upper_bound(1), ub);
}
void crossCountingChecks() {
diff --git a/hll/test/IsomorphicTest.cpp b/hll/test/IsomorphicTest.cpp
new file mode 100644
index 0000000..a484612
--- /dev/null
+++ b/hll/test/IsomorphicTest.cpp
@@ -0,0 +1,153 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <cppunit/TestFixture.h>
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/TestAssert.h>
+
+#include <cstdio>
+
+#include "hll.hpp"
+#include "HllUtil.hpp"
+
+// hex format for comparing serialized bytes
+namespace CppUnit {
+
+template<>
+struct assertion_traits<datasketches::hll_sketch::vector_bytes> {
+ static bool equal(const datasketches::hll_sketch::vector_bytes& a, const datasketches::hll_sketch::vector_bytes& b) {
+ return a == b;
+ }
+
+ static std::string toString(const datasketches::hll_sketch::vector_bytes& v) {
+ std::ostringstream s;
+ s << std::hex << std::setfill('0');
+ int cnt = 0;
+ for (uint8_t byte: v) {
+ if (cnt == 8) { // insert space after each 8 bytes for readability
+ s << ' ';
+ cnt = 0;
+ } else {
+ ++cnt;
+ }
+ s << std::setw(2) << static_cast<int>(byte);
+ }
+ return s.str();
+ }
+};
+
+}
+
+namespace datasketches {
+
+class IsomorphicTest : public CppUnit::TestFixture {
+ long v = 0;
+
+ CPPUNIT_TEST_SUITE(IsomorphicTest);
+ CPPUNIT_TEST(union_one_update_serialize_updatable);
+ CPPUNIT_TEST(union_one_update_serialize_compact);
+ CPPUNIT_TEST_SUITE_END();
+
+ // if lg_k >= 8, mode != SET!
+ static int get_n(int lg_k, hll_mode mode) {
+ if (mode == LIST) return 4;
+ if (mode == SET) return 1 << (lg_k - 4);
+ return ((lg_k < 8) && (mode == HLL)) ? (1 << lg_k) : 1 << (lg_k - 3);
+ }
+
+ hll_sketch build_sketch(int lg_k, target_hll_type hll_type, hll_mode mode) {
+ hll_sketch sk(lg_k, hll_type);
+ int n = get_n(lg_k, mode);
+ for (int i = 0; i < n; i++) sk.update(static_cast<uint64_t>(i + v));
+ v += n;
+ return sk;
+ }
+
+ // merges a sketch to an empty union and gets result of the same type, checks binary equivalence
+ void union_one_update(bool compact) {
+ for (int lg_k = 4; lg_k <= 21; lg_k++) { // all lg_k
+ for (int mode = 0; mode <= 2; mode++) { // List, Set, Hll
+ if ((lg_k < 8) && (mode == 1)) continue; // lg_k < 8 list transitions directly to HLL
+ for (int t = 0; t <= 2; t++) { // HLL_4, HLL_6, HLL_8
+ target_hll_type hll_type = (target_hll_type) t;
+ hll_sketch sk1 = build_sketch(lg_k, hll_type, (hll_mode) mode);
+ hll_union u(lg_k);
+ u.update(sk1);
+ hll_sketch sk2 = u.get_result(hll_type);
+ auto bytes1 = compact ? sk1.serialize_compact() : sk1.serialize_updatable();
+ auto bytes2 = compact ? sk2.serialize_compact() : sk2.serialize_updatable();
+ auto msg = "LgK=" + std::to_string(lg_k)
+ + ", Mode=" + std::to_string(mode)
+ + ", Type=" + std::to_string(hll_type)
+ + "\n" + sk1.to_string(true, true, true, true)
+ + "\n" + sk2.to_string(true, true, true, true);
+ CPPUNIT_ASSERT_EQUAL_MESSAGE(msg, bytes1, bytes2);
+ }
+ }
+ }
+ }
+
+ void union_one_update_serialize_updatable() {
+ union_one_update(false);
+ }
+
+ void union_one_update_serialize_compact() {
+ union_one_update(true);
+ }
+
+ // converts a sketch to a different type and converts back to the original type to check binary equivalence
+ void convert_back_and_forth(bool compact) {
+ for (int lg_k = 4; lg_k <= 21; lg_k++) { // all lg_k
+ for (int mode = 0; mode <= 2; mode++) { // List, Set, Hll
+ if ((lg_k < 8) && (mode == 1)) continue; // lg_k < 8 list transitions directly to HLL
+ for (int t1 = 0; t1 <= 2; t1++) { // HLL_4, HLL_6, HLL_8
+ target_hll_type hll_type1 = (target_hll_type) t1;
+ hll_sketch sk1 = build_sketch(lg_k, hll_type1, (hll_mode) mode);
+ auto bytes1 = compact ? sk1.serialize_compact() : sk1.serialize_updatable();
+ for (int t2 = 0; t2 <= 2; t2++) { // HLL_4, HLL_6, HLL_8
+ if (t2 == t1) continue;
+ target_hll_type hll_type2 = (target_hll_type) t2;
+ hll_sketch sk2(hll_sketch(sk1, hll_type2), hll_type1);
+ auto bytes2 = compact ? sk2.serialize_compact() : sk2.serialize_updatable();
+ auto msg = "LgK=" + std::to_string(lg_k)
+ + ", Mode=" + std::to_string(mode)
+ + ", Type1=" + std::to_string(hll_type1)
+ + ", Type2=" + std::to_string(hll_type2)
+ + "\n" + sk1.to_string(true, true, true, true)
+ + "\n" + sk2.to_string(true, true, true, true);
+ CPPUNIT_ASSERT_EQUAL_MESSAGE(msg, bytes1, bytes2);
+ }
+ }
+ }
+ }
+ }
+
+ void convert_back_and_forth_serialize_updatable() {
+ convert_back_and_forth(false);
+ }
+
+ void convert_back_and_forth_serialize_compact() {
+ convert_back_and_forth(true);
+ }
+
+};
+
+CPPUNIT_TEST_SUITE_REGISTRATION(IsomorphicTest);
+
+} /* namespace datasketches */
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org