You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2020/07/16 22:22:58 UTC
[incubator-datasketches-cpp] branch tuple_sketch updated: stateful allocator support
This is an automated email from the ASF dual-hosted git repository.
alsay pushed a commit to branch tuple_sketch
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git
The following commit(s) were added to refs/heads/tuple_sketch by this push:
new 1cc3e30 stateful allocator support
1cc3e30 is described below
commit 1cc3e30ddfbe5912fbd2002c297b19400c458290
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Thu Jul 16 15:22:46 2020 -0700
stateful allocator support
---
tuple/include/theta_set_difference_base.hpp | 3 ++-
tuple/include/theta_set_difference_base_impl.hpp | 5 ++--
tuple/include/theta_sketch_experimental.hpp | 5 +++-
tuple/include/theta_sketch_experimental_impl.hpp | 12 +++++++---
tuple/include/theta_union_base.hpp | 2 +-
tuple/include/theta_union_base_impl.hpp | 5 ++--
tuple/include/theta_union_experimental.hpp | 6 ++++-
tuple/include/theta_union_experimental_impl.hpp | 9 ++++---
tuple/include/theta_update_sketch_base.hpp | 6 ++---
tuple/include/theta_update_sketch_base_impl.hpp | 30 +++++++-----------------
tuple/include/tuple_sketch.hpp | 5 ++--
tuple/include/tuple_sketch_impl.hpp | 10 ++++----
tuple/include/tuple_union.hpp | 5 ++--
tuple/include/tuple_union_impl.hpp | 10 ++++----
14 files changed, 60 insertions(+), 53 deletions(-)
diff --git a/tuple/include/theta_set_difference_base.hpp b/tuple/include/theta_set_difference_base.hpp
index 0c84129..0142b5c 100644
--- a/tuple/include/theta_set_difference_base.hpp
+++ b/tuple/include/theta_set_difference_base.hpp
@@ -38,12 +38,13 @@ public:
using AllocU64 = typename std::allocator_traits<Allocator>::template rebind_alloc<uint64_t>;
using hash_table = theta_update_sketch_base<uint64_t, trivial_extract_key, AllocU64>;
- theta_set_difference_base(uint64_t seed);
+ theta_set_difference_base(uint64_t seed, const Allocator& allocator = Allocator());
template<typename SS>
CompactSketch compute(SS&& a, const Sketch& b, bool ordered) const;
private:
+ Allocator allocator_;
uint16_t seed_hash_;
};
diff --git a/tuple/include/theta_set_difference_base_impl.hpp b/tuple/include/theta_set_difference_base_impl.hpp
index 98c7ade..6e4d553 100644
--- a/tuple/include/theta_set_difference_base_impl.hpp
+++ b/tuple/include/theta_set_difference_base_impl.hpp
@@ -25,7 +25,8 @@
namespace datasketches {
template<typename EN, typename EK, typename S, typename CS, typename A>
-theta_set_difference_base<EN, EK, S, CS, A>::theta_set_difference_base(uint64_t seed):
+theta_set_difference_base<EN, EK, S, CS, A>::theta_set_difference_base(uint64_t seed, const A& allocator):
+allocator_(allocator),
seed_hash_(compute_seed_hash(seed))
{}
@@ -49,7 +50,7 @@ CS theta_set_difference_base<EN, EK, S, CS, A>::compute(SS&& a, const S& b, bool
conditional_back_inserter(entries, key_less_than<uint64_t, EN, EK>(theta)), comparator());
} else { // hash-based
const uint8_t lg_size = lg_size_from_count(b.get_num_retained(), hash_table::REBUILD_THRESHOLD);
- hash_table table(lg_size, lg_size, hash_table::resize_factor::X1, 1, 0); // seed is not used here
+ hash_table table(lg_size, lg_size, hash_table::resize_factor::X1, 1, 0, allocator_); // seed is not used here
for (const auto& entry: b) {
const uint64_t hash = EK()(entry);
if (hash < theta) {
diff --git a/tuple/include/theta_sketch_experimental.hpp b/tuple/include/theta_sketch_experimental.hpp
index fff6623..4a081d1 100644
--- a/tuple/include/theta_sketch_experimental.hpp
+++ b/tuple/include/theta_sketch_experimental.hpp
@@ -38,7 +38,10 @@ public:
class builder: public theta_base_builder<builder> {
public:
+ builder(const A& allocator = A());
theta_sketch_experimental build() const;
+ private:
+ A allocator_;
};
bool is_empty() const { return table_.is_empty_; }
@@ -67,7 +70,7 @@ private:
using theta_table = theta_update_sketch_base<uint64_t, trivial_extract_key, A>;
theta_table table_;
- theta_sketch_experimental(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed);
+ theta_sketch_experimental(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const A& allocator);
};
template<typename A = std::allocator<uint64_t>>
diff --git a/tuple/include/theta_sketch_experimental_impl.hpp b/tuple/include/theta_sketch_experimental_impl.hpp
index b8a8bbc..68d8c93 100644
--- a/tuple/include/theta_sketch_experimental_impl.hpp
+++ b/tuple/include/theta_sketch_experimental_impl.hpp
@@ -24,8 +24,9 @@ namespace datasketches {
// experimental update theta sketch derived from the same base as tuple sketch
template<typename A>
-theta_sketch_experimental<A>::theta_sketch_experimental(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed):
-table_(lg_cur_size, lg_nom_size, rf, p, seed)
+theta_sketch_experimental<A>::theta_sketch_experimental(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf,
+ float p, uint64_t seed, const A& allocator):
+table_(lg_cur_size, lg_nom_size, rf, p, seed, allocator)
{}
template<typename A>
@@ -104,9 +105,14 @@ auto theta_sketch_experimental<A>::end() const -> const_iterator {
return const_iterator(nullptr, 0, 1 << table_.lg_cur_size_);
}
+// builder
+
+template<typename A>
+theta_sketch_experimental<A>::builder::builder(const A& allocator): allocator_(allocator) {}
+
template<typename A>
theta_sketch_experimental<A> theta_sketch_experimental<A>::builder::build() const {
- return theta_sketch_experimental(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->seed_);
+ return theta_sketch_experimental(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->seed_, allocator_);
}
template<typename A>
diff --git a/tuple/include/theta_union_base.hpp b/tuple/include/theta_union_base.hpp
index 6e3823f..03e71fd 100644
--- a/tuple/include/theta_union_base.hpp
+++ b/tuple/include/theta_union_base.hpp
@@ -38,7 +38,7 @@ public:
using resize_factor = typename hash_table::resize_factor;
using comparator = compare_by_key<ExtractKey>;
- theta_union_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const Policy& policy);
+ theta_union_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const Policy& policy, const Allocator& allocator);
template<typename FwdSketch>
void update(FwdSketch&& sketch);
diff --git a/tuple/include/theta_union_base_impl.hpp b/tuple/include/theta_union_base_impl.hpp
index 056a72e..a1015ef 100644
--- a/tuple/include/theta_union_base_impl.hpp
+++ b/tuple/include/theta_union_base_impl.hpp
@@ -24,9 +24,10 @@
namespace datasketches {
template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
-theta_union_base<EN, EK, P, S, CS, A>::theta_union_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const P& policy):
+theta_union_base<EN, EK, P, S, CS, A>::theta_union_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf,
+ float p, uint64_t seed, const P& policy, const A& allocator):
policy_(policy),
-table_(lg_cur_size, lg_nom_size, rf, p, seed),
+table_(lg_cur_size, lg_nom_size, rf, p, seed, allocator),
union_theta_(table_.theta_)
{}
diff --git a/tuple/include/theta_union_experimental.hpp b/tuple/include/theta_union_experimental.hpp
index 5fc7faf..5a5cb84 100644
--- a/tuple/include/theta_union_experimental.hpp
+++ b/tuple/include/theta_union_experimental.hpp
@@ -67,18 +67,22 @@ private:
State state_;
// for builder
- theta_union_experimental(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed);
+ theta_union_experimental(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const Allocator& allocator);
};
template<typename A>
class theta_union_experimental<A>::builder: public theta_base_builder<theta_union_experimental<A>::builder> {
public:
+ builder(const A& allocator = A());
/**
* This is to create an instance of the union with predefined parameters.
* @return an instance of the union
*/
theta_union_experimental<A> build() const;
+
+private:
+ A allocator_;
};
} /* namespace datasketches */
diff --git a/tuple/include/theta_union_experimental_impl.hpp b/tuple/include/theta_union_experimental_impl.hpp
index 0fd0443..51fe13e 100644
--- a/tuple/include/theta_union_experimental_impl.hpp
+++ b/tuple/include/theta_union_experimental_impl.hpp
@@ -20,8 +20,8 @@
namespace datasketches {
template<typename A>
-theta_union_experimental<A>::theta_union_experimental(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed):
-state_(lg_cur_size, lg_nom_size, rf, p, seed, pass_through_policy())
+theta_union_experimental<A>::theta_union_experimental(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const A& allocator):
+state_(lg_cur_size, lg_nom_size, rf, p, seed, pass_through_policy(), allocator)
{}
template<typename A>
@@ -35,10 +35,13 @@ auto theta_union_experimental<A>::get_result(bool ordered) const -> CompactSketc
}
template<typename A>
+theta_union_experimental<A>::builder::builder(const A& allocator): allocator_(allocator) {}
+
+template<typename A>
auto theta_union_experimental<A>::builder::build() const -> theta_union_experimental {
return theta_union_experimental(
this->starting_sub_multiple(this->lg_k_ + 1, this->MIN_LG_K, static_cast<uint8_t>(this->rf_)),
- this->lg_k_, this->rf_, this->p_, this->seed_);
+ this->lg_k_, this->rf_, this->p_, this->seed_, allocator_);
}
} /* namespace datasketches */
diff --git a/tuple/include/theta_update_sketch_base.hpp b/tuple/include/theta_update_sketch_base.hpp
index 46d9cf6..70e3fe5 100644
--- a/tuple/include/theta_update_sketch_base.hpp
+++ b/tuple/include/theta_update_sketch_base.hpp
@@ -43,7 +43,8 @@ struct theta_update_sketch_base {
using resize_factor = theta_constants::resize_factor;
using comparator = compare_by_key<ExtractKey>;
- theta_update_sketch_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed);
+ theta_update_sketch_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p,
+ uint64_t seed, const Allocator& allocator);
// TODO: copy and move
~theta_update_sketch_base();
@@ -59,8 +60,6 @@ struct theta_update_sketch_base {
iterator begin() const;
iterator end() const;
- string<Allocator> to_string() const;
-
// resize threshold = 0.5 tuned for speed
static constexpr double RESIZE_THRESHOLD = 0.5;
// hash table rebuild threshold = 15/16
@@ -69,6 +68,7 @@ struct theta_update_sketch_base {
static constexpr uint8_t STRIDE_HASH_BITS = 7;
static constexpr uint32_t STRIDE_MASK = (1 << STRIDE_HASH_BITS) - 1;
+ Allocator allocator_;
bool is_empty_;
uint8_t lg_cur_size_;
uint8_t lg_nom_size_;
diff --git a/tuple/include/theta_update_sketch_base_impl.hpp b/tuple/include/theta_update_sketch_base_impl.hpp
index 793113b..01a1bcb 100644
--- a/tuple/include/theta_update_sketch_base_impl.hpp
+++ b/tuple/include/theta_update_sketch_base_impl.hpp
@@ -24,7 +24,8 @@
namespace datasketches {
template<typename EN, typename EK, typename A>
-theta_update_sketch_base<EN, EK, A>::theta_update_sketch_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed):
+theta_update_sketch_base<EN, EK, A>::theta_update_sketch_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const A& allocator):
+allocator_(allocator),
is_empty_(true),
lg_cur_size_(lg_cur_size),
lg_nom_size_(lg_nom_size),
@@ -35,7 +36,7 @@ seed_(seed),
entries_(nullptr)
{
const size_t size = 1 << lg_cur_size;
- entries_ = A().allocate(size);
+ entries_ = allocator_.allocate(size);
for (size_t i = 0; i < size; ++i) EK()(entries_[i]) = 0;
if (p < 1) this->theta_ *= p;
}
@@ -47,7 +48,7 @@ theta_update_sketch_base<EN, EK, A>::~theta_update_sketch_base()
for (size_t i = 0; i < size; ++i) {
if (EK()(entries_[i]) != 0) entries_[i].~EN();
}
- A().deallocate(entries_, size);
+ allocator_.deallocate(entries_, size);
}
template<typename EN, typename EK, typename A>
@@ -108,21 +109,6 @@ auto theta_update_sketch_base<EN, EK, A>::end() const -> iterator {
}
template<typename EN, typename EK, typename A>
-string<A> theta_update_sketch_base<EN, EK, A>::to_string() const {
- std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
- auto type = typeid(*this).name();
- os << "type: " << type << std::endl;
- os << "sizeof: " << sizeof(*this) << std::endl;
- os << "is_empty: " << (is_empty_ ? "true" : "false") << std::endl;
- os << "lg_cur_size: " << std::to_string(lg_cur_size_) << std::endl;
- os << "lg_nom_size: " << std::to_string(lg_nom_size_) << std::endl;
- os << "num_entries: " << num_entries_ << std::endl;
- os << "theta (as long): " << theta_ << std::endl;
- os << "theta (as fraction): " << static_cast<double>(theta_) / theta_constants::MAX_THETA << std::endl;
- return os.str();
-}
-
-template<typename EN, typename EK, typename A>
uint32_t theta_update_sketch_base<EN, EK, A>::get_capacity(uint8_t lg_cur_size, uint8_t lg_nom_size) {
const double fraction = (lg_cur_size <= lg_nom_size) ? RESIZE_THRESHOLD : REBUILD_THRESHOLD;
return std::floor(fraction * (1 << lg_cur_size));
@@ -142,7 +128,7 @@ void theta_update_sketch_base<EN, EK, A>::resize() {
lg_cur_size_ += factor;
const size_t new_size = 1 << lg_cur_size_;
EN* old_entries = entries_;
- entries_ = A().allocate(new_size);
+ entries_ = allocator_.allocate(new_size);
for (size_t i = 0; i < new_size; ++i) EK()(entries_[i]) = 0;
num_entries_ = 0;
for (size_t i = 0; i < old_size; ++i) {
@@ -152,7 +138,7 @@ void theta_update_sketch_base<EN, EK, A>::resize() {
old_entries[i].~EN();
}
}
- A().deallocate(old_entries, old_size);
+ allocator_.deallocate(old_entries, old_size);
}
template<typename EN, typename EK, typename A>
@@ -162,7 +148,7 @@ void theta_update_sketch_base<EN, EK, A>::rebuild() {
std::nth_element(&entries_[0], &entries_[pivot], &entries_[size], comparator());
this->theta_ = EK()(entries_[pivot]);
EN* old_entries = entries_;
- entries_ = A().allocate(size);
+ entries_ = allocator_.allocate(size);
for (size_t i = 0; i < size; ++i) EK()(entries_[i]) = 0;
num_entries_ = 0;
for (size_t i = 0; i < size; ++i) {
@@ -172,7 +158,7 @@ void theta_update_sketch_base<EN, EK, A>::rebuild() {
old_entries[i].~EN();
}
}
- A().deallocate(old_entries, size);
+ allocator_.deallocate(old_entries, size);
}
template<typename EN, typename EK, typename A>
diff --git a/tuple/include/tuple_sketch.hpp b/tuple/include/tuple_sketch.hpp
index 90d2046..f7f228e 100644
--- a/tuple/include/tuple_sketch.hpp
+++ b/tuple/include/tuple_sketch.hpp
@@ -317,7 +317,7 @@ private:
tuple_map map_;
// for builder
- update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const Policy& policy);
+ update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const Policy& policy, const Allocator& allocator);
virtual void print_specifics(std::ostringstream& os) const;
};
@@ -448,7 +448,7 @@ public:
/**
* Creates and instance of the builder with default parameters.
*/
- builder(const P& policy = P());
+ builder(const P& policy = P(), const A& allocator = A());
/**
* This is to create an instance of the sketch with predefined parameters.
@@ -458,6 +458,7 @@ public:
private:
P policy_;
+ A allocator_;
};
} /* namespace datasketches */
diff --git a/tuple/include/tuple_sketch_impl.hpp b/tuple/include/tuple_sketch_impl.hpp
index 75fa337..a972d8a 100644
--- a/tuple/include/tuple_sketch_impl.hpp
+++ b/tuple/include/tuple_sketch_impl.hpp
@@ -82,9 +82,9 @@ string<A> tuple_sketch<S, A>::to_string(bool detail) const {
// update sketch
template<typename S, typename U, typename P, typename A>
-update_tuple_sketch<S, U, P, A>::update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const P& policy):
+update_tuple_sketch<S, U, P, A>::update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const P& policy, const A& allocator):
policy_(policy),
-map_(lg_cur_size, lg_nom_size, rf, p, seed)
+map_(lg_cur_size, lg_nom_size, rf, p, seed, allocator)
{}
template<typename S, typename U, typename P, typename A>
@@ -531,12 +531,12 @@ void compact_tuple_sketch<S, A>::print_specifics(std::ostringstream&) const {}
// builder
template<typename S, typename U, typename P, typename A>
-update_tuple_sketch<S, U, P, A>::builder::builder(const P& policy):
-policy_(policy) {}
+update_tuple_sketch<S, U, P, A>::builder::builder(const P& policy, const A& allocator):
+policy_(policy), allocator_(allocator) {}
template<typename S, typename U, typename P, typename A>
auto update_tuple_sketch<S, U, P, A>::builder::build() const -> update_tuple_sketch {
- return update_tuple_sketch(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->seed_, policy_);
+ return update_tuple_sketch(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->seed_, policy_, allocator_);
}
} /* namespace datasketches */
diff --git a/tuple/include/tuple_union.hpp b/tuple/include/tuple_union.hpp
index 0ee776c..d716664 100644
--- a/tuple/include/tuple_union.hpp
+++ b/tuple/include/tuple_union.hpp
@@ -83,7 +83,7 @@ private:
State state_;
// for builder
- tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const Policy& policy);
+ tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const Policy& policy, const Allocator& allocator);
};
template<typename S, typename P, typename A>
@@ -92,7 +92,7 @@ public:
/**
* Creates and instance of the builder with default parameters.
*/
- builder(const P& policy = P());
+ builder(const P& policy = P(), const A& allocator = A());
/**
* This is to create an instance of the union with predefined parameters.
@@ -102,6 +102,7 @@ public:
private:
P policy_;
+ A allocator_;
};
} /* namespace datasketches */
diff --git a/tuple/include/tuple_union_impl.hpp b/tuple/include/tuple_union_impl.hpp
index 9f471ac..6df2794 100644
--- a/tuple/include/tuple_union_impl.hpp
+++ b/tuple/include/tuple_union_impl.hpp
@@ -20,8 +20,8 @@
namespace datasketches {
template<typename S, typename P, typename A>
-tuple_union<S, P, A>::tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const P& policy):
-state_(lg_cur_size, lg_nom_size, rf, p, seed, internal_policy(policy))
+tuple_union<S, P, A>::tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed, const P& policy, const A& allocator):
+state_(lg_cur_size, lg_nom_size, rf, p, seed, internal_policy(policy), allocator)
{}
template<typename S, typename P, typename A>
@@ -36,12 +36,12 @@ auto tuple_union<S, P, A>::get_result(bool ordered) const -> CompactSketch {
}
template<typename S, typename P, typename A>
-tuple_union<S, P, A>::builder::builder(const P& policy):
-policy_(policy) {}
+tuple_union<S, P, A>::builder::builder(const P& policy, const A& allocator):
+policy_(policy), allocator_(allocator) {}
template<typename S, typename P, typename A>
auto tuple_union<S, P, A>::builder::build() const -> tuple_union {
- return tuple_union(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->seed_, policy_);
+ return tuple_union(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->seed_, policy_, allocator_);
}
} /* namespace datasketches */
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org