You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by GitBox <gi...@apache.org> on 2023/01/12 22:16:35 UTC

[GitHub] [datasketches-cpp] AlexanderSaydakov commented on a diff in pull request #325: Cpp countmin

AlexanderSaydakov commented on code in PR #325:
URL: https://github.com/apache/datasketches-cpp/pull/325#discussion_r1068519801


##########
count/include/count_min.hpp:
##########
@@ -0,0 +1,166 @@
+#ifndef COUNT_MIN_HPP_
+#define COUNT_MIN_HPP_
+
+#include <cstdint>
+#include <vector>
+#include <iterator>
+#include <algorithm>
+
+#include "common_defs.hpp"
+
+namespace datasketches {
+
+  /*
+   * C++ implementation of the CountMin sketch data structure of Cormode and Muthukrishnan.
+   * [1] - http://dimacs.rutgers.edu/~graham/pubs/papers/cm-full.pdf
+   * @author Charlie Dickens
+   */
+
+template<typename T> class count_min_sketch ;

Review Comment:
   unnecessary



##########
count/include/count_min.hpp:
##########
@@ -0,0 +1,166 @@
+#ifndef COUNT_MIN_HPP_
+#define COUNT_MIN_HPP_
+
+#include <cstdint>
+#include <vector>
+#include <iterator>
+#include <algorithm>
+
+#include "common_defs.hpp"
+
+namespace datasketches {
+
+  /*
+   * C++ implementation of the CountMin sketch data structure of Cormode and Muthukrishnan.
+   * [1] - http://dimacs.rutgers.edu/~graham/pubs/papers/cm-full.pdf
+   * @author Charlie Dickens
+   */
+
+template<typename T> class count_min_sketch ;
+
+template<typename T>
+class count_min_sketch{
+  static_assert(std::is_arithmetic<T>::value, "Arithmetic type expected");
+public:
+  uint64_t num_hashes, num_buckets, seed, sketch_length ;
+  T total_weight = 0;
+  std::vector<uint64_t> hash_seeds ;
+
+  /**
+  * Creates an instance of the sketch given parameters num_hashes, num_buckets and hash seed, `seed`.
+  * @param num_hashes : number of hash functions in the sketch. Equivalently the number of rows in the array
+  * @param num_buckets : number of buckets that hash functions map into. Equivalently the number of columns in the array
+  * @param seed for hash function
+  *
+  * The template type T is the type of the vector that contains the weights, not the objects inserted into the sketch.

Review Comment:
   I think the description of T should be near the class template declaration above



##########
count/include/count_min.hpp:
##########
@@ -0,0 +1,166 @@
+#ifndef COUNT_MIN_HPP_
+#define COUNT_MIN_HPP_
+
+#include <cstdint>
+#include <vector>
+#include <iterator>
+#include <algorithm>
+
+#include "common_defs.hpp"
+
+namespace datasketches {
+
+  /*
+   * C++ implementation of the CountMin sketch data structure of Cormode and Muthukrishnan.
+   * [1] - http://dimacs.rutgers.edu/~graham/pubs/papers/cm-full.pdf
+   * @author Charlie Dickens
+   */
+
+template<typename T> class count_min_sketch ;
+
+template<typename T>
+class count_min_sketch{
+  static_assert(std::is_arithmetic<T>::value, "Arithmetic type expected");
+public:
+  uint64_t num_hashes, num_buckets, seed, sketch_length ;
+  T total_weight = 0;
+  std::vector<uint64_t> hash_seeds ;
+
+  /**
+  * Creates an instance of the sketch given parameters num_hashes, num_buckets and hash seed, `seed`.
+  * @param num_hashes : number of hash functions in the sketch. Equivalently the number of rows in the array
+  * @param num_buckets : number of buckets that hash functions map into. Equivalently the number of columns in the array
+  * @param seed for hash function
+  *
+  * The template type T is the type of the vector that contains the weights, not the objects inserted into the sketch.
+   * The items inserted into the sketch can be arbitrary type, so long as they are hashable via murmurhash.
+   * Only update and estimate methods are added for uint64_t and string types.
+  */
+  count_min_sketch(uint64_t num_hashes, uint64_t num_buckets, uint64_t seed = DEFAULT_SEED) ;
+
+  std::vector<T> sketch ; // the array stored by the sketch
+
+  /**
+  * @return configured num_hashes of this sketch
+  */
+  uint64_t get_num_hashes() ;
+
+  /**
+  * @return configured num_buckets of this sketch
+  */
+  uint64_t get_num_buckets() ;
+
+  /**
+  * @return configured seed of this sketch
+  */
+  uint64_t get_seed() ;
+
+  /**
+  * @return vector of the sketch configuration: {num_hsahes, num_buckets, seed}
+  */
+  std::vector<uint64_t> get_config() ; // Sketch parameter configuration -- needed for merging.

Review Comment:
   there are methods to get those things above. seems unnecessary



##########
count/include/count_min.hpp:
##########
@@ -0,0 +1,166 @@
+#ifndef COUNT_MIN_HPP_
+#define COUNT_MIN_HPP_
+
+#include <cstdint>
+#include <vector>
+#include <iterator>
+#include <algorithm>
+
+#include "common_defs.hpp"
+
+namespace datasketches {
+
+  /*
+   * C++ implementation of the CountMin sketch data structure of Cormode and Muthukrishnan.
+   * [1] - http://dimacs.rutgers.edu/~graham/pubs/papers/cm-full.pdf
+   * @author Charlie Dickens
+   */
+
+template<typename T> class count_min_sketch ;
+
+template<typename T>
+class count_min_sketch{
+  static_assert(std::is_arithmetic<T>::value, "Arithmetic type expected");
+public:
+  uint64_t num_hashes, num_buckets, seed, sketch_length ;
+  T total_weight = 0;
+  std::vector<uint64_t> hash_seeds ;
+
+  /**
+  * Creates an instance of the sketch given parameters num_hashes, num_buckets and hash seed, `seed`.
+  * @param num_hashes : number of hash functions in the sketch. Equivalently the number of rows in the array
+  * @param num_buckets : number of buckets that hash functions map into. Equivalently the number of columns in the array
+  * @param seed for hash function
+  *
+  * The template type T is the type of the vector that contains the weights, not the objects inserted into the sketch.
+   * The items inserted into the sketch can be arbitrary type, so long as they are hashable via murmurhash.
+   * Only update and estimate methods are added for uint64_t and string types.
+  */
+  count_min_sketch(uint64_t num_hashes, uint64_t num_buckets, uint64_t seed = DEFAULT_SEED) ;
+
+  std::vector<T> sketch ; // the array stored by the sketch
+
+  /**
+  * @return configured num_hashes of this sketch
+  */
+  uint64_t get_num_hashes() ;
+
+  /**
+  * @return configured num_buckets of this sketch
+  */
+  uint64_t get_num_buckets() ;
+
+  /**
+  * @return configured seed of this sketch
+  */
+  uint64_t get_seed() ;
+
+  /**
+  * @return vector of the sketch configuration: {num_hsahes, num_buckets, seed}
+  */
+  std::vector<uint64_t> get_config() ; // Sketch parameter configuration -- needed for merging.
+
+  /**
+   * @return epsilon : double
+   * The maximum permissible error for any frequency estimate query.
+   * epsilon = ceil(e / num_buckets)
+   */
+   double get_relative_error() ;
+
+  /**
+  * @return total_weight : typename T
+  * The total weight currently inserted into the stream.
+  */
+  T get_total_weight() ;
+
+  /**
+  * @return vector of the sketch data structure
+  * Required for merging.
+  */
+  std::vector<T> get_sketch() ;
+
+  /*
+ * @return number_of_buckets : the number of hash buckets at every level of the
+ * sketch required in order to obtain the specified relative error.
+ * [1] - Section 3 ``Data Structure'', page 6.
+ */
+  static uint64_t suggest_num_buckets(double relative_error) ;

Review Comment:
   @param is missing in the doc here and many more below



##########
count/include/count_min_impl.hpp:
##########
@@ -0,0 +1,234 @@
+#ifndef COUNT_MIN_IMPL_HPP_
+#define COUNT_MIN_IMPL_HPP_
+
+#include "MurmurHash3.h"
+#include <random>
+
+namespace datasketches {
+
+template<typename T>
+count_min_sketch<T>::count_min_sketch(uint64_t num_hashes, uint64_t num_buckets, uint64_t seed):
+num_hashes(num_hashes),
+num_buckets(num_buckets),
+seed(seed){
+  sketch_length = num_hashes*num_buckets ;
+  sketch.resize(sketch_length) ;
+  assert(num_buckets >= 3); // need relative error epsilon at most 1.

Review Comment:
   we have a policy to throw exceptions instead of asserts. std::invalid_argument in this case I suppose



##########
count/include/count_min.hpp:
##########
@@ -0,0 +1,166 @@
+#ifndef COUNT_MIN_HPP_
+#define COUNT_MIN_HPP_
+
+#include <cstdint>
+#include <vector>
+#include <iterator>
+#include <algorithm>
+
+#include "common_defs.hpp"
+
+namespace datasketches {
+
+  /*
+   * C++ implementation of the CountMin sketch data structure of Cormode and Muthukrishnan.
+   * [1] - http://dimacs.rutgers.edu/~graham/pubs/papers/cm-full.pdf
+   * @author Charlie Dickens
+   */
+
+template<typename T> class count_min_sketch ;
+
+template<typename T>
+class count_min_sketch{
+  static_assert(std::is_arithmetic<T>::value, "Arithmetic type expected");
+public:
+  uint64_t num_hashes, num_buckets, seed, sketch_length ;
+  T total_weight = 0;

Review Comment:
   initialization with 0 should better be in the constructor



##########
count/include/count_min.hpp:
##########
@@ -0,0 +1,166 @@
+#ifndef COUNT_MIN_HPP_
+#define COUNT_MIN_HPP_
+
+#include <cstdint>
+#include <vector>
+#include <iterator>
+#include <algorithm>
+
+#include "common_defs.hpp"
+
+namespace datasketches {
+
+  /*
+   * C++ implementation of the CountMin sketch data structure of Cormode and Muthukrishnan.
+   * [1] - http://dimacs.rutgers.edu/~graham/pubs/papers/cm-full.pdf
+   * @author Charlie Dickens
+   */
+
+template<typename T> class count_min_sketch ;
+
+template<typename T>
+class count_min_sketch{
+  static_assert(std::is_arithmetic<T>::value, "Arithmetic type expected");
+public:
+  uint64_t num_hashes, num_buckets, seed, sketch_length ;

Review Comment:
   These member vars should be private.
   Do we really need 64 bits for num_hashes and num_buckets? perhaps 32 or even 16.
   sketch_length seems to be trivially derived from num_hashes and num_buckets. perhaps unnecessary



##########
count/include/count_min.hpp:
##########
@@ -0,0 +1,166 @@
+#ifndef COUNT_MIN_HPP_
+#define COUNT_MIN_HPP_
+
+#include <cstdint>
+#include <vector>
+#include <iterator>
+#include <algorithm>
+
+#include "common_defs.hpp"
+
+namespace datasketches {
+
+  /*
+   * C++ implementation of the CountMin sketch data structure of Cormode and Muthukrishnan.
+   * [1] - http://dimacs.rutgers.edu/~graham/pubs/papers/cm-full.pdf
+   * @author Charlie Dickens
+   */
+
+template<typename T> class count_min_sketch ;
+
+template<typename T>
+class count_min_sketch{
+  static_assert(std::is_arithmetic<T>::value, "Arithmetic type expected");
+public:
+  uint64_t num_hashes, num_buckets, seed, sketch_length ;
+  T total_weight = 0;
+  std::vector<uint64_t> hash_seeds ;
+
+  /**
+  * Creates an instance of the sketch given parameters num_hashes, num_buckets and hash seed, `seed`.
+  * @param num_hashes : number of hash functions in the sketch. Equivalently the number of rows in the array
+  * @param num_buckets : number of buckets that hash functions map into. Equivalently the number of columns in the array
+  * @param seed for hash function
+  *
+  * The template type T is the type of the vector that contains the weights, not the objects inserted into the sketch.
+   * The items inserted into the sketch can be arbitrary type, so long as they are hashable via murmurhash.
+   * Only update and estimate methods are added for uint64_t and string types.
+  */
+  count_min_sketch(uint64_t num_hashes, uint64_t num_buckets, uint64_t seed = DEFAULT_SEED) ;
+
+  std::vector<T> sketch ; // the array stored by the sketch
+
+  /**
+  * @return configured num_hashes of this sketch
+  */
+  uint64_t get_num_hashes() ;
+
+  /**
+  * @return configured num_buckets of this sketch
+  */
+  uint64_t get_num_buckets() ;
+
+  /**
+  * @return configured seed of this sketch
+  */
+  uint64_t get_seed() ;
+
+  /**
+  * @return vector of the sketch configuration: {num_hsahes, num_buckets, seed}
+  */
+  std::vector<uint64_t> get_config() ; // Sketch parameter configuration -- needed for merging.
+
+  /**
+   * @return epsilon : double
+   * The maximum permissible error for any frequency estimate query.
+   * epsilon = ceil(e / num_buckets)
+   */
+   double get_relative_error() ;
+
+  /**
+  * @return total_weight : typename T
+  * The total weight currently inserted into the stream.
+  */
+  T get_total_weight() ;
+
+  /**
+  * @return vector of the sketch data structure
+  * Required for merging.
+  */
+  std::vector<T> get_sketch() ;

Review Comment:
   this seems to return a copy. consider iterator over the sketch instead



##########
count/include/count_min.hpp:
##########
@@ -0,0 +1,166 @@
+#ifndef COUNT_MIN_HPP_
+#define COUNT_MIN_HPP_
+
+#include <cstdint>
+#include <vector>
+#include <iterator>
+#include <algorithm>
+
+#include "common_defs.hpp"
+
+namespace datasketches {
+
+  /*
+   * C++ implementation of the CountMin sketch data structure of Cormode and Muthukrishnan.
+   * [1] - http://dimacs.rutgers.edu/~graham/pubs/papers/cm-full.pdf
+   * @author Charlie Dickens
+   */
+
+template<typename T> class count_min_sketch ;
+
+template<typename T>
+class count_min_sketch{
+  static_assert(std::is_arithmetic<T>::value, "Arithmetic type expected");
+public:
+  uint64_t num_hashes, num_buckets, seed, sketch_length ;
+  T total_weight = 0;
+  std::vector<uint64_t> hash_seeds ;
+
+  /**
+  * Creates an instance of the sketch given parameters num_hashes, num_buckets and hash seed, `seed`.
+  * @param num_hashes : number of hash functions in the sketch. Equivalently the number of rows in the array
+  * @param num_buckets : number of buckets that hash functions map into. Equivalently the number of columns in the array
+  * @param seed for hash function
+  *
+  * The template type T is the type of the vector that contains the weights, not the objects inserted into the sketch.
+   * The items inserted into the sketch can be arbitrary type, so long as they are hashable via murmurhash.
+   * Only update and estimate methods are added for uint64_t and string types.
+  */
+  count_min_sketch(uint64_t num_hashes, uint64_t num_buckets, uint64_t seed = DEFAULT_SEED) ;
+
+  std::vector<T> sketch ; // the array stored by the sketch
+
+  /**
+  * @return configured num_hashes of this sketch
+  */
+  uint64_t get_num_hashes() ;
+
+  /**
+  * @return configured num_buckets of this sketch
+  */
+  uint64_t get_num_buckets() ;
+
+  /**
+  * @return configured seed of this sketch
+  */
+  uint64_t get_seed() ;
+
+  /**
+  * @return vector of the sketch configuration: {num_hsahes, num_buckets, seed}
+  */
+  std::vector<uint64_t> get_config() ; // Sketch parameter configuration -- needed for merging.
+
+  /**
+   * @return epsilon : double
+   * The maximum permissible error for any frequency estimate query.
+   * epsilon = ceil(e / num_buckets)
+   */
+   double get_relative_error() ;
+
+  /**
+  * @return total_weight : typename T
+  * The total weight currently inserted into the stream.
+  */
+  T get_total_weight() ;
+
+  /**
+  * @return vector of the sketch data structure
+  * Required for merging.
+  */
+  std::vector<T> get_sketch() ;
+
+  /*
+ * @return number_of_buckets : the number of hash buckets at every level of the
+ * sketch required in order to obtain the specified relative error.
+ * [1] - Section 3 ``Data Structure'', page 6.
+ */
+  static uint64_t suggest_num_buckets(double relative_error) ;
+
+  /*
+  * @return number_of_hashes : the number of hash functions that are required in
+   * order to achieve the specified confidence of the sketch.
+   * confidence = 1 - delta, with delta denoting the sketch failure probability in the literature.
+   * [1] - Section 3 ``Data Structure'', page 6.
+  */
+  static uint64_t suggest_num_hashes(double confidence) ;
+
+  /**
+  * @return vector of uint64_t which each represent the index to which `value' must update in the sketch
+  */
+  std::vector<uint64_t> get_hashes(const void* item, size_t size) ;

Review Comment:
   is this for internal use?



##########
count/include/count_min.hpp:
##########
@@ -0,0 +1,166 @@
+#ifndef COUNT_MIN_HPP_
+#define COUNT_MIN_HPP_
+
+#include <cstdint>
+#include <vector>
+#include <iterator>
+#include <algorithm>
+
+#include "common_defs.hpp"
+
+namespace datasketches {
+
+  /*
+   * C++ implementation of the CountMin sketch data structure of Cormode and Muthukrishnan.
+   * [1] - http://dimacs.rutgers.edu/~graham/pubs/papers/cm-full.pdf
+   * @author Charlie Dickens
+   */
+
+template<typename T> class count_min_sketch ;
+
+template<typename T>

Review Comment:
   if T represents weight, perhaps it would be better called W or even Weight?



##########
count/include/count_min.hpp:
##########
@@ -0,0 +1,166 @@
+#ifndef COUNT_MIN_HPP_
+#define COUNT_MIN_HPP_
+
+#include <cstdint>
+#include <vector>
+#include <iterator>
+#include <algorithm>
+
+#include "common_defs.hpp"
+
+namespace datasketches {
+
+  /*
+   * C++ implementation of the CountMin sketch data structure of Cormode and Muthukrishnan.
+   * [1] - http://dimacs.rutgers.edu/~graham/pubs/papers/cm-full.pdf
+   * @author Charlie Dickens
+   */
+
+template<typename T> class count_min_sketch ;
+
+template<typename T>
+class count_min_sketch{
+  static_assert(std::is_arithmetic<T>::value, "Arithmetic type expected");
+public:
+  uint64_t num_hashes, num_buckets, seed, sketch_length ;
+  T total_weight = 0;
+  std::vector<uint64_t> hash_seeds ;
+
+  /**
+  * Creates an instance of the sketch given parameters num_hashes, num_buckets and hash seed, `seed`.
+  * @param num_hashes : number of hash functions in the sketch. Equivalently the number of rows in the array
+  * @param num_buckets : number of buckets that hash functions map into. Equivalently the number of columns in the array
+  * @param seed for hash function
+  *
+  * The template type T is the type of the vector that contains the weights, not the objects inserted into the sketch.
+   * The items inserted into the sketch can be arbitrary type, so long as they are hashable via murmurhash.
+   * Only update and estimate methods are added for uint64_t and string types.
+  */
+  count_min_sketch(uint64_t num_hashes, uint64_t num_buckets, uint64_t seed = DEFAULT_SEED) ;
+
+  std::vector<T> sketch ; // the array stored by the sketch

Review Comment:
   should be private



##########
count/include/count_min.hpp:
##########
@@ -0,0 +1,166 @@
+#ifndef COUNT_MIN_HPP_
+#define COUNT_MIN_HPP_
+
+#include <cstdint>
+#include <vector>
+#include <iterator>
+#include <algorithm>
+
+#include "common_defs.hpp"
+
+namespace datasketches {
+
+  /*
+   * C++ implementation of the CountMin sketch data structure of Cormode and Muthukrishnan.
+   * [1] - http://dimacs.rutgers.edu/~graham/pubs/papers/cm-full.pdf
+   * @author Charlie Dickens
+   */
+
+template<typename T> class count_min_sketch ;
+
+template<typename T>
+class count_min_sketch{
+  static_assert(std::is_arithmetic<T>::value, "Arithmetic type expected");
+public:
+  uint64_t num_hashes, num_buckets, seed, sketch_length ;
+  T total_weight = 0;
+  std::vector<uint64_t> hash_seeds ;
+
+  /**
+  * Creates an instance of the sketch given parameters num_hashes, num_buckets and hash seed, `seed`.
+  * @param num_hashes : number of hash functions in the sketch. Equivalently the number of rows in the array
+  * @param num_buckets : number of buckets that hash functions map into. Equivalently the number of columns in the array
+  * @param seed for hash function
+  *
+  * The template type T is the type of the vector that contains the weights, not the objects inserted into the sketch.
+   * The items inserted into the sketch can be arbitrary type, so long as they are hashable via murmurhash.
+   * Only update and estimate methods are added for uint64_t and string types.
+  */
+  count_min_sketch(uint64_t num_hashes, uint64_t num_buckets, uint64_t seed = DEFAULT_SEED) ;
+
+  std::vector<T> sketch ; // the array stored by the sketch
+
+  /**
+  * @return configured num_hashes of this sketch
+  */
+  uint64_t get_num_hashes() ;
+
+  /**
+  * @return configured num_buckets of this sketch
+  */
+  uint64_t get_num_buckets() ;
+
+  /**
+  * @return configured seed of this sketch
+  */
+  uint64_t get_seed() ;
+
+  /**
+  * @return vector of the sketch configuration: {num_hsahes, num_buckets, seed}
+  */
+  std::vector<uint64_t> get_config() ; // Sketch parameter configuration -- needed for merging.
+
+  /**
+   * @return epsilon : double
+   * The maximum permissible error for any frequency estimate query.
+   * epsilon = ceil(e / num_buckets)
+   */
+   double get_relative_error() ;
+
+  /**
+  * @return total_weight : typename T
+  * The total weight currently inserted into the stream.
+  */
+  T get_total_weight() ;
+
+  /**
+  * @return vector of the sketch data structure
+  * Required for merging.
+  */
+  std::vector<T> get_sketch() ;
+
+  /*
+ * @return number_of_buckets : the number of hash buckets at every level of the
+ * sketch required in order to obtain the specified relative error.
+ * [1] - Section 3 ``Data Structure'', page 6.
+ */
+  static uint64_t suggest_num_buckets(double relative_error) ;
+
+  /*
+  * @return number_of_hashes : the number of hash functions that are required in
+   * order to achieve the specified confidence of the sketch.
+   * confidence = 1 - delta, with delta denoting the sketch failure probability in the literature.
+   * [1] - Section 3 ``Data Structure'', page 6.
+  */
+  static uint64_t suggest_num_hashes(double confidence) ;
+
+  /**
+  * @return vector of uint64_t which each represent the index to which `value' must update in the sketch
+  */
+  std::vector<uint64_t> get_hashes(const void* item, size_t size) ;
+
+  /**
+   * @param item : uint64_t type
+   * @return an estimate of the item's frequency.
+   */
+  T get_estimate(uint64_t item) ;
+
+   /**
+   * @param item : std::string type
+   * @return an estimate of the item's frequency.
+   */
+  T get_estimate(const std::string& item) ;
+
+  /**
+   * @return f_est : type T -- the estimated frequency of item
+   * Guarantee is that f_est satisfies:
+   * f_true - relative_error*total_weight <= f_est <= f_true
+   */
+   T get_estimate(const void* item, size_t size) ;
+
+   /*
+    * @return the upper bound on the true frequency of the item
+    * f_true <= f_est + relative_error*total_weight
+    */
+   T get_upper_bound(const void* item, size_t size) ;
+   T get_upper_bound(uint64_t) ;
+   T get_upper_bound(const std::string& item) ;
+
+   /*
+   * @return the upper bound on the true frequency of the item
+   * f_true <= f_est + relative_error*total_weight
+   */
+  T get_lower_bound(const void* item, size_t size) ;
+  T get_lower_bound(uint64_t) ;
+  T get_lower_bound(const std::string& item) ;
+
+  /**
+  * void function for generic updates.
+  */
+  void update(const void* item, size_t size, T weight) ;
+
+  /**
+  * void function which inserts an item of type uint64_t into the sketch
+  */
+  void update(uint64_t item, T weight) ;
+  void update(uint64_t item) ;
+
+  /**
+   * void function which inserts an item of type std::string into the sketch
+   */
+  void update(const std::string& item, T weight) ;
+  void update(const std::string& item) ;
+
+  /*
+  * merges a separate count_min_sketch into this count_min_sketch.
+  */
+  void merge(count_min_sketch<T> &other_sketch) ;

Review Comment:
   we don't modify the other, right? if so, it should be const



##########
count/include/count_min.hpp:
##########
@@ -0,0 +1,166 @@
+#ifndef COUNT_MIN_HPP_
+#define COUNT_MIN_HPP_
+
+#include <cstdint>
+#include <vector>
+#include <iterator>
+#include <algorithm>
+

Review Comment:
   some of these includes are needed in the impl only



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org