You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2022/10/06 19:27:26 UTC

[datasketches-cpp] branch universal_sorted_view updated: removed get_quantiles(), used generic term item, documentation changes

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch universal_sorted_view
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git


The following commit(s) were added to refs/heads/universal_sorted_view by this push:
     new f61e959  removed get_quantiles(), used generic term item, documentation changes
f61e959 is described below

commit f61e959e94143e51f916bfd0de2fbcad74b4cca0
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Thu Oct 6 12:27:21 2022 -0700

    removed get_quantiles(), used generic term item, documentation changes
---
 kll/include/kll_sketch.hpp                  | 182 ++++++++++-------------
 kll/include/kll_sketch_impl.hpp             | 214 ++++++++++++----------------
 kll/test/kll_sketch_test.cpp                |  41 ------
 kll/test/kll_sketch_validation.cpp          |   8 +-
 quantiles/include/quantiles_sketch.hpp      |  79 ++++------
 quantiles/include/quantiles_sketch_impl.hpp |  10 +-
 req/include/req_sketch.hpp                  |  47 +++---
 req/include/req_sketch_impl.hpp             |  25 +---
 req/test/req_sketch_test.cpp                |  16 ---
 9 files changed, 220 insertions(+), 402 deletions(-)

diff --git a/kll/include/kll_sketch.hpp b/kll/include/kll_sketch.hpp
index 4a76e34..715717c 100644
--- a/kll/include/kll_sketch.hpp
+++ b/kll/include/kll_sketch.hpp
@@ -35,9 +35,9 @@ namespace datasketches {
  * See <a href="https://arxiv.org/abs/1603.05346v2">Optimal Quantile Approximation in Streams</a>.
  *
  * <p>This is a stochastic streaming sketch that enables near real-time analysis of the
- * approximate distribution of values from a very large stream in a single pass, requiring only
- * that the values are comparable.
- * The analysis is obtained using <i>get_quantile()</i> or <i>get_quantiles()</i> functions or the
+ * approximate distribution of items from a very large stream in a single pass, requiring only
+ * that the items are comparable.
+ * The analysis is obtained using <i>get_quantile()</i> function or the
  * inverse functions get_rank(), get_PMF() (Probability Mass Function), and get_CDF()
  * (Cumulative Distribution Function).
  *
@@ -45,14 +45,15 @@ namespace datasketches {
  * with the equivalent Java implementation only when template parameter T = float
  * (32-bit single precision values).
  * 
- * <p>Given an input stream of <i>N</i> numeric values, the <i>absolute rank</i> of any specific
- * value is defined as its index <i>(0 to N-1)</i> in the hypothetical sorted stream of all
- * <i>N</i> input values.
+ * <p>Given an input stream of <i>N</i> items, the <i>natural rank</i> of any specific
+ * item is defined as its index <i>(1 to N)</i> in inclusive mode
+ * or <i>(0 to N-1)</i> in exclusive mode
+ * in the hypothetical sorted stream of all <i>N</i> input items.
  *
- * <p>The <i>normalized rank</i> (<i>rank</i>) of any specific value is defined as its
- * <i>absolute rank</i> divided by <i>N</i>.
- * Thus, the <i>normalized rank</i> is a value between zero and one.
- * In the documentation for this sketch <i>absolute rank</i> is never used so any
+ * <p>The <i>normalized rank</i> (<i>rank</i>) of any specific item is defined as its
+ * <i>natural rank</i> divided by <i>N</i>.
+ * Thus, the <i>normalized rank</i> is between zero and one.
+ * In the documentation for this sketch <i>natural rank</i> is never used so any
  * reference to just <i>rank</i> should be interpreted to mean <i>normalized rank</i>.
  *
  * <p>This sketch is configured with a parameter <i>k</i>, which affects the size of the sketch
@@ -61,18 +62,18 @@ namespace datasketches {
  * <p>The estimation error is commonly called <i>epsilon</i> (or <i>eps</i>) and is a fraction
  * between zero and one. Larger values of <i>k</i> result in smaller values of epsilon.
  * Epsilon is always with respect to the rank and cannot be applied to the
- * corresponding values.
+ * corresponding items.
  *
- * <p>The relationship between the normalized rank and the corresponding values can be viewed
+ * <p>The relationship between the normalized rank and the corresponding items can be viewed
  * as a two dimensional monotonic plot with the normalized rank on one axis and the
- * corresponding values on the other axis. If the y-axis is specified as the value-axis and
+ * corresponding items on the other axis. If the y-axis is specified as the item-axis and
  * the x-axis as the normalized rank, then <i>y = get_quantile(x)</i> is a monotonically
  * increasing function.
  *
- * <p>The functions <i>get_quantile(rank)</i> and get_quantiles(...) translate ranks into
- * corresponding values. The functions <i>get_rank(value),
+ * <p>The function <i>get_quantile(rank)</i> translates ranks into
+ * corresponding quantiles. The functions <i>get_rank(item),
  * get_CDF(...) (Cumulative Distribution Function), and get_PMF(...)
- * (Probability Mass Function)</i> perform the opposite operation and translate values into ranks.
+ * (Probability Mass Function)</i> perform the opposite operation and translate items into ranks.
  *
  * <p>The <i>getPMF(...)</i> function has about 13 to 47% worse rank error (depending
  * on <i>k</i>) than the other queries because the mass of each "bin" of the PMF has
@@ -84,60 +85,60 @@ namespace datasketches {
  *
  * <p>A <i>get_quantile(rank)</i> query has the following guarantees:
  * <ul>
- * <li>Let <i>v = get_quantile(r)</i> where <i>r</i> is the rank between zero and one.</li>
- * <li>The value <i>v</i> will be a value from the input stream.</li>
- * <li>Let <i>trueRank</i> be the true rank of <i>v</i> derived from the hypothetical sorted
- * stream of all <i>N</i> values.</li>
+ * <li>Let <i>q = get_quantile(r)</i> where <i>r</i> is the rank between zero and one.</li>
+ * <li>The quantile <i>q</i> will be an item from the input stream.</li>
+ * <li>Let <i>trueRank</i> be the true rank of <i>q</i> derived from the hypothetical sorted
+ * stream of all <i>N</i> items.</li>
  * <li>Let <i>eps = get_normalized_rank_error(false)</i>.</li>
  * <li>Then <i>r - eps &le; trueRank &le; r + eps</i> with a confidence of 99%. Note that the
- * error is on the rank, not the value.</li>
+ * error is on the rank, not the quantile.</li>
  * </ul>
  *
- * <p>A <i>get_rank(value)</i> query has the following guarantees:
+ * <p>A <i>get_rank(item)</i> query has the following guarantees:
  * <ul>
- * <li>Let <i>r = get_rank(v)</i> where <i>v</i> is a value between the min and max values of
+ * <li>Let <i>r = get_rank(i)</i> where <i>i</i> is an item between the min and max items of
  * the input stream.</li>
- * <li>Let <i>true_rank</i> be the true rank of <i>v</i> derived from the hypothetical sorted
- * stream of all <i>N</i> values.</li>
+ * <li>Let <i>true_rank</i> be the true rank of <i>i</i> derived from the hypothetical sorted
+ * stream of all <i>N</i> items.</li>
  * <li>Let <i>eps = get_normalized_rank_error(false)</i>.</li>
  * <li>Then <i>r - eps &le; trueRank &le; r + eps</i> with a confidence of 99%.</li>
  * </ul>
  *
  * <p>A <i>get_PMF()</i> query has the following guarantees:
  * <ul>
- * <li>Let <i>{r1, r2, ..., r(m+1)} = get_PMF(v1, v2, ..., vm)</i> where <i>v1, v2</i> are values
- * between the min and max values of the input stream.
- * <li>Let <i>mass<sub>i</sub> = estimated mass between v<sub>i</sub> and v<sub>i+1</sub></i>.</li>
- * <li>Let <i>trueMass</i> be the true mass between the values of <i>v<sub>i</sub>,
- * v<sub>i+1</sub></i> derived from the hypothetical sorted stream of all <i>N</i> values.</li>
+ * <li>Let <i>{r1, r2, ..., r(m+1)} = get_PMF(s1, s2, ..., sm)</i> where <i>s1, s2</i> are
+ * split points (items from the input domain) between the min and max items of the input stream.
+ * <li>Let <i>mass<sub>i</sub> = estimated mass between s<sub>i</sub> and s<sub>i+1</sub></i>.</li>
+ * <li>Let <i>trueMass</i> be the true mass between the items of <i>s<sub>i</sub>,
+ * s<sub>i+1</sub></i> derived from the hypothetical sorted stream of all <i>N</i> items.</li>
  * <li>Let <i>eps = get_normalized_rank_error(true)</i>.</li>
  * <li>then <i>mass - eps &le; trueMass &le; mass + eps</i> with a confidence of 99%.</li>
- * <li>r(m+1) includes the mass of all points larger than vm.</li>
+ * <li>r(m+1) includes the mass of all points larger than sm.</li>
  * </ul>
  *
  * <p>A <i>get_CDF(...)</i> query has the following guarantees;
  * <ul>
- * <li>Let <i>{r1, r2, ..., r(m+1)} = get_CDF(v1, v2, ..., vm)</i> where <i>v1, v2</i> are values
- * between the min and max values of the input stream.
+ * <li>Let <i>{r1, r2, ..., r(m+1)} = get_CDF(s1, s2, ..., sm)</i> where <i>s1, s2, ...</i> are
+ * split points (items from the input domain) between the min and max items of the input stream.
  * <li>Let <i>mass<sub>i</sub> = r<sub>i+1</sub> - r<sub>i</sub></i>.</li>
- * <li>Let <i>trueMass</i> be the true mass between the true ranks of <i>v<sub>i</sub>,
- * v<sub>i+1</sub></i> derived from the hypothetical sorted stream of all <i>N</i> values.</li>
+ * <li>Let <i>trueMass</i> be the true mass between the true ranks of <i>s<sub>i</sub>,
+ * s<sub>i+1</sub></i> derived from the hypothetical sorted stream of all <i>N</i> items.</li>
  * <li>Let <i>eps = get_normalized_rank_error(true)</i>.</li>
  * <li>then <i>mass - eps &le; trueMass &le; mass + eps</i> with a confidence of 99%.</li>
- * <li>1 - r(m+1) includes the mass of all points larger than vm.</li>
+ * <li>1 - r(m+1) includes the mass of all points larger than sm.</li>
  * </ul>
  *
  * <p>From the above, it might seem like we could make some estimates to bound the
- * <em>value</em> returned from a call to <em>get_quantile()</em>. The sketch, however, does not
- * let us derive error bounds or confidences around values. Because errors are independent, we
+ * <em>item</em> returned from a call to <em>get_quantile()</em>. The sketch, however, does not
+ * let us derive error bounds or confidences around items. Because errors are independent, we
  * can approximately bracket a value as shown below, but there are no error estimates available.
  * Additionally, the interval may be quite large for certain distributions.
  * <ul>
- * <li>Let <i>v = get_quantile(r)</i>, the estimated quantile value of rank <i>r</i>.</li>
+ * <li>Let <i>q = get_quantile(r)</i>, the estimated quantile of rank <i>r</i>.</li>
  * <li>Let <i>eps = get_normalized_rank_error(false)</i>.</li>
- * <li>Let <i>v<sub>lo</sub></i> = estimated quantile value of rank <i>(r - eps)</i>.</li>
- * <li>Let <i>v<sub>hi</sub></i> = estimated quantile value of rank <i>(r + eps)</i>.</li>
- * <li>Then <i>v<sub>lo</sub> &le; v &le; v<sub>hi</sub></i>, with 99% confidence.</li>
+ * <li>Let <i>q<sub>lo</sub></i> = estimated quantile of rank <i>(r - eps)</i>.</li>
+ * <li>Let <i>q<sub>hi</sub></i> = estimated quantile of rank <i>(r + eps)</i>.</li>
+ * <li>Then <i>q<sub>lo</sub> &le; q &le; q<sub>hi</sub></i>, with 99% confidence.</li>
  * </ul>
  *
  * author Kevin Lang
@@ -145,13 +146,6 @@ namespace datasketches {
  * author Lee Rhodes
  */
 
-template<typename A> using AllocU8 = typename std::allocator_traits<A>::template rebind_alloc<uint8_t>;
-template<typename A> using vector_u8 = std::vector<uint8_t, AllocU8<A>>;
-template<typename A> using AllocU32 = typename std::allocator_traits<A>::template rebind_alloc<uint32_t>;
-template<typename A> using vector_u32 = std::vector<uint32_t, AllocU32<A>>;
-template<typename A> using AllocD = typename std::allocator_traits<A>::template rebind_alloc<double>;
-template<typename A> using vector_d = std::vector<double, AllocD<A>>;
-
 namespace kll_constants {
   const uint16_t DEFAULT_K = 200;
 }
@@ -165,6 +159,7 @@ class kll_sketch {
   public:
     using value_type = T;
     using comparator = C;
+    using vector_u32 = std::vector<uint32_t, typename std::allocator_traits<A>::template rebind_alloc<uint32_t>>;
 
     static const uint8_t DEFAULT_M = 8;
     static const uint16_t MIN_K = DEFAULT_M;
@@ -266,36 +261,6 @@ class kll_sketch {
     using quantile_return_type = typename quantile_sketch_sorted_view<T, C, A>::quantile_return_type;
     quantile_return_type get_quantile(double rank, bool inclusive = true) const;
 
-    /**
-     * This returns an array that could have been generated by using get_quantile() for each
-     * rank separately.
-     *
-     * <p>If the sketch is empty this returns an empty vector.
-     *
-     * @param ranks given array of ranks in the hypothetical sorted stream.
-     * These ranks must be in the interval [0.0, 1.0].
-     * @param inclusive if true, the given ranks are considered inclusive (include weights of items)
-     *
-     * @return array of approximate quantiles corresponding to the given ranks in the same order.
-     */
-    std::vector<T, A> get_quantiles(const double* ranks, uint32_t size, bool inclusive = true) const;
-
-    /**
-     * This is a multiple-query version of get_quantile() that allows the caller to
-     * specify the number of evenly-spaced ranks.
-     *
-     * <p>If the sketch is empty this returns an empty vector.
-     *
-     * @param num an integer that specifies the number of evenly-spaced ranks.
-     * This must be an integer greater than 0. A value of 1 will return the quantile of rank 0.
-     * A value of 2 will return quantiles of ranks 0 and 1. A value of 3 will return quantiles of ranks 0,
-     * 0.5 (median) and 1, etc.
-     * @param inclusive if true, the ranks are considered inclusive (include weights of items)
-     *
-     * @return array of approximate quantiles corresponding to the given number of evenly-spaced ranks.
-     */
-    std::vector<T, A> get_quantiles(uint32_t num, bool inclusive = true) const;
-
     /**
      * Returns an approximation to the normalized rank of the given item from 0 to 1, inclusive.
      *
@@ -323,18 +288,20 @@ class kll_sketch {
      * <p>If the sketch is empty this returns an empty vector.
      *
      * @param split_points an array of <i>m</i> unique, monotonically increasing items
-     * that divide the input domain into <i>m+1</i> consecutive disjoint intervals.
+     * that divide the input domain into <i>m+1</i> consecutive disjoint intervals (bins).
+     *
      * @param size the number of split points in the array
      *
-     * @param inclusive if false, the definition of an "interval" is inclusive of the left split point and exclusive of the right
-     * split point, with the exception that the last interval will include the maximum value.
-     * If true, the definition of an "interval" is exclusive of the left split point and inclusive of the right
-     * split point.
+     * @param inclusive if true the rank of an item includes its own weight, and therefore
+     * if the sketch contains items equal to a slit point, then in PMF such items are
+     * included into the interval to the left of split point. Otherwise they are included into the interval
+     * to the right of split point.
      *
      * @return an array of m+1 doubles each of which is an approximation
      * to the fraction of the input stream items (the mass) that fall into one of those intervals.
      */
-    vector_d<A> get_PMF(const T* split_points, uint32_t size, bool inclusive = true) const;
+    using vector_double = typename quantile_sketch_sorted_view<T, C, A>::vector_double;
+    vector_double get_PMF(const T* split_points, uint32_t size, bool inclusive = true) const;
 
     /**
      * Returns an approximation to the Cumulative Distribution Function (CDF), which is the
@@ -347,19 +314,21 @@ class kll_sketch {
      *
      * @param split_points an array of <i>m</i> unique, monotonically increasing items
      * that divide the input domain into <i>m+1</i> consecutive disjoint intervals.
+     *
      * @param size the number of split points in the array
      *
-     * @param inclusive if false, the definition of an "interval" is inclusive of the left split point and exclusive of the right
-     * split point, with the exception that the last interval will include the maximum value.
-     * If true, the definition of an "interval" is exclusive of the left split point and inclusive of the right
-     * split point.
+     * @param inclusive if true the rank of an item includes its own weight, and therefore
+     * if the sketch contains items equal to a slit point, then in CDF such items are
+     * included into the interval to the left of split point. Otherwise they are included into
+     * the interval to the right of split point.
      *
      * @return an array of m+1 doubles, which are a consecutive approximation to the CDF
      * of the input stream given the split_points. The value at array position j of the returned
      * CDF array is the sum of the returned values in positions 0 through j of the returned PMF
-     * array.
+     * array. This can be viewed as array of ranks of the given split points plus one more value
+     * that is always 1.
      */
-    vector_d<A> get_CDF(const T* split_points, uint32_t size, bool inclusive = true) const;
+    vector_double get_CDF(const T* split_points, uint32_t size, bool inclusive = true) const;
 
     /**
      * Gets the approximate rank error of this sketch normalized as a fraction between zero and one.
@@ -425,7 +394,7 @@ class kll_sketch {
 
     // This is a convenience alias for users
     // The type returned by the following serialize method
-    using vector_bytes = vector_u8<A>;
+    using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
 
     /**
      * This method serializes the sketch as a vector of bytes.
@@ -513,22 +482,22 @@ class kll_sketch {
     uint8_t num_levels_;
     bool is_level_zero_sorted_;
     uint64_t n_;
-    vector_u32<A> levels_;
+    vector_u32 levels_;
     T* items_;
     uint32_t items_size_;
-    T* min_value_;
-    T* max_value_;
+    T* min_item_;
+    T* max_item_;
     mutable quantile_sketch_sorted_view<T, C, A>* sorted_view_;
 
     // for deserialization
     class item_deleter;
     class items_deleter;
-    kll_sketch(uint16_t k, uint16_t min_k, uint64_t n, uint8_t num_levels, vector_u32<A>&& levels,
-        std::unique_ptr<T, items_deleter> items, uint32_t items_size, std::unique_ptr<T, item_deleter> min_value,
-        std::unique_ptr<T, item_deleter> max_value, bool is_level_zero_sorted);
+    kll_sketch(uint16_t k, uint16_t min_k, uint64_t n, uint8_t num_levels, vector_u32&& levels,
+        std::unique_ptr<T, items_deleter> items, uint32_t items_size, std::unique_ptr<T, item_deleter> min_item,
+        std::unique_ptr<T, item_deleter> max_item, bool is_level_zero_sorted);
 
     // common update code
-    inline void update_min_max(const T& value);
+    inline void update_min_max(const T& item);
     inline uint32_t internal_update();
 
     // The following code is only valid in the special case of exactly reaching capacity while updating.
@@ -557,24 +526,23 @@ class kll_sketch {
 
     // implementations for floating point types
     template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
-    static const TT& get_invalid_value() {
-      static TT value = std::numeric_limits<TT>::quiet_NaN();
-      return value;
+    static const TT& get_invalid_item() {
+      return std::numeric_limits<TT>::quiet_NaN();
     }
 
     template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
-    static inline bool check_update_value(TT value) {
-      return !std::isnan(value);
+    static inline bool check_update_item(TT item) {
+      return !std::isnan(item);
     }
 
     // implementations for all other types
     template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
-    static const TT& get_invalid_value() {
-      throw std::runtime_error("getting quantiles from empty sketch is not supported for this type of value");
+    static const TT& get_invalid_item() {
+      throw std::runtime_error("getting quantiles from empty sketch is not supported for this type of item");
     }
 
     template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
-    static inline bool check_update_value(TT) {
+    static inline bool check_update_item(TT) {
       return true;
     }
 
diff --git a/kll/include/kll_sketch_impl.hpp b/kll/include/kll_sketch_impl.hpp
index c83c6bd..85eb1c2 100644
--- a/kll/include/kll_sketch_impl.hpp
+++ b/kll/include/kll_sketch_impl.hpp
@@ -44,8 +44,8 @@ n_(0),
 levels_(2, 0, allocator),
 items_(nullptr),
 items_size_(k_),
-min_value_(nullptr),
-max_value_(nullptr),
+min_item_(nullptr),
+max_item_(nullptr),
 sorted_view_(nullptr)
 {
   if (k < MIN_K || k > MAX_K) {
@@ -67,14 +67,14 @@ n_(other.n_),
 levels_(other.levels_),
 items_(nullptr),
 items_size_(other.items_size_),
-min_value_(nullptr),
-max_value_(nullptr),
+min_item_(nullptr),
+max_item_(nullptr),
 sorted_view_(nullptr)
 {
   items_ = allocator_.allocate(items_size_);
   for (auto i = levels_[0]; i < levels_[num_levels_]; ++i) new (&items_[i]) T(other.items_[i]);
-  if (other.min_value_ != nullptr) min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
-  if (other.max_value_ != nullptr) max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
+  if (other.min_item_ != nullptr) min_item_ = new (allocator_.allocate(1)) T(*other.min_item_);
+  if (other.max_item_ != nullptr) max_item_ = new (allocator_.allocate(1)) T(*other.max_item_);
 }
 
 template<typename T, typename C, typename A>
@@ -89,13 +89,13 @@ n_(other.n_),
 levels_(std::move(other.levels_)),
 items_(other.items_),
 items_size_(other.items_size_),
-min_value_(other.min_value_),
-max_value_(other.max_value_),
+min_item_(other.min_item_),
+max_item_(other.max_item_),
 sorted_view_(nullptr)
 {
   other.items_ = nullptr;
-  other.min_value_ = nullptr;
-  other.max_value_ = nullptr;
+  other.min_item_ = nullptr;
+  other.max_item_ = nullptr;
 }
 
 template<typename T, typename C, typename A>
@@ -111,8 +111,8 @@ kll_sketch<T, C, A>& kll_sketch<T, C, A>::operator=(const kll_sketch& other) {
   std::swap(levels_, copy.levels_);
   std::swap(items_, copy.items_);
   std::swap(items_size_, copy.items_size_);
-  std::swap(min_value_, copy.min_value_);
-  std::swap(max_value_, copy.max_value_);
+  std::swap(min_item_, copy.min_item_);
+  std::swap(max_item_, copy.max_item_);
   reset_sorted_view();
   return *this;
 }
@@ -129,8 +129,8 @@ kll_sketch<T, C, A>& kll_sketch<T, C, A>::operator=(kll_sketch&& other) {
   std::swap(levels_, other.levels_);
   std::swap(items_, other.items_);
   std::swap(items_size_, other.items_size_);
-  std::swap(min_value_, other.min_value_);
-  std::swap(max_value_, other.max_value_);
+  std::swap(min_item_, other.min_item_);
+  std::swap(max_item_, other.max_item_);
   reset_sorted_view();
   return *this;
 }
@@ -143,13 +143,13 @@ kll_sketch<T, C, A>::~kll_sketch() {
     for (uint32_t i = begin; i < end; i++) items_[i].~T();
     allocator_.deallocate(items_, items_size_);
   }
-  if (min_value_ != nullptr) {
-    min_value_->~T();
-    allocator_.deallocate(min_value_, 1);
+  if (min_item_ != nullptr) {
+    min_item_->~T();
+    allocator_.deallocate(min_item_, 1);
   }
-  if (max_value_ != nullptr) {
-    max_value_->~T();
-    allocator_.deallocate(max_value_, 1);
+  if (max_item_ != nullptr) {
+    max_item_->~T();
+    allocator_.deallocate(max_item_, 1);
   }
   reset_sorted_view();
 }
@@ -167,8 +167,8 @@ n_(other.n_),
 levels_(other.levels_, allocator_),
 items_(nullptr),
 items_size_(other.items_size_),
-min_value_(nullptr),
-max_value_(nullptr),
+min_item_(nullptr),
+max_item_(nullptr),
 sorted_view_(nullptr)
 {
   static_assert(
@@ -177,29 +177,29 @@ sorted_view_(nullptr)
   );
   items_ = allocator_.allocate(items_size_);
   for (auto i = levels_[0]; i < levels_[num_levels_]; ++i) new (&items_[i]) T(other.items_[i]);
-  if (other.min_value_ != nullptr) min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
-  if (other.max_value_ != nullptr) max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
+  if (other.min_item_ != nullptr) min_item_ = new (allocator_.allocate(1)) T(*other.min_item_);
+  if (other.max_item_ != nullptr) max_item_ = new (allocator_.allocate(1)) T(*other.max_item_);
   check_sorting();
 }
 
 template<typename T, typename C, typename A>
 template<typename FwdT>
-void kll_sketch<T, C, A>::update(FwdT&& value) {
-  if (!check_update_value(value)) { return; }
-  update_min_max(value);
+void kll_sketch<T, C, A>::update(FwdT&& item) {
+  if (!check_update_item(item)) { return; }
+  update_min_max(item);
   const uint32_t index = internal_update();
-  new (&items_[index]) T(std::forward<FwdT>(value));
+  new (&items_[index]) T(std::forward<FwdT>(item));
   reset_sorted_view();
 }
 
 template<typename T, typename C, typename A>
-void kll_sketch<T, C, A>::update_min_max(const T& value) {
+void kll_sketch<T, C, A>::update_min_max(const T& item) {
   if (is_empty()) {
-    min_value_ = new (allocator_.allocate(1)) T(value);
-    max_value_ = new (allocator_.allocate(1)) T(value);
+    min_item_ = new (allocator_.allocate(1)) T(item);
+    max_item_ = new (allocator_.allocate(1)) T(item);
   } else {
-    if (C()(value, *min_value_)) *min_value_ = value;
-    if (C()(*max_value_, value)) *max_value_ = value;
+    if (C()(item, *min_item_)) *min_item_ = item;
+    if (C()(*max_item_, item)) *max_item_ = item;
   }
 }
 
@@ -219,11 +219,11 @@ void kll_sketch<T, C, A>::merge(FwdSk&& other) {
     throw std::invalid_argument("incompatible M: " + std::to_string(m_) + " and " + std::to_string(other.m_));
   }
   if (is_empty()) {
-    min_value_ = new (allocator_.allocate(1)) T(conditional_forward<FwdSk>(*other.min_value_));
-    max_value_ = new (allocator_.allocate(1)) T(conditional_forward<FwdSk>(*other.max_value_));
+    min_item_ = new (allocator_.allocate(1)) T(conditional_forward<FwdSk>(*other.min_item_));
+    max_item_ = new (allocator_.allocate(1)) T(conditional_forward<FwdSk>(*other.max_item_));
   } else {
-    if (C()(*other.min_value_, *min_value_)) *min_value_ = conditional_forward<FwdSk>(*other.min_value_);
-    if (C()(*max_value_, *other.max_value_)) *max_value_ = conditional_forward<FwdSk>(*other.max_value_);
+    if (C()(*other.min_item_, *min_item_)) *min_item_ = conditional_forward<FwdSk>(*other.min_item_);
+    if (C()(*max_item_, *other.max_item_)) *max_item_ = conditional_forward<FwdSk>(*other.max_item_);
   }
   const uint64_t final_n = n_ + other.n_;
   for (uint32_t i = other.levels_[0]; i < other.levels_[1]; i++) {
@@ -264,14 +264,14 @@ bool kll_sketch<T, C, A>::is_estimation_mode() const {
 
 template<typename T, typename C, typename A>
 T kll_sketch<T, C, A>::get_min_item() const {
-  if (is_empty()) return get_invalid_value();
-  return *min_value_;
+  if (is_empty()) return get_invalid_item();
+  return *min_item_;
 }
 
 template<typename T, typename C, typename A>
 T kll_sketch<T, C, A>::get_max_item() const {
-  if (is_empty()) return get_invalid_value();
-  return *max_value_;
+  if (is_empty()) return get_invalid_item();
+  return *max_item_;
 }
 
 template<typename T, typename C, typename A>
@@ -281,7 +281,7 @@ C kll_sketch<T, C, A>::get_comparator() const {
 
 template<typename T, typename C, typename A>
 auto kll_sketch<T, C, A>::get_quantile(double rank, bool inclusive) const -> quantile_return_type {
-  if (is_empty()) return get_invalid_value();
+  if (is_empty()) return get_invalid_item();
   if ((rank < 0.0) || (rank > 1.0)) {
     throw std::invalid_argument("normalized rank cannot be less than zero or greater than 1.0");
   }
@@ -290,42 +290,6 @@ auto kll_sketch<T, C, A>::get_quantile(double rank, bool inclusive) const -> qua
   return sorted_view_->get_quantile(rank, inclusive);
 }
 
-template<typename T, typename C, typename A>
-std::vector<T, A> kll_sketch<T, C, A>::get_quantiles(const double* ranks, uint32_t size, bool inclusive) const {
-  std::vector<T, A> quantiles(allocator_);
-  if (is_empty()) return quantiles;
-  quantiles.reserve(size);
-
-  // may have a side effect of sorting level zero if needed
-  setup_sorted_view();
-
-  for (uint32_t i = 0; i < size; i++) {
-    const double rank = ranks[i];
-    if ((rank < 0.0) || (rank > 1.0)) {
-      throw std::invalid_argument("normalized rank cannot be less than 0 or greater than 1");
-    }
-    quantiles.push_back(sorted_view_->get_quantile(rank, inclusive));
-  }
-  return quantiles;
-}
-
-template<typename T, typename C, typename A>
-std::vector<T, A> kll_sketch<T, C, A>::get_quantiles(uint32_t num, bool inclusive) const {
-  if (is_empty()) return std::vector<T, A>(allocator_);
-  if (num == 0) {
-    throw std::invalid_argument("num must be > 0");
-  }
-  vector_d<A> ranks(num, 0, allocator_);
-  ranks[0] = 0.0;
-  for (size_t i = 1; i < num; i++) {
-    ranks[i] = static_cast<double>(i) / (num - 1);
-  }
-  if (num > 1) {
-    ranks[num - 1] = 1.0;
-  }
-  return get_quantiles(ranks.data(), num, inclusive);
-}
-
 template<typename T, typename C, typename A>
 double kll_sketch<T, C, A>::get_rank(const T& item, bool inclusive) const {
   if (is_empty()) return std::numeric_limits<double>::quiet_NaN();
@@ -334,13 +298,13 @@ double kll_sketch<T, C, A>::get_rank(const T& item, bool inclusive) const {
 }
 
 template<typename T, typename C, typename A>
-vector_d<A> kll_sketch<T, C, A>::get_PMF(const T* split_points, uint32_t size, bool inclusive) const {
+auto kll_sketch<T, C, A>::get_PMF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
   setup_sorted_view();
   return sorted_view_->get_PMF(split_points, size, inclusive);
 }
 
 template<typename T, typename C, typename A>
-vector_d<A> kll_sketch<T, C, A>::get_CDF(const T* split_points, uint32_t size, bool inclusive) const {
+auto kll_sketch<T, C, A>::get_CDF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
   setup_sorted_view();
   return sorted_view_->get_CDF(split_points, size, inclusive);
 }
@@ -372,8 +336,8 @@ size_t kll_sketch<T, C, A>::get_serialized_size_bytes(const SerDe& sd) const {
   }
   // the last integer in the levels_ array is not serialized because it can be derived
   size_t size = DATA_START + num_levels_ * sizeof(uint32_t);
-  size += sd.size_of_item(*min_value_);
-  size += sd.size_of_item(*max_value_);
+  size += sd.size_of_item(*min_item_);
+  size += sd.size_of_item(*max_item_);
   for (auto it: *this) size += sd.size_of_item(it.first);
   return size;
 }
@@ -425,18 +389,18 @@ void kll_sketch<T, C, A>::serialize(std::ostream& os, const SerDe& sd) const {
     write(os, num_levels_);
     write(os, unused);
     write(os, levels_.data(), sizeof(levels_[0]) * num_levels_);
-    sd.serialize(os, min_value_, 1);
-    sd.serialize(os, max_value_, 1);
+    sd.serialize(os, min_item_, 1);
+    sd.serialize(os, max_item_, 1);
   }
   sd.serialize(os, &items_[levels_[0]], get_num_retained());
 }
 
 template<typename T, typename C, typename A>
 template<typename SerDe>
-vector_u8<A> kll_sketch<T, C, A>::serialize(unsigned header_size_bytes, const SerDe& sd) const {
+auto kll_sketch<T, C, A>::serialize(unsigned header_size_bytes, const SerDe& sd) const -> vector_bytes {
   const bool is_single_item = n_ == 1;
   const size_t size = header_size_bytes + get_serialized_size_bytes(sd);
-  vector_u8<A> bytes(size, 0, allocator_);
+  vector_bytes bytes(size, 0, allocator_);
   uint8_t* ptr = bytes.data() + header_size_bytes;
   const uint8_t* end_ptr = ptr + size;
   const uint8_t preamble_ints(is_empty() || is_single_item ? PREAMBLE_INTS_SHORT : PREAMBLE_INTS_FULL);
@@ -461,8 +425,8 @@ vector_u8<A> kll_sketch<T, C, A>::serialize(unsigned header_size_bytes, const Se
       ptr += copy_to_mem(num_levels_, ptr);
       ptr += sizeof(uint8_t); // unused
       ptr += copy_to_mem(levels_.data(), ptr, sizeof(levels_[0]) * num_levels_);
-      ptr += sd.serialize(ptr, end_ptr - ptr, min_value_, 1);
-      ptr += sd.serialize(ptr, end_ptr - ptr, max_value_, 1);
+      ptr += sd.serialize(ptr, end_ptr - ptr, min_item_, 1);
+      ptr += sd.serialize(ptr, end_ptr - ptr, max_item_, 1);
     }
     const size_t bytes_remaining = end_ptr - ptr;
     ptr += sd.serialize(ptr, bytes_remaining, &items_[levels_[0]], get_num_retained());
@@ -506,7 +470,7 @@ kll_sketch<T, C, A> kll_sketch<T, C, A>::deserialize(std::istream& is, const Ser
     num_levels = read<uint8_t>(is);
     read<uint8_t>(is); // skip unused byte
   }
-  vector_u32<A> levels(num_levels + 1, 0, allocator);
+  vector_u32 levels(num_levels + 1, 0, allocator);
   const uint32_t capacity(kll_helper::compute_total_capacity(k, m, num_levels));
   if (is_single_item) {
     levels[0] = capacity - 1;
@@ -517,17 +481,17 @@ kll_sketch<T, C, A> kll_sketch<T, C, A>::deserialize(std::istream& is, const Ser
   levels[num_levels] = capacity;
   A alloc(allocator);
   auto item_buffer_deleter = [&alloc](T* ptr) { alloc.deallocate(ptr, 1); };
-  std::unique_ptr<T, decltype(item_buffer_deleter)> min_value_buffer(alloc.allocate(1), item_buffer_deleter);
-  std::unique_ptr<T, decltype(item_buffer_deleter)> max_value_buffer(alloc.allocate(1), item_buffer_deleter);
-  std::unique_ptr<T, item_deleter> min_value(nullptr, item_deleter(allocator));
-  std::unique_ptr<T, item_deleter> max_value(nullptr, item_deleter(allocator));
+  std::unique_ptr<T, decltype(item_buffer_deleter)> min_item_buffer(alloc.allocate(1), item_buffer_deleter);
+  std::unique_ptr<T, decltype(item_buffer_deleter)> max_item_buffer(alloc.allocate(1), item_buffer_deleter);
+  std::unique_ptr<T, item_deleter> min_item(nullptr, item_deleter(allocator));
+  std::unique_ptr<T, item_deleter> max_item(nullptr, item_deleter(allocator));
   if (!is_single_item) {
-    sd.deserialize(is, min_value_buffer.get(), 1);
+    sd.deserialize(is, min_item_buffer.get(), 1);
     // serde call did not throw, repackage with destrtuctor
-    min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
-    sd.deserialize(is, max_value_buffer.get(), 1);
+    min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
+    sd.deserialize(is, max_item_buffer.get(), 1);
     // serde call did not throw, repackage with destrtuctor
-    max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
+    max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
   }
   auto items_buffer_deleter = [capacity, &alloc](T* ptr) { alloc.deallocate(ptr, capacity); };
   std::unique_ptr<T, decltype(items_buffer_deleter)> items_buffer(alloc.allocate(capacity), items_buffer_deleter);
@@ -537,17 +501,17 @@ kll_sketch<T, C, A> kll_sketch<T, C, A>::deserialize(std::istream& is, const Ser
   std::unique_ptr<T, items_deleter> items(items_buffer.release(), items_deleter(levels[0], capacity, allocator));
   const bool is_level_zero_sorted = (flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED)) > 0;
   if (is_single_item) {
-    new (min_value_buffer.get()) T(items.get()[levels[0]]);
+    new (min_item_buffer.get()) T(items.get()[levels[0]]);
     // copy did not throw, repackage with destrtuctor
-    min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
-    new (max_value_buffer.get()) T(items.get()[levels[0]]);
+    min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
+    new (max_item_buffer.get()) T(items.get()[levels[0]]);
     // copy did not throw, repackage with destrtuctor
-    max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
+    max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
   }
   if (!is.good())
     throw std::runtime_error("error reading from std::istream");
   return kll_sketch(k, min_k, n, num_levels, std::move(levels), std::move(items), capacity,
-      std::move(min_value), std::move(max_value), is_level_zero_sorted);
+      std::move(min_item), std::move(max_item), is_level_zero_sorted);
 }
 
 template<typename T, typename C, typename A>
@@ -593,7 +557,7 @@ kll_sketch<T, C, A> kll_sketch<T, C, A>::deserialize(const void* bytes, size_t s
     ptr += copy_from_mem(ptr, num_levels);
     ptr += sizeof(uint8_t); // skip unused byte
   }
-  vector_u32<A> levels(num_levels + 1, 0, allocator);
+  vector_u32 levels(num_levels + 1, 0, allocator);
   const uint32_t capacity(kll_helper::compute_total_capacity(k, m, num_levels));
   if (is_single_item) {
     levels[0] = capacity - 1;
@@ -604,17 +568,17 @@ kll_sketch<T, C, A> kll_sketch<T, C, A>::deserialize(const void* bytes, size_t s
   levels[num_levels] = capacity;
   A alloc(allocator);
   auto item_buffer_deleter = [&alloc](T* ptr) { alloc.deallocate(ptr, 1); };
-  std::unique_ptr<T, decltype(item_buffer_deleter)> min_value_buffer(alloc.allocate(1), item_buffer_deleter);
-  std::unique_ptr<T, decltype(item_buffer_deleter)> max_value_buffer(alloc.allocate(1), item_buffer_deleter);
-  std::unique_ptr<T, item_deleter> min_value(nullptr, item_deleter(allocator));
-  std::unique_ptr<T, item_deleter> max_value(nullptr, item_deleter(allocator));
+  std::unique_ptr<T, decltype(item_buffer_deleter)> min_item_buffer(alloc.allocate(1), item_buffer_deleter);
+  std::unique_ptr<T, decltype(item_buffer_deleter)> max_item_buffer(alloc.allocate(1), item_buffer_deleter);
+  std::unique_ptr<T, item_deleter> min_item(nullptr, item_deleter(allocator));
+  std::unique_ptr<T, item_deleter> max_item(nullptr, item_deleter(allocator));
   if (!is_single_item) {
-    ptr += sd.deserialize(ptr, end_ptr - ptr, min_value_buffer.get(), 1);
+    ptr += sd.deserialize(ptr, end_ptr - ptr, min_item_buffer.get(), 1);
     // serde call did not throw, repackage with destrtuctor
-    min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
-    ptr += sd.deserialize(ptr, end_ptr - ptr, max_value_buffer.get(), 1);
+    min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
+    ptr += sd.deserialize(ptr, end_ptr - ptr, max_item_buffer.get(), 1);
     // serde call did not throw, repackage with destrtuctor
-    max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
+    max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
   }
   auto items_buffer_deleter = [capacity, &alloc](T* ptr) { alloc.deallocate(ptr, capacity); };
   std::unique_ptr<T, decltype(items_buffer_deleter)> items_buffer(alloc.allocate(capacity), items_buffer_deleter);
@@ -626,15 +590,15 @@ kll_sketch<T, C, A> kll_sketch<T, C, A>::deserialize(const void* bytes, size_t s
   if (delta != size) throw std::logic_error("deserialized size mismatch: " + std::to_string(delta) + " != " + std::to_string(size));
   const bool is_level_zero_sorted = (flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED)) > 0;
   if (is_single_item) {
-    new (min_value_buffer.get()) T(items.get()[levels[0]]);
+    new (min_item_buffer.get()) T(items.get()[levels[0]]);
     // copy did not throw, repackage with destrtuctor
-    min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
-    new (max_value_buffer.get()) T(items.get()[levels[0]]);
+    min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
+    new (max_item_buffer.get()) T(items.get()[levels[0]]);
     // copy did not throw, repackage with destrtuctor
-    max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
+    max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
   }
   return kll_sketch(k, min_k, n, num_levels, std::move(levels), std::move(items), capacity,
-      std::move(min_value), std::move(max_value), is_level_zero_sorted);
+      std::move(min_item), std::move(max_item), is_level_zero_sorted);
 }
 
 /*
@@ -653,9 +617,9 @@ double kll_sketch<T, C, A>::get_normalized_rank_error(uint16_t k, bool pmf) {
 
 // for deserialization
 template<typename T, typename C, typename A>
-kll_sketch<T, C, A>::kll_sketch(uint16_t k, uint16_t min_k, uint64_t n, uint8_t num_levels, vector_u32<A>&& levels,
-    std::unique_ptr<T, items_deleter> items, uint32_t items_size, std::unique_ptr<T, item_deleter> min_value,
-    std::unique_ptr<T, item_deleter> max_value, bool is_level_zero_sorted):
+kll_sketch<T, C, A>::kll_sketch(uint16_t k, uint16_t min_k, uint64_t n, uint8_t num_levels, vector_u32&& levels,
+    std::unique_ptr<T, items_deleter> items, uint32_t items_size, std::unique_ptr<T, item_deleter> min_item,
+    std::unique_ptr<T, item_deleter> max_item, bool is_level_zero_sorted):
 allocator_(levels.get_allocator()),
 k_(k),
 m_(DEFAULT_M),
@@ -666,8 +630,8 @@ n_(n),
 levels_(std::move(levels)),
 items_(items.release()),
 items_size_(items_size),
-min_value_(min_value.release()),
-max_value_(max_value.release()),
+min_item_(min_item.release()),
+max_item_(max_item.release()),
 sorted_view_(nullptr)
 {}
 
@@ -818,8 +782,8 @@ void kll_sketch<T, C, A>::merge_higher_levels(O&& other, uint64_t final_n) {
   const std::unique_ptr<T, decltype(tmp_items_deleter)> workbuf(allocator_.allocate(tmp_num_items), tmp_items_deleter);
   const uint8_t ub = kll_helper::ub_on_num_levels(final_n);
   const size_t work_levels_size = ub + 2; // ub+1 does not work
-  vector_u32<A> worklevels(work_levels_size, 0, allocator_);
-  vector_u32<A> outlevels(work_levels_size, 0, allocator_);
+  vector_u32 worklevels(work_levels_size, 0, allocator_);
+  vector_u32 outlevels(work_levels_size, 0, allocator_);
 
   const uint8_t provisional_num_levels = std::max(num_levels_, other.num_levels_);
 
@@ -959,8 +923,8 @@ string<A> kll_sketch<T, C, A>::to_string(bool print_levels, bool print_items) co
   os << "   Capacity items : " << items_size_ << std::endl;
   os << "   Retained items : " << get_num_retained() << std::endl;
   if (!is_empty()) {
-    os << "   Min value      : " << *min_value_ << std::endl;
-    os << "   Max value      : " << *max_value_ << std::endl;
+    os << "   Min item      : " << *min_item_ << std::endl;
+    os << "   Max item      : " << *max_item_ << std::endl;
   }
   os << "### End sketch summary" << std::endl;
 
diff --git a/kll/test/kll_sketch_test.cpp b/kll/test/kll_sketch_test.cpp
index 0d9d5a6..aec8535 100644
--- a/kll/test/kll_sketch_test.cpp
+++ b/kll/test/kll_sketch_test.cpp
@@ -67,8 +67,6 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
     REQUIRE(std::isnan(sketch.get_min_item()));
     REQUIRE(std::isnan(sketch.get_max_item()));
     REQUIRE(std::isnan(sketch.get_quantile(0.5)));
-    const double fractions[3] {0, 0.5, 1};
-    REQUIRE(sketch.get_quantiles(fractions, 3).size() == 0);
     const float split_points[1] {0};
     REQUIRE(sketch.get_PMF(split_points, 1).size() == 0);
     REQUIRE(sketch.get_CDF(split_points, 1).size() == 0);
@@ -99,12 +97,6 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
     REQUIRE(sketch.get_min_item() == 1.0);
     REQUIRE(sketch.get_max_item() == 1.0);
     REQUIRE(sketch.get_quantile(0.5) == 1.0);
-    const double fractions[3] {0, 0.5, 1};
-    auto quantiles = sketch.get_quantiles(fractions, 3);
-    REQUIRE(quantiles.size() == 3);
-    REQUIRE(quantiles[0] == 1.0);
-    REQUIRE(quantiles[1] == 1.0);
-    REQUIRE(quantiles[2] == 1.0);
 
     int count = 0;
     for (auto it: sketch) {
@@ -139,26 +131,12 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
     REQUIRE(sketch.get_max_item() == n - 1);
     REQUIRE(sketch.get_quantile(1) == n - 1);
 
-    const double ranks[3] {0, 0.5, 1};
-    auto quantiles = sketch.get_quantiles(ranks, 3, false);
-    REQUIRE(quantiles.size() == 3);
-    REQUIRE(quantiles[0] == 0.0);
-    REQUIRE(quantiles[1] == n / 2);
-    REQUIRE(quantiles[2] == n - 1 );
-
     for (uint32_t i = 0; i < n; i++) {
       const double true_rank = (double) i / n;
       REQUIRE(sketch.get_rank(static_cast<float>(i), false) == true_rank);
       const double true_rank_inclusive = (double) (i + 1) / n;
       REQUIRE(sketch.get_rank(static_cast<float>(i)) == true_rank_inclusive);
     }
-
-    // the alternative method must produce the same result
-    auto quantiles2 = sketch.get_quantiles(3, false);
-    REQUIRE(quantiles2.size() == 3);
-    REQUIRE(quantiles[0] == quantiles2[0]);
-    REQUIRE(quantiles[1] == quantiles2[1]);
-    REQUIRE(quantiles[2] == quantiles2[2]);
   }
 
   SECTION("10 items") {
@@ -207,25 +185,6 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
       REQUIRE(sketch.get_rank(static_cast<float>(i), false) == Approx(trueRank).margin(RANK_EPS_FOR_K_200));
     }
 
-    // test quantiles at every 0.1 percentage point
-    double ranks[1001];
-    double reverse_ranks[1001]; // check that ordering does not matter
-    for (int i = 0; i < 1001; i++) {
-      ranks[i] = (double) i / 1000;
-      reverse_ranks[1000 - i] = ranks[i];
-    }
-    auto quantiles = sketch.get_quantiles(ranks, 1001);
-    auto reverse_quantiles = sketch.get_quantiles(reverse_ranks, 1001);
-    float previous_quantile = 0;
-    for (int i = 0; i < 1001; i++) {
-      // expensive in a loop, just to check the equivalence here, not advised for real code
-      const float quantile = sketch.get_quantile(ranks[i]);
-      REQUIRE(quantiles[i] == quantile);
-      REQUIRE(reverse_quantiles[1000 - i] == quantile);
-      REQUIRE(previous_quantile <= quantile);
-      previous_quantile = quantile;
-    }
-
     //std::cout << sketch.to_string();
 
     uint32_t count = 0;
diff --git a/kll/test/kll_sketch_validation.cpp b/kll/test/kll_sketch_validation.cpp
index 31ab2c1..2dbf60d 100644
--- a/kll/test/kll_sketch_validation.cpp
+++ b/kll/test/kll_sketch_validation.cpp
@@ -151,11 +151,11 @@ const int64_t correct_results[num_tests * 7] = {
     113, 200, 8311133, 6554171, 16, 637, 121111429906734123
 };
 
-static std::unique_ptr<int[]> make_input_array(unsigned n, unsigned stride) {
+static std::vector<int> make_input_array(unsigned n, unsigned stride) {
   if (!kll_helper::is_odd(stride)) throw std::logic_error("stride must be odd");
   unsigned mask = (1 << 23) - 1; // because items are single-precision floats at the moment
   unsigned cur = 0;
-  std::unique_ptr<int[]> arr(new int[n]);
+  std::vector<int> arr(n, 0);
   for (unsigned i = 0; i < n; i++) {
     cur += stride;
     cur &= mask;
@@ -187,7 +187,7 @@ TEST_CASE("kll validation", "[kll_sketch][validation]") {
     unsigned k = correct_results[7 * i + 1];
     unsigned n = correct_results[7 * i + 2];
     unsigned stride = correct_results[7 * i + 3];
-    std::unique_ptr<int[]> input_array = make_input_array(n, stride);
+    auto input_array = make_input_array(n, stride);
     kll_sketch<float> sketch(k);
     kll_next_offset = 0;
     for (unsigned j = 0; j < n; j++) {
@@ -201,7 +201,7 @@ TEST_CASE("kll validation", "[kll_sketch][validation]") {
     if (correct_results[7 * i + 6] == p.first) {
       std::cout << " pass" << std::endl;
     } else {
-      std::cout << " " << (correct_results[7 * i + 6]) << " != " << p.first;
+      std::cout << " " << (correct_results[7 * i + 6]) << " != " << p.first << "\n";
       std::cout << sketch.to_string();
       FAIL();
     }
diff --git a/quantiles/include/quantiles_sketch.hpp b/quantiles/include/quantiles_sketch.hpp
index 5a9b95f..d6efec0 100644
--- a/quantiles/include/quantiles_sketch.hpp
+++ b/quantiles/include/quantiles_sketch.hpp
@@ -37,7 +37,7 @@ namespace datasketches {
  * the Probability Mass Function from get_PMF() and the Cumulative Distribution Function from get_CDF().
  *
  * <p>Consider a large stream of one million values such as packet sizes coming into a network node.
- * The natural rank of any specific size value is simply its index in the hypothetical sorted
+ * The natural rank of any specific size value is its index in the hypothetical sorted
  * array of values.
  * The normalized rank is the natural rank divided by the stream size,
  * in this case one million.
@@ -54,7 +54,7 @@ namespace datasketches {
  * <li>20% of the values were &ge; 500 and &lt; 900, and</li>
  * <li>10% of the values were &ge; 900.</li>
  * </ul>
- * A frequency histogram can be obtained by simply multiplying these fractions by get_n(),
+ * A frequency histogram can be obtained by multiplying these fractions by get_n(),
  * which is the total count of values received.
  * The get_CDF() works similarly, but produces the cumulative distribution instead.
  *
@@ -65,8 +65,8 @@ namespace datasketches {
  * <p>The accuracy of this sketch is a function of the configured value <i>k</i>, which also affects
  * the overall size of the sketch. Accuracy of this quantile sketch is always with respect to
  * the normalized rank. A <i>k</i> of 128 produces a normalized, rank error of about 1.7%.
- * For example, the median value returned from getQuantile(0.5) will be between the actual values
- * from the hypothetically sorted array of input values at normalized ranks of 0.483 and 0.517, with
+ * For example, the median item returned from getQuantile(0.5) will be between the actual items
+ * from the hypothetically sorted array of input items at normalized ranks of 0.483 and 0.517, with
  * a confidence of about 99%.</p>
  *
  * <pre>
@@ -119,17 +119,17 @@ Table Guide for DoublesSketch Size in Bytes and Approximate Error:
  * by Agarwal, Cormode, Huang, Phillips, Wei, and Yi.
  * <a href="http://dblp.org/rec/html/journals/tods/AgarwalCHPWY13"></a></p>
  *
- * <p>This algorithm is independent of the distribution of values and
- * requires only that the values be comparable.</p>
+ * <p>This algorithm is independent of the distribution of items and
+ * requires only that the items be comparable.</p>
  *
- * <p>This algorithm intentionally inserts randomness into the sampling process for values that
+ * <p>This algorithm intentionally inserts randomness into the sampling process for items that
  * ultimately get retained in the sketch. The results produced by this algorithm are not
  * deterministic. For example, if the same stream is inserted into two different instances of this
  * sketch, the answers obtained from the two sketches may not be identical.</p>
  *
  * <p>Similarly, there may be directional inconsistencies. For example, the result
  * obtained from get_quantile(rank) input into the reverse directional query
- * get_rank(item) may not result in the original value.</p>
+ * get_rank(item) may not result in the original item.</p>
  *
  * @author Kevin Lang
  * @author Lee Rhodes
@@ -254,37 +254,6 @@ public:
   using quantile_return_type = typename quantile_sketch_sorted_view<T, Comparator, Allocator>::quantile_return_type;
   quantile_return_type get_quantile(double rank, bool inclusive = true) const;
 
-  /**
-   * This is a multiple-query version of get_quantile().
-   * <p>
-   * This returns an array that could have been generated by using get_quantile() for each
-   * fractional rank separately.
-   *
-   * <p>If the sketch is empty this returns an empty vector.
-   *
-   * @param ranks given array of normalized ranks in the hypothetical sorted stream.
-   * These ranks must be in the interval [0.0, 1.0], inclusive.
-   *
-   * @return array of approximations to items associated with given ranks in the same order as given ranks
-   * in the input array.
-   */
-  std::vector<T, Allocator> get_quantiles(const double* ranks, uint32_t size, bool inclusive = true) const;
-
-  /**
-   * This is a multiple-query version of get_quantile() that allows the caller to
-   * specify the number of evenly-spaced normalized ranks.
-   *
-   * <p>If the sketch is empty this returns an empty vector.
-   *
-   * @param num an integer that specifies the number of evenly-spaced ranks.
-   * This must be an integer greater than 0. A value of 1 is equivalent to get_quantiles([0]).
-   * A value of 2 is equivalent to get_quantiles([0, 1]). A value of 3 is equivalent to
-   * get_quantiles([0, 0.5, 1]), etc.
-   *
-   * @return array of approximations to items associated with the given number of evenly-spaced normalized ranks.
-   */
-  std::vector<T, Allocator> get_quantiles(uint32_t num, bool inclusive = true) const;
-
   /**
    * Returns an approximation to the normalized rank of the given item from 0 to 1, inclusive.
    *
@@ -311,17 +280,17 @@ public:
    * <p>If the sketch is empty this returns an empty vector.
    *
    * @param split_points an array of <i>m</i> unique, monotonically increasing items
-   * that divide the input domain into <i>m+1</i> consecutive disjoint intervals.
+   * that divide the input domain into <i>m+1</i> consecutive disjoint intervals (bins).
    *
    * @param size of the array of split points.
    *
-   * @param inclusive if false, the definition of an "interval" is inclusive of the left split point and exclusive of the right
-   * split point, with the exception that the last interval will include the maximum item.
-   * If true, the definition of an "interval" is exclusive of the left split point and inclusive of the right
-   * split point.
+   * @param inclusive if true the rank of an item includes its own weight, and therefore
+   * if the sketch contains items equal to a slit point, then in PMF such items are
+   * included into the interval to the left of split point. Otherwise they are included into the interval
+   * to the right of split point.
    *
    * @return an array of m+1 doubles each of which is an approximation
-   * to the fraction of the input stream values (the mass) that fall into one of those intervals.
+   * to the fraction of the input stream items (the mass) that fall into one of those intervals.
    */
   vector_double get_PMF(const T* split_points, uint32_t size, bool inclusive = true) const;
 
@@ -339,15 +308,16 @@ public:
    *
    * @param size of the array of split points.
    *
-   * @param inclusive if false, the definition of an "interval" is inclusive of the left split point and exclusive of the right
-   * split point, with the exception that the last interval will include the maximum item.
-   * If true, the definition of an "interval" is exclusive of the left split point and inclusive of the right
-   * split point.
+   * @param inclusive if true the rank of an item includes its own weight, and therefore
+   * if the sketch contains items equal to a slit point, then in CDF such items are
+   * included into the interval to the left of split point. Otherwise they are included into
+   * the interval to the right of split point.
    *
    * @return an array of m+1 double values, which are a consecutive approximation to the CDF
    * of the input stream given the split_points. The value at array position j of the returned
    * CDF array is the sum of the returned values in positions 0 through j of the returned PMF
-   * array.
+   * array. This can be viewed as array of ranks of the given split points plus one more value
+   * that is always 1.
    */
   vector_double get_CDF(const T* split_points, uint32_t size, bool inclusive = true) const;
 
@@ -558,9 +528,8 @@ private:
 
   // implementations for floating point types
   template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
-  static const TT& get_invalid_value() {
-    static TT value = std::numeric_limits<TT>::quiet_NaN();
-    return value;
+  static const TT& get_invalid_item() {
+    return std::numeric_limits<TT>::quiet_NaN();
   }
 
   template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
@@ -570,8 +539,8 @@ private:
 
   // implementations for all other types
   template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
-  static const TT& get_invalid_value() {
-    throw std::runtime_error("getting quantiles from empty sketch is not supported for this type of values");
+  static const TT& get_invalid_item() {
+    throw std::runtime_error("getting quantiles from empty sketch is not supported for this type of items");
   }
 
   template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
diff --git a/quantiles/include/quantiles_sketch_impl.hpp b/quantiles/include/quantiles_sketch_impl.hpp
index f5d80b3..713327b 100644
--- a/quantiles/include/quantiles_sketch_impl.hpp
+++ b/quantiles/include/quantiles_sketch_impl.hpp
@@ -675,13 +675,13 @@ uint32_t quantiles_sketch<T, C, A>::get_num_retained() const {
 
 template<typename T, typename C, typename A>
 const T& quantiles_sketch<T, C, A>::get_min_item() const {
-  if (is_empty()) return get_invalid_value();
+  if (is_empty()) return get_invalid_item();
   return *min_item_;
 }
 
 template<typename T, typename C, typename A>
 const T& quantiles_sketch<T, C, A>::get_max_item() const {
-  if (is_empty()) return get_invalid_value();
+  if (is_empty()) return get_invalid_item();
   return *max_item_;
 }
 
@@ -750,7 +750,7 @@ quantile_sketch_sorted_view<T, C, A> quantiles_sketch<T, C, A>::get_sorted_view(
 
 template<typename T, typename C, typename A>
 auto quantiles_sketch<T, C, A>::get_quantile(double rank, bool inclusive) const -> quantile_return_type {
-  if (is_empty()) return get_invalid_value();
+  if (is_empty()) return get_invalid_item();
   if ((rank < 0.0) || (rank > 1.0)) {
     throw std::invalid_argument("Normalized rank cannot be less than 0 or greater than 1");
   }
@@ -1111,7 +1111,7 @@ void quantiles_sketch<T, C, A>::standard_merge(quantiles_sketch& tgt, FwdSk&& sr
     throw std::logic_error("Failed internal consistency check after standard_merge()");
   }
 
-  // update min and max values
+  // update min and max items
   // can't just check is_empty() since min and max might not have been set if
   // there were no base buffer items added via update()
   if (tgt.min_item_ == nullptr) {
@@ -1187,7 +1187,7 @@ void quantiles_sketch<T, C, A>::downsampling_merge(quantiles_sketch& tgt, FwdSk&
     throw std::logic_error("Failed internal consistency check after downsampling_merge()");
   }
 
-  // update min and max values
+  // update min and max items
   // can't just check is_empty() since min and max might not have been set if
   // there were no base buffer items added via update()
   if (tgt.min_item_ == nullptr) {
diff --git a/req/include/req_sketch.hpp b/req/include/req_sketch.hpp
index 0b9a255..cab57ac 100755
--- a/req/include/req_sketch.hpp
+++ b/req/include/req_sketch.hpp
@@ -158,16 +158,16 @@ public:
    * <p>If the sketch is empty this returns an empty vector.
    *
    * @param split_points an array of <i>m</i> unique, monotonically increasing items
-   * that divide the input domain into <i>m+1</i> consecutive disjoint intervals.
+   * that divide the input domain into <i>m+1</i> consecutive disjoint intervals (bins).
    *
-   * @param size of the array of split points.
+   * @param size the number of split points in the array
    *
-   * @param inclusive if false, the definition of an "interval" is inclusive of the left split point and exclusive of the right
-   * split point, with the exception that the last interval will include the maximum item.
-   * If true, the definition of an "interval" is exclusive of the left split point and inclusive of the right
-   * split point.
+   * @param inclusive if true the rank of an item includes its own weight, and therefore
+   * if the sketch contains items equal to a slit point, then in PMF such items are
+   * included into the interval to the left of split point. Otherwise they are included into the interval
+   * to the right of split point.
    *
-   * @return an array of m+1 double values each of which is an approximation
+   * @return an array of m+1 doubles each of which is an approximation
    * to the fraction of the input stream items (the mass) that fall into one of those intervals.
    */
   vector_double get_PMF(const T* split_points, uint32_t size, bool inclusive = true) const;
@@ -181,17 +181,18 @@ public:
    * @param split_points an array of <i>m</i> unique, monotonically increasing items
    * that divide the input domain into <i>m+1</i> consecutive disjoint intervals.
    *
-   * @param size of the array of split points.
+   * @param size the number of split points in the array
    *
-   * @param inclusive if false, the definition of an "interval" is inclusive of the left split point and exclusive of the right
-   * split point, with the exception that the last interval will include the maximum item.
-   * If true, the definition of an "interval" is exclusive of the left split point and inclusive of the right
-   * split point.
+   * @param inclusive if true the rank of an item includes its own weight, and therefore
+   * if the sketch contains items equal to a slit point, then in CDF such items are
+   * included into the interval to the left of split point. Otherwise they are included into
+   * the interval to the right of split point.
    *
-   * @return an array of m+1 double values, which are a consecutive approximation to the CDF
+   * @return an array of m+1 doubles, which are a consecutive approximation to the CDF
    * of the input stream given the split_points. The value at array position j of the returned
    * CDF array is the sum of the returned values in positions 0 through j of the returned PMF
-   * array.
+   * array. This can be viewed as array of ranks of the given split points plus one more value
+   * that is always 1.
    */
   vector_double get_CDF(const T* split_points, uint32_t size, bool inclusive = true) const;
 
@@ -206,13 +207,6 @@ public:
   using quantile_return_type = typename quantile_sketch_sorted_view<T, Comparator, Allocator>::quantile_return_type;
   quantile_return_type get_quantile(double rank, bool inclusive = true) const;
 
-  /**
-   * Returns an array of quantiles that correspond to the given array of normalized ranks.
-   * @param ranks given array of normalized ranks.
-   * @return array of quantiles that correspond to the given array of normalized ranks
-   */
-  std::vector<T, Allocator> get_quantiles(const double* ranks, uint32_t size, bool inclusive = true) const;
-
   /**
    * Returns an approximate lower bound of the given normalized rank.
    * @param rank the given rank, a value between 0 and 1.0.
@@ -362,19 +356,18 @@ private:
 
   // implementations for floating point types
   template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
-  static const TT& get_invalid_value() {
-    static TT value = std::numeric_limits<TT>::quiet_NaN();
-    return value;
+  static const TT& get_invalid_item() {
+    return std::numeric_limits<TT>::quiet_NaN();
   }
 
   template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
-  static inline bool check_update_item(const TT& value) {
-    return !std::isnan(value);
+  static inline bool check_update_item(const TT& item) {
+    return !std::isnan(item);
   }
 
   // implementations for all other types
   template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
-  static const TT& get_invalid_value() {
+  static const TT& get_invalid_item() {
     throw std::runtime_error("getting quantiles from empty sketch is not supported for this type of items");
   }
 
diff --git a/req/include/req_sketch_impl.hpp b/req/include/req_sketch_impl.hpp
index d404fd7..6cc714f 100755
--- a/req/include/req_sketch_impl.hpp
+++ b/req/include/req_sketch_impl.hpp
@@ -222,13 +222,13 @@ void req_sketch<T, C, A>::merge(FwdSk&& other) {
 
 template<typename T, typename C, typename A>
 const T& req_sketch<T, C, A>::get_min_item() const {
-  if (is_empty()) return get_invalid_value();
+  if (is_empty()) return get_invalid_item();
   return *min_item_;
 }
 
 template<typename T, typename C, typename A>
 const T& req_sketch<T, C, A>::get_max_item() const {
-  if (is_empty()) return get_invalid_value();
+  if (is_empty()) return get_invalid_item();
   return *max_item_;
 }
 
@@ -260,7 +260,7 @@ auto req_sketch<T, C, A>::get_CDF(const T* split_points, uint32_t size, bool inc
 
 template<typename T, typename C, typename A>
 auto req_sketch<T, C, A>::get_quantile(double rank, bool inclusive) const -> quantile_return_type {
-  if (is_empty()) return get_invalid_value();
+  if (is_empty()) return get_invalid_item();
   if ((rank < 0.0) || (rank > 1.0)) {
     throw std::invalid_argument("Normalized rank cannot be less than 0 or greater than 1");
   }
@@ -269,25 +269,6 @@ auto req_sketch<T, C, A>::get_quantile(double rank, bool inclusive) const -> qua
   return sorted_view_->get_quantile(rank, inclusive);
 }
 
-template<typename T, typename C, typename A>
-std::vector<T, A> req_sketch<T, C, A>::get_quantiles(const double* ranks, uint32_t size, bool inclusive) const {
-  std::vector<T, A> quantiles(allocator_);
-  if (is_empty()) return quantiles;
-  quantiles.reserve(size);
-
-  // possible side-effect of sorting level zero
-  setup_sorted_view();
-
-  for (uint32_t i = 0; i < size; ++i) {
-    const double rank = ranks[i];
-    if ((rank < 0.0) || (rank > 1.0)) {
-      throw std::invalid_argument("Normalized rank cannot be less than 0 or greater than 1");
-    }
-    quantiles.push_back(sorted_view_->get_quantile(rank, inclusive));
-  }
-  return quantiles;
-}
-
 template<typename T, typename C, typename A>
 quantile_sketch_sorted_view<T, C, A> req_sketch<T, C, A>::get_sorted_view() const {
   if (!compactors_[0].is_sorted()) {
diff --git a/req/test/req_sketch_test.cpp b/req/test/req_sketch_test.cpp
index abe4979..e427039 100755
--- a/req/test/req_sketch_test.cpp
+++ b/req/test/req_sketch_test.cpp
@@ -50,8 +50,6 @@ TEST_CASE("req sketch: empty", "[req_sketch]") {
   REQUIRE(std::isnan(sketch.get_quantile(0)));
   REQUIRE(std::isnan(sketch.get_quantile(0.5)));
   REQUIRE(std::isnan(sketch.get_quantile(1)));
-  const double ranks[3] {0, 0.5, 1};
-  REQUIRE(sketch.get_quantiles(ranks, 3).size() == 0);
 
   const float split_points[1] {0};
   REQUIRE(sketch.get_CDF(split_points, 1).empty());
@@ -74,13 +72,6 @@ TEST_CASE("req sketch: single value, lra", "[req_sketch]") {
   REQUIRE(sketch.get_quantile(0.5, false) == 1);
   REQUIRE(sketch.get_quantile(1, false) == 1);
 
-  const double ranks[3] {0, 0.5, 1};
-  auto quantiles = sketch.get_quantiles(ranks, 3, false);
-  REQUIRE(quantiles.size() == 3);
-  REQUIRE(quantiles[0] == 1);
-  REQUIRE(quantiles[1] == 1);
-  REQUIRE(quantiles[2] == 1);
-
   unsigned count = 0;
   for (auto it: sketch) {
     REQUIRE(it.second == 1);
@@ -143,13 +134,6 @@ TEST_CASE("req sketch: exact mode", "[req_sketch]") {
   REQUIRE(sketch.get_quantile(0.9) == 9);
   REQUIRE(sketch.get_quantile(1) == 10);
 
-  const double ranks[3] {0, 0.5, 1};
-  auto quantiles = sketch.get_quantiles(ranks, 3, false);
-  REQUIRE(quantiles.size() == 3);
-  REQUIRE(quantiles[0] == 1);
-  REQUIRE(quantiles[1] == 6);
-  REQUIRE(quantiles[2] == 10);
-
   const float splits[3] {2, 6, 9};
   auto cdf = sketch.get_CDF(splits, 3, false);
   REQUIRE(cdf[0] == 0.1);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org