You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2022/07/05 16:39:59 UTC

[datasketches-java] 01/01: added inclusive mode

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch quantiles_inclusive
in repository https://gitbox.apache.org/repos/asf/datasketches-java.git

commit 3d64fb7a5dd517db386e1b99b11d6f7eb22e994e
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Tue Jul 5 09:39:52 2022 -0700

    added inclusive mode
---
 .../datasketches/quantiles/DoublesAuxiliary.java   |  5 +-
 .../datasketches/quantiles/DoublesPmfCdfImpl.java  | 24 +++---
 .../datasketches/quantiles/DoublesSketch.java      | 93 +++++++++++++++++---
 .../datasketches/quantiles/ItemsAuxiliary.java     |  6 +-
 .../datasketches/quantiles/ItemsPmfCdfImpl.java    | 29 ++++---
 .../apache/datasketches/quantiles/ItemsSketch.java | 99 +++++++++++++++++-----
 .../datasketches/quantiles/KolmogorovSmirnov.java  |  4 +-
 .../quantiles/HeapUpdateDoublesSketchTest.java     | 72 +++++++++++++++-
 .../datasketches/quantiles/ItemsSketchTest.java    | 71 +++++++++++++++-
 .../apache/datasketches/quantiles/UtilTest.java    | 12 +--
 10 files changed, 336 insertions(+), 79 deletions(-)

diff --git a/src/main/java/org/apache/datasketches/quantiles/DoublesAuxiliary.java b/src/main/java/org/apache/datasketches/quantiles/DoublesAuxiliary.java
index 344d9c14..96e3afa9 100644
--- a/src/main/java/org/apache/datasketches/quantiles/DoublesAuxiliary.java
+++ b/src/main/java/org/apache/datasketches/quantiles/DoublesAuxiliary.java
@@ -41,8 +41,9 @@ final class DoublesAuxiliary {
   /**
    * Constructs the Auxiliary structure from the DoublesSketch
    * @param qs a DoublesSketch
+   * @param inclusive if true, fractional ranks are considered inclusive
    */
-  DoublesAuxiliary(final DoublesSketch qs ) {
+  DoublesAuxiliary(final DoublesSketch qs, final boolean inclusive) {
     final int k = qs.getK();
     final long n = qs.getN();
     final long bitPattern = qs.getBitPattern();
@@ -60,7 +61,7 @@ final class DoublesAuxiliary {
     //  taking advantage of the already sorted blocks of length k
     blockyTandemMergeSort(itemsArr, cumWtsArr, numSamples, k);
 
-    final long total = QuantilesHelper.convertToPrecedingCummulative(cumWtsArr, false);
+    final long total = QuantilesHelper.convertToPrecedingCummulative(cumWtsArr, inclusive);
     assert total == n;
 
     auxN_ = n;
diff --git a/src/main/java/org/apache/datasketches/quantiles/DoublesPmfCdfImpl.java b/src/main/java/org/apache/datasketches/quantiles/DoublesPmfCdfImpl.java
index 29134473..a0e63ea8 100644
--- a/src/main/java/org/apache/datasketches/quantiles/DoublesPmfCdfImpl.java
+++ b/src/main/java/org/apache/datasketches/quantiles/DoublesPmfCdfImpl.java
@@ -27,8 +27,9 @@ package org.apache.datasketches.quantiles;
  */
 class DoublesPmfCdfImpl {
 
-  static double[] getPMFOrCDF(final DoublesSketch sketch, final double[] splitPoints, final boolean isCDF) {
-    final double[] buckets = internalBuildHistogram(sketch, splitPoints);
+  static double[] getPMFOrCDF(final DoublesSketch sketch, final double[] splitPoints,
+      final boolean isCDF, final boolean inclusive) {
+    final double[] buckets = internalBuildHistogram(sketch, splitPoints, inclusive);
     final long n = sketch.getN();
     if (isCDF) {
       double subtotal = 0;
@@ -52,7 +53,8 @@ class DoublesPmfCdfImpl {
    * that divide the real number line into <i>m+1</i> consecutive disjoint intervals.
    * @return the unnormalized, accumulated counts of <i>m + 1</i> intervals.
    */
-  private static double[] internalBuildHistogram(final DoublesSketch sketch, final double[] splitPoints) {
+  private static double[] internalBuildHistogram(final DoublesSketch sketch, final double[] splitPoints,
+      final boolean inclusive) {
     final DoublesSketchAccessor sketchAccessor = DoublesSketchAccessor.wrap(sketch);
     Util.checkSplitPointsOrder(splitPoints);
 
@@ -65,12 +67,12 @@ class DoublesPmfCdfImpl {
     if (numSplitPoints < 50) { // empirically determined crossover
       // sort not worth it when few split points
       DoublesPmfCdfImpl.bilinearTimeIncrementHistogramCounters(
-              sketchAccessor, weight, splitPoints, counters);
+              sketchAccessor, weight, splitPoints, counters, inclusive);
     } else {
       sketchAccessor.sort();
       // sort is worth it when many split points
       DoublesPmfCdfImpl.linearTimeIncrementHistogramCounters(
-              sketchAccessor, weight, splitPoints, counters);
+              sketchAccessor, weight, splitPoints, counters, inclusive);
     }
 
     long myBitPattern = sketch.getBitPattern();
@@ -82,7 +84,7 @@ class DoublesPmfCdfImpl {
         // the levels are already sorted so we can use the fast version
         sketchAccessor.setLevel(lvl);
         DoublesPmfCdfImpl.linearTimeIncrementHistogramCounters(
-                sketchAccessor, weight, splitPoints, counters);
+                sketchAccessor, weight, splitPoints, counters, inclusive);
       }
     }
     return counters;
@@ -96,16 +98,17 @@ class DoublesPmfCdfImpl {
    * @param weight of the samples
    * @param splitPoints must be unique and sorted. Number of splitPoints + 1 == counters.length.
    * @param counters array of counters
+   * @param inclusive if true the weight of a value is included into its rank.
    */
   static void bilinearTimeIncrementHistogramCounters(final DoublesBufferAccessor samples, final long weight,
-      final double[] splitPoints, final double[] counters) {
+      final double[] splitPoints, final double[] counters, final boolean inclusive) {
     assert ((splitPoints.length + 1) == counters.length);
     for (int i = 0; i < samples.numItems(); i++) {
       final double sample = samples.get(i);
       int j;
       for (j = 0; j < splitPoints.length; j++) {
         final double splitpoint = splitPoints[j];
-        if (sample < splitpoint) {
+        if (inclusive ? sample <= splitpoint : sample < splitpoint) {
           break;
         }
       }
@@ -126,13 +129,14 @@ class DoublesPmfCdfImpl {
    * @param weight of the samples
    * @param splitPoints must be unique and sorted. Number of splitPoints + 1 = counters.length.
    * @param counters array of counters
+   * @param inclusive if true the weight of a value is included into its rank.
    */
   static void linearTimeIncrementHistogramCounters(final DoublesBufferAccessor samples, final long weight,
-      final double[] splitPoints, final double[] counters) {
+      final double[] splitPoints, final double[] counters, final boolean inclusive) {
     int i = 0;
     int j = 0;
     while ((i < samples.numItems()) && (j < splitPoints.length)) {
-      if (samples.get(i) < splitPoints[j]) {
+      if (inclusive ? samples.get(i) <= splitPoints[j] : samples.get(i) < splitPoints[j]) {
         counters[j] += weight; // this sample goes into this bucket
         i++; // move on to next sample and see whether it also goes into this bucket
       } else {
diff --git a/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java b/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java
index 23f5bca0..4235a0e8 100644
--- a/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java
+++ b/src/main/java/org/apache/datasketches/quantiles/DoublesSketch.java
@@ -211,19 +211,30 @@ public abstract class DoublesSketch {
    * If fraction = 0.0, the true minimum value of the stream is returned.
    * If fraction = 1.0, the true maximum value of the stream is returned.
    *
+   * @param inclusive if true, the given fraction (rank) is considered inclusive
+   * 
    * @return the approximation to the value at the above fraction
    */
-  public double getQuantile(final double fraction) {
+  public double getQuantile(final double fraction, final boolean inclusive) {
     if (isEmpty()) { return Double.NaN; }
     if (fraction < 0.0 || fraction > 1.0) {
       throw new SketchesArgumentException("Fraction cannot be less than zero or greater than 1.0");
     }
     if      (fraction == 0.0) { return getMinValue(); }
     if (fraction == 1.0) { return getMaxValue(); }
-    final DoublesAuxiliary aux = new DoublesAuxiliary(this);
+    final DoublesAuxiliary aux = new DoublesAuxiliary(this, inclusive);
     return aux.getQuantile(fraction);
   }
 
+  /**
+   * Same as {@link #getQuantile(double, boolean) getQuantile(double fraction, false)}
+   * @param fraction fractional rank
+   * @return quantile
+   */
+  public double getQuantile(final double fraction) {
+    return getQuantile(fraction, false);
+  }
+
   /**
    * Gets the upper bound of the value interval in which the true quantile of the given rank
    * exists with a confidence of at least 99%.
@@ -261,10 +272,12 @@ public abstract class DoublesSketch {
    * sorted stream of all the input values seen so far.
    * These fRanks must all be in the interval [0.0, 1.0] inclusively.
    *
+   * @param inclusive if true, the given fractional ranks are considered inclusive
+   * 
    * @return array of approximate quantiles of the given fRanks in the same order as in the given
    * fRanks array.
    */
-  public double[] getQuantiles(final double[] fRanks) {
+  public double[] getQuantiles(final double[] fRanks, final boolean inclusive) {
     if (isEmpty()) { return null; }
     DoublesAuxiliary aux = null;
     final double[] quantiles = new double[fRanks.length];
@@ -274,7 +287,7 @@ public abstract class DoublesSketch {
       else if (fRank == 1.0) { quantiles[i] = getMaxValue(); }
       else {
         if (aux == null) {
-          aux = new DoublesAuxiliary(this);
+          aux = new DoublesAuxiliary(this, inclusive);
         }
         quantiles[i] = aux.getQuantile(fRank);
       }
@@ -282,23 +295,43 @@ public abstract class DoublesSketch {
     return quantiles;
   }
 
+  /**
+   * Same as {@link #getQuantiles(double[], boolean) getQuantiles(double[] fRanks, false)}
+   * @param fRanks fractional ranks
+   * @return quantiles
+   */
+  public double[] getQuantiles(final double[] fRanks) {
+    return getQuantiles(fRanks, false);
+  }
+
   /**
    * This is also a more efficient multiple-query version of getQuantile() and allows the caller to
    * specify the number of evenly spaced fractional ranks.
    *
    * <p>If the sketch is empty this returns null.
    *
-   * @param evenlySpaced an integer that specifies the number of evenly spaced fractional ranks.
+   * @param numEvenlySpaced an integer that specifies the number of evenly spaced fractional ranks.
    * This must be a positive integer greater than 1.
    * A value of 2 will return the min and the max value. A value of 3 will return the min,
    * the median and the max value, etc.
    *
+   * @param inclusive if true, fractional ranks are considered inclusive
+   * 
    * @return array of approximations to the given fractions in the same order as given fractions
    * array.
    */
-  public double[] getQuantiles(final int evenlySpaced) {
+  public double[] getQuantiles(final int numEvenlySpaced, final boolean inclusive) {
     if (isEmpty()) { return null; }
-    return getQuantiles(org.apache.datasketches.Util.evenlySpaced(0.0, 1.0, evenlySpaced));
+    return getQuantiles(org.apache.datasketches.Util.evenlySpaced(0.0, 1.0, numEvenlySpaced));
+  }
+
+  /**
+   * Same as {@link #getQuantiles(int, boolean) getQuantiles(int numEvenlySpaced, false)}
+   * @param numEvenlySpaced number of evenly spaced fractional ranks
+   * @return quantiles
+   */
+  public double[] getQuantiles(final int numEvenlySpaced) {
+    return getQuantiles(numEvenlySpaced, false);
   }
 
   /**
@@ -311,16 +344,17 @@ public abstract class DoublesSketch {
    * <p>If the sketch is empty this returns NaN.</p>
    *
    * @param value to be ranked
+   * @param inclusive if true the weight of the given value is included into the rank.
    * @return an approximate rank of the given value
    */
-  public double getRank(final double value) {
+  public double getRank(final double value, final boolean inclusive) {
     if (isEmpty()) { return Double.NaN; }
     final DoublesSketchAccessor samples = DoublesSketchAccessor.wrap(this);
     long total = 0;
     int weight = 1;
     samples.setLevel(DoublesSketchAccessor.BB_LVL_IDX);
     for (int i = 0; i < samples.numItems(); i++) {
-      if (samples.get(i) < value) {
+      if (inclusive ? samples.get(i) <= value : samples.get(i) < value) {
         total += weight;
       }
     }
@@ -330,7 +364,7 @@ public abstract class DoublesSketch {
       if ((bitPattern & 1L) > 0) { // level is not empty
         samples.setLevel(lvl);
         for (int i = 0; i < samples.numItems(); i++) {
-          if (samples.get(i) < value) {
+          if (inclusive ? samples.get(i) <= value : samples.get(i) < value) {
             total += weight;
           } else {
             break; // levels are sorted, no point comparing further
@@ -341,6 +375,15 @@ public abstract class DoublesSketch {
     return (double) total / getN();
   }
 
+  /**
+   * Same as {@link #getRank(double, boolean) getRank(double value, false)}
+   * @param value value to be ranked
+   * @return fractional rank
+   */
+  public double getRank(final double value) {
+    return getRank(value, false);
+  }
+
   /**
    * Returns an approximation to the Probability Mass Function (PMF) of the input stream
    * given a set of splitPoints (values).
@@ -357,14 +400,25 @@ public abstract class DoublesSketch {
    * the maximum value.
    * It is not necessary to include either the min or max values in these splitpoints.
    *
+   * @param inclusive if true the weight of the given value is included into the rank.
+   * 
    * @return an array of m+1 doubles each of which is an approximation
    * to the fraction of the input stream values (the mass) that fall into one of those intervals.
    * The definition of an "interval" is inclusive of the left splitPoint and exclusive of the right
    * splitPoint, with the exception that the last interval will include maximum value.
    */
-  public double[] getPMF(final double[] splitPoints) {
+  public double[] getPMF(final double[] splitPoints, final boolean inclusive) {
     if (isEmpty()) { return null; }
-    return DoublesPmfCdfImpl.getPMFOrCDF(this, splitPoints, false);
+    return DoublesPmfCdfImpl.getPMFOrCDF(this, splitPoints, false, inclusive);
+  }
+
+  /**
+   * Same as {@link #getPMF(double[], boolean) getPMF(double[] splitPoints, false)}
+   * @param splitPoints splitPoints
+   * @return PMF
+   */
+  public double[] getPMF(final double[] splitPoints) {
+    return getPMF(splitPoints, false);
   }
 
   /**
@@ -383,14 +437,25 @@ public abstract class DoublesSketch {
    * the maximum value.
    * It is not necessary to include either the min or max values in these splitpoints.
    *
+   * @param inclusive if true the weight of the given value is included into the rank.
+   * 
    * @return an array of m+1 double values, which are a consecutive approximation to the CDF
    * of the input stream given the splitPoints. The value at array position j of the returned
    * CDF array is the sum of the returned values in positions 0 through j of the returned PMF
    * array.
    */
-  public double[] getCDF(final double[] splitPoints) {
+  public double[] getCDF(final double[] splitPoints, final boolean inclusive) {
     if (isEmpty()) { return null; }
-    return DoublesPmfCdfImpl.getPMFOrCDF(this, splitPoints, true);
+    return DoublesPmfCdfImpl.getPMFOrCDF(this, splitPoints, true, inclusive);
+  }
+
+  /**
+   * Same as {@link #getCDF(double[], boolean) getCDF(double[] splitPoints, false)}
+   * @param splitPoints splitPoints
+   * @return CDF
+   */
+  public double[] getCDF(final double[] splitPoints) {
+    return getCDF(splitPoints, false);
   }
 
   /**
diff --git a/src/main/java/org/apache/datasketches/quantiles/ItemsAuxiliary.java b/src/main/java/org/apache/datasketches/quantiles/ItemsAuxiliary.java
index 4905fc22..b28740bf 100644
--- a/src/main/java/org/apache/datasketches/quantiles/ItemsAuxiliary.java
+++ b/src/main/java/org/apache/datasketches/quantiles/ItemsAuxiliary.java
@@ -42,7 +42,7 @@ final class ItemsAuxiliary<T> {
    * @param qs an ItemsSketch
    */
   @SuppressWarnings("unchecked")
-  ItemsAuxiliary(final ItemsSketch<T> qs) {
+  ItemsAuxiliary(final ItemsSketch<T> qs, final boolean inclusive) {
     final int k = qs.getK();
     final long n = qs.getN();
     final long bitPattern = qs.getBitPattern();
@@ -64,9 +64,9 @@ final class ItemsAuxiliary<T> {
 
     // convert the item weights into totals of the weights preceding each item
     long subtot = 0;
-    for (int i = 0; i < (numSamples + 1); i++ ) {
+    for (int i = 0; i < (numSamples + 1); i++) {
       final long newSubtot = subtot + cumWtsArr[i];
-      cumWtsArr[i] = subtot;
+      cumWtsArr[i] = inclusive ? newSubtot : subtot;
       subtot = newSubtot;
     }
 
diff --git a/src/main/java/org/apache/datasketches/quantiles/ItemsPmfCdfImpl.java b/src/main/java/org/apache/datasketches/quantiles/ItemsPmfCdfImpl.java
index 4f7c0780..247a3058 100644
--- a/src/main/java/org/apache/datasketches/quantiles/ItemsPmfCdfImpl.java
+++ b/src/main/java/org/apache/datasketches/quantiles/ItemsPmfCdfImpl.java
@@ -24,8 +24,9 @@ import java.util.Comparator;
 
 class ItemsPmfCdfImpl {
 
-  static <T> double[] getPMFOrCDF(final ItemsSketch<T> sketch, final T[] splitPoints, final boolean isCDF) {
-    final double[] buckets = internalBuildHistogram(splitPoints, sketch);
+  static <T> double[] getPMFOrCDF(final ItemsSketch<T> sketch, final T[] splitPoints,
+      final boolean isCDF, final boolean inclusive) {
+    final double[] buckets = internalBuildHistogram(splitPoints, sketch, inclusive);
     final long n = sketch.getN();
     if (isCDF) {
       double subtotal = 0;
@@ -51,7 +52,8 @@ class ItemsPmfCdfImpl {
    * @return the unnormalized, accumulated counts of <i>m + 1</i> intervals.
    */
   @SuppressWarnings("unchecked")
-  private static <T> double[] internalBuildHistogram(final T[] splitPoints, final ItemsSketch<T> sketch) {
+  private static <T> double[] internalBuildHistogram(final T[] splitPoints, final ItemsSketch<T> sketch,
+      final boolean inclusive) {
     final Object[] samples  = sketch.getCombinedBuffer();
     final int bbCount = sketch.getBaseBufferCount();
     ItemsUtil.validateValues(splitPoints, sketch.getComparator());
@@ -63,14 +65,13 @@ class ItemsPmfCdfImpl {
     long weight = 1;
     if (numSplitPoints < 50) { // empirically determined crossover
       // sort not worth it when few split points
-      ItemsPmfCdfImpl.bilinearTimeIncrementHistogramCounters(
-          (T[]) samples, 0, bbCount, weight, splitPoints, counters, sketch.getComparator());
+      bilinearTimeIncrementHistogramCounters(
+          (T[]) samples, 0, bbCount, weight, splitPoints, counters, sketch.getComparator(), inclusive);
     } else {
       // sort is worth it when many split points
       Arrays.sort((T[]) samples, 0, bbCount, sketch.getComparator());
       linearTimeIncrementHistogramCounters(
-          (T[]) samples, 0, bbCount, weight, splitPoints, counters, sketch.getComparator()
-      );
+          (T[]) samples, 0, bbCount, weight, splitPoints, counters, sketch.getComparator(), inclusive);
     }
 
     long myBitPattern = sketch.getBitPattern();
@@ -81,7 +82,7 @@ class ItemsPmfCdfImpl {
       if ((myBitPattern & 1L) > 0L) { //valid level exists
         // the levels are already sorted so we can use the fast version
         linearTimeIncrementHistogramCounters(
-            (T[]) samples, (2 + lvl) * k, k, weight, splitPoints, counters, sketch.getComparator());
+            (T[]) samples, (2 + lvl) * k, k, weight, splitPoints, counters, sketch.getComparator(), inclusive);
       }
     }
     return counters;
@@ -98,17 +99,18 @@ class ItemsPmfCdfImpl {
    * @param splitPoints must be unique and sorted. Number of splitPoints + 1 == counters.length.
    * @param counters array of counters
    * @param comparator the comparator for data type T
+   * @param inclusive if true, rank is considered inclusive
    */
   private static <T> void bilinearTimeIncrementHistogramCounters(final T[] samples, final int offset,
       final int numSamples, final long weight, final T[] splitPoints, final double[] counters,
-      final Comparator<? super T> comparator) {
+      final Comparator<? super T> comparator, final boolean inclusive) {
     assert ((splitPoints.length + 1) == counters.length);
     for (int i = 0; i < numSamples; i++) {
       final T sample = samples[i + offset];
       int j = 0;
       for (j = 0; j < splitPoints.length; j++) {
         final T splitpoint = splitPoints[j];
-        if (comparator.compare(sample, splitpoint) < 0) {
+        if (inclusive ? comparator.compare(sample, splitpoint) <= 0 : comparator.compare(sample, splitpoint) < 0) {
           break;
         }
       }
@@ -133,14 +135,17 @@ class ItemsPmfCdfImpl {
    * @param splitPoints must be unique and sorted. Number of splitPoints + 1 = counters.length.
    * @param counters array of counters
    * @param comparator the comparator for data type T
+   * @param inclusive if true, rank is considered inclusive
    */
   private static <T> void linearTimeIncrementHistogramCounters(final T[] samples, final int offset,
       final int numSamples, final long weight, final T[] splitPoints, final double[] counters,
-      final Comparator<? super T> comparator) {
+      final Comparator<? super T> comparator, final boolean inclusive) {
     int i = 0;
     int j = 0;
     while ((i < numSamples) && (j < splitPoints.length)) {
-      if (comparator.compare(samples[i + offset], splitPoints[j]) < 0) {
+      final T sample = samples[i + offset];
+      final T value = splitPoints[j];
+      if (inclusive ? comparator.compare(sample, value) <= 0 : comparator.compare(sample, value) < 0) {
         counters[j] += weight; // this sample goes into this bucket
         i++; // move on to next sample and see whether it also goes into this bucket
       } else {
diff --git a/src/main/java/org/apache/datasketches/quantiles/ItemsSketch.java b/src/main/java/org/apache/datasketches/quantiles/ItemsSketch.java
index 0726fda8..0400d545 100644
--- a/src/main/java/org/apache/datasketches/quantiles/ItemsSketch.java
+++ b/src/main/java/org/apache/datasketches/quantiles/ItemsSketch.java
@@ -271,20 +271,31 @@ public final class ItemsSketch<T> {
    * If fraction = 0.0, the true minimum value of the stream is returned.
    * If fraction = 1.0, the true maximum value of the stream is returned.
    *
+   * @param inclusive if true, the given fraction (rank) is considered inclusive
+   * 
    * @return the approximation to the value at the above fraction
    */
-  public T getQuantile(final double fraction) {
+  public T getQuantile(final double fraction, final boolean inclusive) {
     if (fraction < 0.0 || fraction > 1.0) {
       throw new SketchesArgumentException("Fraction cannot be less than zero or greater than 1.0");
     }
     if      (fraction == 0.0) { return minValue_; }
     else if (fraction == 1.0) { return maxValue_; }
     else {
-      final ItemsAuxiliary<T> aux = constructAuxiliary();
+      final ItemsAuxiliary<T> aux = new ItemsAuxiliary<>(this, inclusive);
       return aux.getQuantile(fraction);
     }
   }
 
+  /**
+   * Same as {@link #getQuantile(double, boolean) getQuantile(double fraction, false)}
+   * @param fraction fractional rank
+   * @return quantile
+   */
+  public T getQuantile(final double fraction) {
+    return getQuantile(fraction, false);
+  }
+
   /**
    * Gets the upper bound of the value interval in which the true quantile of the given rank
    * exists with a confidence of at least 99%.
@@ -322,10 +333,12 @@ public final class ItemsSketch<T> {
    * sorted stream of all the input values seen so far.
    * These fRanks must all be in the interval [0.0, 1.0] inclusively.
    *
+   * @param inclusive if true, the given fractional ranks are considered inclusive
+   * 
    * @return array of approximate quantiles of the given fRanks in the same order as in the given
    * fRanks array.
    */
-  public T[] getQuantiles(final double[] fRanks) {
+  public T[] getQuantiles(final double[] fRanks, final boolean inclusive) {
     if (isEmpty()) { return null; }
     ItemsAuxiliary<T> aux = null;
     @SuppressWarnings("unchecked")
@@ -336,7 +349,7 @@ public final class ItemsSketch<T> {
       else if (fRank == 1.0) { quantiles[i] = maxValue_; }
       else {
         if (aux == null) {
-          aux = this.constructAuxiliary();
+          aux = new ItemsAuxiliary<>(this, inclusive);
         }
         quantiles[i] = aux.getQuantile(fRank);
       }
@@ -344,21 +357,41 @@ public final class ItemsSketch<T> {
     return quantiles;
   }
 
+  /**
+   * Same as {@link #getQuantiles(double[], boolean) getQuantiles(double[] fRanks, false)}
+   * @param fRanks fractional ranks
+   * @return quantiles
+   */
+  public T[] getQuantiles(final double[] fRanks) {
+    return getQuantiles(fRanks, false);
+  }
+
   /**
    * This is also a more efficient multiple-query version of getQuantile() and allows the caller to
    * specify the number of evenly spaced fractional ranks.
    *
-   * @param evenlySpaced an integer that specifies the number of evenly spaced fractional ranks.
+   * @param numEvenlySpaced an integer that specifies the number of evenly spaced fractional ranks.
    * This must be a positive integer greater than 1.
    * A value of 2 will return the min and the max value. A value of 3 will return the min,
    * the median and the max value, etc.
    *
+   * @param inclusive if true, fractional ranks are considered inclusive
+   * 
    * @return array of approximations to the given fractions in the same order as given fractions
    * array.
    */
-  public T[] getQuantiles(final int evenlySpaced) {
+  public T[] getQuantiles(final int numEvenlySpaced, final boolean inclusive) {
     if (isEmpty()) { return null; }
-    return getQuantiles(org.apache.datasketches.Util.evenlySpaced(0.0, 1.0, evenlySpaced));
+    return getQuantiles(org.apache.datasketches.Util.evenlySpaced(0.0, 1.0, numEvenlySpaced), inclusive);
+  }
+
+  /**
+   * Same as {@link #getQuantiles(int, boolean) getQuantiles(int numEvenlySpaced, false)}
+   * @param numEvenlySpaced number of evenly spaced fractional ranks
+   * @return quantiles
+   */
+  public T[] getQuantiles(final int numEvenlySpaced) {
+    return getQuantiles(numEvenlySpaced, false);
   }
 
   /**
@@ -371,15 +404,17 @@ public final class ItemsSketch<T> {
    * <p>If the sketch is empty this returns NaN.</p>
    *
    * @param value to be ranked
+   * @param inclusive if true the weight of the given value is included into the rank.
    * @return an approximate rank of the given value
    */
   @SuppressWarnings("unchecked")
-  public double getRank(final T value) {
+  public double getRank(final T value, final boolean inclusive) {
     if (isEmpty()) { return Double.NaN; }
     long total = 0;
     int weight = 1;
     for (int i = 0; i < baseBufferCount_; i++) {
-      if (comparator_.compare((T) combinedBuffer_[i], value) < 0) {
+      final T sample = (T) combinedBuffer_[i];
+      if (inclusive ? comparator_.compare(sample, value) <= 0 : comparator_.compare(sample, value) < 0) {
         total += weight;
       }
     }
@@ -389,7 +424,8 @@ public final class ItemsSketch<T> {
       if ((bitPattern & 1L) > 0) { // level is not empty
         final int offset = (2 + lvl) * k_;
         for (int i = 0; i < k_; i++) {
-          if (comparator_.compare((T) combinedBuffer_[i + offset], value) < 0) {
+          final T sample = (T) combinedBuffer_[i + offset];
+          if (inclusive ? comparator_.compare(sample, value) <= 0 : comparator_.compare(sample, value) < 0) {
             total += weight;
           } else {
             break; // levels are sorted, no point comparing further
@@ -400,6 +436,10 @@ public final class ItemsSketch<T> {
     return (double) total / n_;
   }
 
+  public double getRank(final T value) {
+    return getRank(value, false);
+  }
+
   /**
    * Returns an approximation to the Probability Mass Function (PMF) of the input stream
    * given a set of splitPoints (values).
@@ -416,14 +456,25 @@ public final class ItemsSketch<T> {
    * the maximum value.
    * It is not necessary to include either the min or max values in these splitpoints.
    *
+   * @param inclusive if true the weight of the given value is included into the rank.
+   * 
    * @return an array of m+1 doubles each of which is an approximation
    * to the fraction of the input stream values (the mass) that fall into one of those intervals.
    * The definition of an "interval" is inclusive of the left splitPoint and exclusive of the right
    * splitPoint, with the exception that the last interval will include maximum value.
    */
-  public double[] getPMF(final T[] splitPoints) {
+  public double[] getPMF(final T[] splitPoints, final boolean inclusive) {
     if (isEmpty()) { return null; }
-    return ItemsPmfCdfImpl.getPMFOrCDF(this, splitPoints, false);
+    return ItemsPmfCdfImpl.getPMFOrCDF(this, splitPoints, false, inclusive);
+  }
+
+  /**
+   * Same as {@link #getPMF(T[], boolean) getPMF(T[] splitPoints, false)}
+   * @param splitPoints splitPoints
+   * @return PMF
+   */
+  public double[] getPMF(final T[] splitPoints) {
+    return getPMF(splitPoints, false);
   }
 
   /**
@@ -442,14 +493,25 @@ public final class ItemsSketch<T> {
    * the maximum value.
    * It is not necessary to include either the min or max values in these splitpoints.
    *
+   * @param inclusive if true the weight of the given value is included into the rank.
+   * 
    * @return an array of m+1 double values, which are a consecutive approximation to the CDF
    * of the input stream given the splitPoints. The value at array position j of the returned
    * CDF array is the sum of the returned values in positions 0 through j of the returned PMF
    * array.
    */
-  public double[] getCDF(final T[] splitPoints) {
+  public double[] getCDF(final T[] splitPoints, final boolean inclusive) {
     if (isEmpty()) { return null; }
-    return ItemsPmfCdfImpl.getPMFOrCDF(this, splitPoints, true);
+    return ItemsPmfCdfImpl.getPMFOrCDF(this, splitPoints, true, inclusive);
+  }
+
+  /**
+   * Same as {@link #getCDF(T[], boolean) getCDF(T[] splitPoints, false)}
+   * @param splitPoints splitPoints
+   * @return CDF
+   */
+  public double[] getCDF(final T[] splitPoints) {
+    return getCDF(splitPoints, false);
   }
 
   /**
@@ -723,15 +785,6 @@ public final class ItemsSketch<T> {
     }
   }
 
-  /**
-   * Returns the Auxiliary data structure which is only used for getQuantile() and getQuantiles()
-   * queries.
-   * @return the Auxiliary data structure
-   */
-  private ItemsAuxiliary<T> constructAuxiliary() {
-    return new ItemsAuxiliary<>(this);
-  }
-
   private static <T> void growBaseBuffer(final ItemsSketch<T> sketch) {
     final Object[] baseBuffer = sketch.getCombinedBuffer();
     final int oldSize = sketch.getCombinedBufferAllocatedCount();
diff --git a/src/main/java/org/apache/datasketches/quantiles/KolmogorovSmirnov.java b/src/main/java/org/apache/datasketches/quantiles/KolmogorovSmirnov.java
index 2217fb52..d1cffa38 100644
--- a/src/main/java/org/apache/datasketches/quantiles/KolmogorovSmirnov.java
+++ b/src/main/java/org/apache/datasketches/quantiles/KolmogorovSmirnov.java
@@ -34,8 +34,8 @@ final class KolmogorovSmirnov {
    * @return the raw delta area between two quantile sketches
    */
   public static double computeKSDelta(final DoublesSketch sketch1, final DoublesSketch sketch2) {
-    final DoublesAuxiliary p = new DoublesAuxiliary(sketch1);
-    final DoublesAuxiliary q = new DoublesAuxiliary(sketch2);
+    final DoublesAuxiliary p = new DoublesAuxiliary(sketch1, false);
+    final DoublesAuxiliary q = new DoublesAuxiliary(sketch2, false);
 
     final double[] pSamplesArr = p.auxSamplesArr_;
     final double[] qSamplesArr = q.auxSamplesArr_;
diff --git a/src/test/java/org/apache/datasketches/quantiles/HeapUpdateDoublesSketchTest.java b/src/test/java/org/apache/datasketches/quantiles/HeapUpdateDoublesSketchTest.java
index de08b9c4..563885a8 100644
--- a/src/test/java/org/apache/datasketches/quantiles/HeapUpdateDoublesSketchTest.java
+++ b/src/test/java/org/apache/datasketches/quantiles/HeapUpdateDoublesSketchTest.java
@@ -116,7 +116,7 @@ public class HeapUpdateDoublesSketchTest {
     for (int k = 2; k <= 32; k *= 2) {
       HeapUpdateDoublesSketch qs = HeapUpdateDoublesSketch.newInstance(k);
       for (int numItemsSoFar = 0; numItemsSoFar < 1000; numItemsSoFar++) {
-        DoublesAuxiliary aux = new DoublesAuxiliary(qs);
+        DoublesAuxiliary aux = new DoublesAuxiliary(qs, false);
         int numSamples = qs.getRetainedItems();
         double[] auxItems = aux.auxSamplesArr_;
         long[] auxAccum = aux.auxCumWtsArr_;
@@ -984,9 +984,17 @@ public class HeapUpdateDoublesSketchTest {
       sketch.update(i);
       values[i] = i;
     }
-    final double[] ranks = sketch.getCDF(values);
-    for (int i = 0; i < n; i++) {
-      assertEquals(ranks[i], sketch.getRank(values[i]), 0.00001, "CDF vs rank for value " + i);
+    { // inclusive = false (default)
+      final double[] ranks = sketch.getCDF(values);
+      for (int i = 0; i < n; i++) {
+        assertEquals(ranks[i], sketch.getRank(values[i]), 0.00001, "CDF vs rank for value " + i);
+      }
+    }
+    { // inclusive = true
+      final double[] ranks = sketch.getCDF(values, true);
+      for (int i = 0; i < n; i++) {
+        assertEquals(ranks[i], sketch.getRank(values[i], true), 0.00001, "CDF vs rank for value " + i);
+      }
     }
   }
 
@@ -1025,6 +1033,62 @@ public class HeapUpdateDoublesSketchTest {
     assertEquals(kEpsPmf, k);
   }
 
+  @Test
+  public void tenItems() {
+    final UpdateDoublesSketch sketch = DoublesSketch.builder().build();
+    for (int i = 1; i <= 10; i++) { sketch.update(i); }
+    assertFalse(sketch.isEmpty());
+    assertEquals(sketch.getN(), 10);
+    assertEquals(sketch.getRetainedItems(), 10);
+    for (int i = 1; i <= 10; i++) {
+      assertEquals(sketch.getRank(i), (i - 1) / 10.0);
+      assertEquals(sketch.getRank(i, false), (i - 1) / 10.0);
+      assertEquals(sketch.getRank(i, true), i / 10.0);
+    }
+    // inclusive = false (default)
+    assertEquals(sketch.getQuantile(0), 1); // always min value
+    assertEquals(sketch.getQuantile(0.1), 2);
+    assertEquals(sketch.getQuantile(0.2), 3);
+    assertEquals(sketch.getQuantile(0.3), 4);
+    assertEquals(sketch.getQuantile(0.4), 5);
+    assertEquals(sketch.getQuantile(0.5), 6);
+    assertEquals(sketch.getQuantile(0.6), 7);
+    assertEquals(sketch.getQuantile(0.7), 8);
+    assertEquals(sketch.getQuantile(0.8), 9);
+    assertEquals(sketch.getQuantile(0.9), 10);
+    assertEquals(sketch.getQuantile(1), 10); // always max value
+    // inclusive = true
+    assertEquals(sketch.getQuantile(0, true), 1); // always min value
+    assertEquals(sketch.getQuantile(0.1, true), 1);
+    assertEquals(sketch.getQuantile(0.2, true), 2);
+    assertEquals(sketch.getQuantile(0.3, true), 3);
+    assertEquals(sketch.getQuantile(0.4, true), 4);
+    assertEquals(sketch.getQuantile(0.5, true), 5);
+    assertEquals(sketch.getQuantile(0.6, true), 6);
+    assertEquals(sketch.getQuantile(0.7, true), 7);
+    assertEquals(sketch.getQuantile(0.8, true), 8);
+    assertEquals(sketch.getQuantile(0.9, true), 9);
+    assertEquals(sketch.getQuantile(1, true), 10); // always max value
+
+    // getQuantile() and getQuantiles() equivalence
+    {
+      // inclusive = false (default)
+      final double[] quantiles =
+          sketch.getQuantiles(new double[] {0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1});
+      for (int i = 0; i <= 10; i++) {
+        assertEquals(sketch.getQuantile(i / 10.0), quantiles[i]);
+      }
+    }
+    {
+      // inclusive = true
+      final double[] quantiles =
+          sketch.getQuantiles(new double[] {0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1}, true);
+      for (int i = 0; i <= 10; i++) {
+        assertEquals(sketch.getQuantile(i / 10.0, true), quantiles[i]);
+      }
+    }
+  }
+
   //private methods
 
   private static void checksForImproperK(final int k) {
diff --git a/src/test/java/org/apache/datasketches/quantiles/ItemsSketchTest.java b/src/test/java/org/apache/datasketches/quantiles/ItemsSketchTest.java
index df4e864c..c50c1a32 100644
--- a/src/test/java/org/apache/datasketches/quantiles/ItemsSketchTest.java
+++ b/src/test/java/org/apache/datasketches/quantiles/ItemsSketchTest.java
@@ -20,6 +20,7 @@
 package org.apache.datasketches.quantiles;
 
 import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
 
 import java.util.Arrays;
 import java.util.Comparator;
@@ -108,6 +109,62 @@ public class ItemsSketchTest {
     Assert.assertNull(sketch.getQuantile(0.5));
   }
 
+  @Test
+  public void tenItems() {
+    final ItemsSketch<Integer> sketch = ItemsSketch.getInstance(128, Comparator.naturalOrder());
+    for (int i = 1; i <= 10; i++) { sketch.update(i); }
+    assertFalse(sketch.isEmpty());
+    assertEquals(sketch.getN(), 10);
+    assertEquals(sketch.getRetainedItems(), 10);
+    for (int i = 1; i <= 10; i++) {
+      assertEquals(sketch.getRank(i), (i - 1) / 10.0);
+      assertEquals(sketch.getRank(i, false), (i - 1) / 10.0);
+      assertEquals(sketch.getRank(i, true), i / 10.0);
+    }
+    // inclusive = false (default)
+    assertEquals(sketch.getQuantile(0), 1); // always min value
+    assertEquals(sketch.getQuantile(0.1), 2);
+    assertEquals(sketch.getQuantile(0.2), 3);
+    assertEquals(sketch.getQuantile(0.3), 4);
+    assertEquals(sketch.getQuantile(0.4), 5);
+    assertEquals(sketch.getQuantile(0.5), 6);
+    assertEquals(sketch.getQuantile(0.6), 7);
+    assertEquals(sketch.getQuantile(0.7), 8);
+    assertEquals(sketch.getQuantile(0.8), 9);
+    assertEquals(sketch.getQuantile(0.9), 10);
+    assertEquals(sketch.getQuantile(1), 10); // always max value
+    // inclusive = true
+    assertEquals(sketch.getQuantile(0, true), 1); // always min value
+    assertEquals(sketch.getQuantile(0.1, true), 1);
+    assertEquals(sketch.getQuantile(0.2, true), 2);
+    assertEquals(sketch.getQuantile(0.3, true), 3);
+    assertEquals(sketch.getQuantile(0.4, true), 4);
+    assertEquals(sketch.getQuantile(0.5, true), 5);
+    assertEquals(sketch.getQuantile(0.6, true), 6);
+    assertEquals(sketch.getQuantile(0.7, true), 7);
+    assertEquals(sketch.getQuantile(0.8, true), 8);
+    assertEquals(sketch.getQuantile(0.9, true), 9);
+    assertEquals(sketch.getQuantile(1, true), 10); // always max value
+
+    // getQuantile() and getQuantiles() equivalence
+    {
+      // inclusive = false (default)
+      final Integer[] quantiles =
+          sketch.getQuantiles(new double[] {0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1});
+      for (int i = 0; i <= 10; i++) {
+        assertEquals(sketch.getQuantile(i / 10.0), quantiles[i]);
+      }
+    }
+    {
+      // inclusive = true
+      final Integer[] quantiles =
+          sketch.getQuantiles(new double[] {0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1}, true);
+      for (int i = 0; i <= 10; i++) {
+        assertEquals(sketch.getQuantile(i / 10.0, true), quantiles[i]);
+      }
+    }
+  }
+
   @Test
   public void estimation() {
     final ItemsSketch<Integer> sketch = ItemsSketch.getInstance(128, Comparator.naturalOrder());
@@ -432,9 +489,17 @@ public class ItemsSketchTest {
       sketch.update(i);
       values[i] = i;
     }
-    final double[] ranks = sketch.getCDF(values);
-    for (int i = 0; i < n; i++) {
-      Assert.assertEquals(ranks[i], sketch.getRank(values[i]), 0.00001, "CDF vs rank for value " + i);
+    { // inclusive = false (default)
+      final double[] ranks = sketch.getCDF(values);
+      for (int i = 0; i < n; i++) {
+        Assert.assertEquals(ranks[i], sketch.getRank(values[i]), 0.00001, "CDF vs rank for value " + i);
+      }
+    }
+    { // inclusive = true
+      final double[] ranks = sketch.getCDF(values, true);
+      for (int i = 0; i < n; i++) {
+        Assert.assertEquals(ranks[i], sketch.getRank(values[i], true), 0.00001, "CDF vs rank for value " + i);
+      }
     }
   }
 
diff --git a/src/test/java/org/apache/datasketches/quantiles/UtilTest.java b/src/test/java/org/apache/datasketches/quantiles/UtilTest.java
index 275ed7e4..54aed0e0 100644
--- a/src/test/java/org/apache/datasketches/quantiles/UtilTest.java
+++ b/src/test/java/org/apache/datasketches/quantiles/UtilTest.java
@@ -90,7 +90,7 @@ public class UtilTest {
       final double[] splitPoints = {0.25, 0.4};
       final double[] counters = {0, 0, 0};
       final long[] answers = {200, 100, 200};
-      DoublesPmfCdfImpl.bilinearTimeIncrementHistogramCounters(accessor, 100, splitPoints, counters);
+      DoublesPmfCdfImpl.bilinearTimeIncrementHistogramCounters(accessor, 100, splitPoints, counters, false);
       for (int j = 0; j < counters.length; j++) {
         assertEquals(counters[j], answers[j], 0.00001);
         // System.out.printf ("counter[%d] = %d%n", j, counters[j]);
@@ -102,7 +102,7 @@ public class UtilTest {
       final double[] splitPoints = {0.01, 0.02};
       final double[] counters = {0, 0, 0};
       final long[] answers = {0, 0, 500};
-      DoublesPmfCdfImpl.bilinearTimeIncrementHistogramCounters(accessor, 100, splitPoints, counters);
+      DoublesPmfCdfImpl.bilinearTimeIncrementHistogramCounters(accessor, 100, splitPoints, counters, false);
       for (int j = 0; j < counters.length; j++) {
         assertEquals(counters[j], answers[j], 0.00001);
         // System.out.printf ("counter[%d] = %d%n", j, counters[j]);
@@ -114,7 +114,7 @@ public class UtilTest {
       final double[] splitPoints = {0.8, 0.9};
       final double[] counters = {0, 0, 0};
       final long[] answers = {500, 0, 0};
-      DoublesPmfCdfImpl.bilinearTimeIncrementHistogramCounters(accessor, 100, splitPoints, counters);
+      DoublesPmfCdfImpl.bilinearTimeIncrementHistogramCounters(accessor, 100, splitPoints, counters, false);
       for (int j = 0; j < counters.length; j++) {
         assertEquals(counters[j], answers[j], 0.00001);
         // System.out.printf ("counter[%d] = %d%n", j, counters[j]);
@@ -136,7 +136,7 @@ public class UtilTest {
       final double[] splitPoints = {0.25, 0.4};
       final double[] counters = {0, 0, 0};
       final long[] answers = {200, 100, 200};
-      DoublesPmfCdfImpl.linearTimeIncrementHistogramCounters(accessor, 100, splitPoints, counters);
+      DoublesPmfCdfImpl.linearTimeIncrementHistogramCounters(accessor, 100, splitPoints, counters, false);
       for (int j = 0; j < counters.length; j++) {
         assertEquals(counters[j], answers[j], 0.00001);
         // System.out.printf ("counter[%d] = %d%n", j, counters[j]);
@@ -148,7 +148,7 @@ public class UtilTest {
       final double[] splitPoints = {0.01, 0.02};
       final double[] counters = {0, 0, 0};
       final long[] answers = {0, 0, 500};
-      DoublesPmfCdfImpl.linearTimeIncrementHistogramCounters(accessor, 100, splitPoints, counters);
+      DoublesPmfCdfImpl.linearTimeIncrementHistogramCounters(accessor, 100, splitPoints, counters, false);
       for (int j = 0; j < counters.length; j++) {
         assertEquals(counters[j], answers[j], 0.00001);
         // System.out.printf ("counter[%d] = %d%n", j, counters[j]);
@@ -160,7 +160,7 @@ public class UtilTest {
       final double[] splitPoints = {0.8, 0.9};
       final double[] counters = {0, 0, 0};
       final long[] answers = {500, 0, 0};
-      DoublesPmfCdfImpl.linearTimeIncrementHistogramCounters(accessor, 100, splitPoints, counters);
+      DoublesPmfCdfImpl.linearTimeIncrementHistogramCounters(accessor, 100, splitPoints, counters, false);
       for (int j = 0; j < counters.length; j++) {
         assertEquals(counters[j], answers[j], 0.00001);
         // System.out.printf ("counter[%d] = %d%n", j, counters[j]);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org