You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by le...@apache.org on 2022/07/29 03:56:20 UTC
[datasketches-java] 08/08: Update REQ to use the new QuantileSearchCriteria.
This is an automated email from the ASF dual-hosted git repository.
leerho pushed a commit to branch More_changes_to_REQ
in repository https://gitbox.apache.org/repos/asf/datasketches-java.git
commit 856926ff941c87a34b4c624a50fadf6c41c1eb29
Author: Lee Rhodes <le...@users.noreply.github.com>
AuthorDate: Thu Jul 28 20:29:17 2022 -0700
Update REQ to use the new QuantileSearchCriteria.
---
.../org/apache/datasketches/InequalitySearch.java | 2 +
.../datasketches/QuantileSearchCriteria.java | 74 +++++++++++++
.../org/apache/datasketches/req/BaseReqSketch.java | 48 +++++----
.../org/apache/datasketches/req/FloatBuffer.java | 10 +-
.../org/apache/datasketches/req/ReqSketch.java | 34 +++---
.../datasketches/req/ReqSketchSortedView.java | 16 ++-
.../req/ReqSketchSortedViewIterator.java | 10 +-
.../datasketches/CrossCheckQuantilesTest.java | 117 +++++++++++----------
...loatBufferTest.java => ReqFloatBufferTest.java} | 18 ++--
.../datasketches/req/ReqSketchSortedViewTest.java | 11 +-
.../org/apache/datasketches/req/ReqSketchTest.java | 36 ++++---
11 files changed, 244 insertions(+), 132 deletions(-)
diff --git a/src/main/java/org/apache/datasketches/InequalitySearch.java b/src/main/java/org/apache/datasketches/InequalitySearch.java
index 278a3383..19726697 100644
--- a/src/main/java/org/apache/datasketches/InequalitySearch.java
+++ b/src/main/java/org/apache/datasketches/InequalitySearch.java
@@ -37,6 +37,8 @@ import java.util.Objects;
* <p>Given a sorted array of values <i>arr[]</i> and the search key value <i>v</i>, the algorithms for
* the searching criteria are given with each enum criterion.</p>
*
+ * @see <a href="https://datasketches.apache.org/docs/Quantiles/SketchingQuantilesAndRanksTutorial.html">
+ * Sketching Quantiles and Ranks Tutorial</a>
* @author Lee Rhodes
*/
public enum InequalitySearch {
diff --git a/src/main/java/org/apache/datasketches/QuantileSearchCriteria.java b/src/main/java/org/apache/datasketches/QuantileSearchCriteria.java
new file mode 100644
index 00000000..c9335e92
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/QuantileSearchCriteria.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches;
+
+/**
+ * These criteria are used by all the quantiles algorithms in the library.
+ * <ul>
+ * <li><b>Definition of Non Inclusive <i>getRank(q)</i> search:</b><br>
+ * Given quantile <i>q</i>, return the rank, <i>r</i>, of the largest quantile that is strictly less than <i>q</i>.
+ * </li>
+ *
+ * <li><b>Definition of Inclusive <i>getRank(q)</i> search:</b><br>
+ * Given quantile <i>q</i>, return the rank, <i>r</i>, of the largest quantile that is less than or equal to <i>q</i>.
+ * </li>
+ *
+ * <li><b>Definition of Non Inclusive Search <i>getQuantile(r)</i> search:</b><br>
+ * Given rank <i>r</i>, return the quantile of the smallest rank that is strictly greater than <i>r</i>.
+ * </li>
+ *
+ * <li><b>Definition of Inclusive Search <i>getQuantile(r)</i> search:</b><br>
+ * Given rank <i>r</i>, return the quantile of the smallest rank that is strictly greater than or equal to <i>r</i>.
+ * </li>
+ * </ul>
+
+ * @see <a href="https://datasketches.apache.org/docs/Quantiles/SketchingQuantilesAndRanksTutorial.html">
+ * Sketching Quantiles and Ranks Tutorial</a>
+ *
+ * @author Lee Rhodes
+ */
+public enum QuantileSearchCriteria {
+
+ /**
+ * See definition of Non Inclusive above.
+ *
+ * <p>For a non inclusive, strict getQuantile(rank) type search, If the given rank is equal to 1.0,
+ * there is no quantile that satisfies this criterion,
+ * the getQuantile(rank) method will return a NaN.</p>
+ */
+ NON_INCLUSIVE_STRICT,
+
+ /**
+ * See definition of Non Inclusive above.
+ *
+ * <p>For a non inclusive, getQuantile(rank) type search, If the given rank is is equal to 1.0,
+ * there is no quantile that satisfies this criterion, however,
+ * the method will return the largest quantile value retained by the sketch as a convenience.</p>
+ *
+ * <p>This is not strictly mathematically correct, but very convenient as it most often what we expect.</p>
+ */
+ NON_INCLUSIVE,
+
+ /**
+ * See definition of Inclusive above.
+ */
+ INCLUSIVE
+}
+
diff --git a/src/main/java/org/apache/datasketches/req/BaseReqSketch.java b/src/main/java/org/apache/datasketches/req/BaseReqSketch.java
index 10b51420..3538bd6b 100644
--- a/src/main/java/org/apache/datasketches/req/BaseReqSketch.java
+++ b/src/main/java/org/apache/datasketches/req/BaseReqSketch.java
@@ -19,14 +19,26 @@
package org.apache.datasketches.req;
+import org.apache.datasketches.QuantileSearchCriteria;
+
/**
* This abstract class provides a single place to define and document the public API
* for the Relative Error Quantiles Sketch.
*
+ * @see <a href="https://datasketches.apache.org/docs/Quantiles/SketchingQuantilesAndRanksTutorial.html">
+ * Sketching Quantiles and Ranks Tutorial</a>
+ *
* @author Lee Rhodes
*/
abstract class BaseReqSketch {
+ /**
+ * Same as {@link #getCDF(float[], QuantileSearchCriteria) getCDF(float[] splitPoints, QuantileSearchCriteria)}
+ * @param splitPoints splitPoints
+ * @return CDF
+ */
+ public abstract double[] getCDF(float[] splitPoints);
+
/**
* Returns an approximation to the Cumulative Distribution Function (CDF), which is the
* cumulative analog of the PMF, of the input stream given a set of splitPoint (values).
@@ -40,24 +52,17 @@ abstract class BaseReqSketch {
* that divide the real number line into <i>m+1</i> consecutive disjoint intervals.
* The definition of an "interval" is inclusive of the left splitPoint (or minimum value) and
* exclusive of the right splitPoint, with the exception that the last interval will include
- * the maximum value.
+ * the largest value retained by the sketch.
* It is not necessary to include either the min or max values in these split points.
*
- * @param inclusive if true the weight of a given value is included into its rank.
+ * @param inclusive if true, the weight of a given value is included into its rank.
*
* @return an array of m+1 double values, which are a consecutive approximation to the CDF
* of the input stream given the splitPoints. The value at array position j of the returned
* CDF array is the sum of the returned values in positions 0 through j of the returned PMF
* array.
*/
- public abstract double[] getCDF(float[] splitPoints, boolean inclusive);
-
- /**
- * Same as {@link #getCDF(float[], boolean) getCDF(float[] splitPoints, isLessThanOrEqual())}
- * @param splitPoints splitPoints
- * @return CDF
- */
- public abstract double[] getCDF(float[] splitPoints);
+ public abstract double[] getCDF(float[] splitPoints, QuantileSearchCriteria inclusive);
/**
* If true, the high ranks are prioritized for better accuracy. Otherwise
@@ -119,12 +124,12 @@ abstract class BaseReqSketch {
* @return an array of m+1 doubles each of which is an approximation
* to the fraction of the input stream values (the mass) that fall into one of those intervals.
* The definition of an "interval" is inclusive of the left splitPoint and exclusive of the right
- * splitPoint, with the exception that the last interval will include maximum value.
+ * splitPoint, with the exception that the last interval will include the largest value retained by the sketch.
*/
- public abstract double[] getPMF(float[] splitPoints, boolean inclusive);
+ public abstract double[] getPMF(float[] splitPoints, QuantileSearchCriteria inclusive);
/**
- * Same as {@link #getPMF(float[], boolean) getPMF(float[] splitPoints, isLessThanOrEqual())}
+ * Same as {@link #getPMF(float[], QuantileSearchCriteria) getPMF(float[] splitPoints, QuantileSearchCriteria)}
* @param splitPoints splitPoints
* @return PMF
*/
@@ -137,10 +142,10 @@ abstract class BaseReqSketch {
* @param inclusive if true, the given rank is considered inclusive.
* @return the approximate quantile given the normalized rank.
*/
- public abstract float getQuantile(double normRank, boolean inclusive);
+ public abstract float getQuantile(double normRank, QuantileSearchCriteria inclusive);
/**
- * Same as {@link #getQuantile(double, boolean) getQuantile(double fraction, isLessThanOrEqual())}
+ * Same as {@link #getQuantile(double, QuantileSearchCriteria) getQuantile(double fraction, QuantileSearchCriteria)}
* @param normRank fractional rank
* @return quantile
*/
@@ -153,10 +158,11 @@ abstract class BaseReqSketch {
* @return the array of quantiles that correspond to the given array of normalized ranks.
* See <i>getQuantile(double)</i>
*/
- public abstract float[] getQuantiles(double[] normRanks, boolean inclusive);
+ public abstract float[] getQuantiles(double[] normRanks, QuantileSearchCriteria inclusive);
/**
- * Same as {@link #getQuantiles(double[], boolean) getQuantiles(double[] fractions, isLessThanOrEqual())}
+ * Same as {@link #getQuantiles(double[], QuantileSearchCriteria)
+ * getQuantiles(double[] fractions, QuantileSearchCriteria)}
* @param normRanks normalized ranks
* @return quantiles
*/
@@ -170,10 +176,10 @@ abstract class BaseReqSketch {
* @param inclusive if true the weight of the given value is included into its rank.
* @return the normalized rank of the given value in the stream.
*/
- public abstract double getRank(float value, boolean inclusive);
+ public abstract double getRank(float value, QuantileSearchCriteria inclusive);
/**
- * Same as {@link #getRank(float, boolean) getRank(float value, isLessThanOrEqual())}
+ * Same as {@link #getRank(float, QuantileSearchCriteria) getRank(float value, QuantileSearchCriteria)}
* @param value value to be ranked
* @return normalized rank
*/
@@ -194,10 +200,10 @@ abstract class BaseReqSketch {
* @return the array of normalized ranks that correspond to the given array of values.
* See <i>getRank(float)</i>
*/
- public abstract double[] getRanks(float[] values, boolean inclusive);
+ public abstract double[] getRanks(float[] values, QuantileSearchCriteria inclusive);
/**
- * Same as {@link #getRanks(float[], boolean) getRanks(float[] values, isLessThanOrEqual())}
+ * Same as {@link #getRanks(float[], QuantileSearchCriteria) getRanks(float[] values, QuantileSearchCriteria)}
* @param values the given array of values to be ranked
* @return array of normalized ranks
*/
diff --git a/src/main/java/org/apache/datasketches/req/FloatBuffer.java b/src/main/java/org/apache/datasketches/req/FloatBuffer.java
index 6d86316c..a3246d88 100755
--- a/src/main/java/org/apache/datasketches/req/FloatBuffer.java
+++ b/src/main/java/org/apache/datasketches/req/FloatBuffer.java
@@ -21,6 +21,10 @@ package org.apache.datasketches.req;
import java.util.Arrays;
+import static org.apache.datasketches.QuantileSearchCriteria.INCLUSIVE;
+
+import org.apache.datasketches.QuantileSearchCriteria;
+
import org.apache.datasketches.InequalitySearch;
import org.apache.datasketches.SketchesArgumentException;
import org.apache.datasketches.memory.WritableBuffer;
@@ -202,10 +206,10 @@ class FloatBuffer {
* Returns the count of items based on the given criteria.
* Also used in test.
* @param value the given value
- * @param ltEq the chosen criterion: LT or LE
+ * @param inclusive the chosen criterion: LT, LT Strict, or LE
* @return count of items based on the given criterion.
*/
- int getCountWithCriterion(final float value, final boolean ltEq) {
+ int getCountWithCriterion(final float value, final QuantileSearchCriteria inclusive) {
assert !Float.isNaN(value) : "Float values must not be NaN.";
if (!sorted_) { sort(); } //we must be sorted!
int low = 0; //Initialized to space at top
@@ -214,7 +218,7 @@ class FloatBuffer {
low = capacity_ - count_;
high = capacity_ - 1;
}
- final InequalitySearch crit = ltEq ? InequalitySearch.LE : InequalitySearch.LT;
+ final InequalitySearch crit = (inclusive == INCLUSIVE) ? InequalitySearch.LE : InequalitySearch.LT;
final int index = InequalitySearch.find(arr_, low, high, value, crit);
return index == -1 ? 0 : index - low + 1;
}
diff --git a/src/main/java/org/apache/datasketches/req/ReqSketch.java b/src/main/java/org/apache/datasketches/req/ReqSketch.java
index 22328099..f17668af 100644
--- a/src/main/java/org/apache/datasketches/req/ReqSketch.java
+++ b/src/main/java/org/apache/datasketches/req/ReqSketch.java
@@ -19,10 +19,14 @@
package org.apache.datasketches.req;
+import static org.apache.datasketches.QuantileSearchCriteria.INCLUSIVE;
+import static org.apache.datasketches.QuantileSearchCriteria.NON_INCLUSIVE;
+
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
+import org.apache.datasketches.QuantileSearchCriteria;
import org.apache.datasketches.SketchesArgumentException;
import org.apache.datasketches.memory.Memory;
@@ -230,11 +234,11 @@ public class ReqSketch extends BaseReqSketch {
@Override
public double[] getCDF(final float[] splitPoints) {
- return getCDF(splitPoints, ltEq);
+ return getCDF(splitPoints, QuantileSearchCriteria.NON_INCLUSIVE);
}
@Override
- public double[] getCDF(final float[] splitPoints, final boolean inclusive) {
+ public double[] getCDF(final float[] splitPoints, final QuantileSearchCriteria inclusive) {
if (isEmpty()) { return null; }
final int numBkts = splitPoints.length + 1;
final double[] outArr = new double[numBkts];
@@ -267,11 +271,11 @@ public class ReqSketch extends BaseReqSketch {
@Override
public double[] getPMF(final float[] splitPoints) {
- return getPMF(splitPoints, ltEq);
+ return getPMF(splitPoints, ltEq == true ? INCLUSIVE : NON_INCLUSIVE);
}
@Override
- public double[] getPMF(final float[] splitPoints, final boolean inclusive) {
+ public double[] getPMF(final float[] splitPoints, final QuantileSearchCriteria inclusive) {
if (isEmpty()) { return null; }
final int numBkts = splitPoints.length + 1;
final double[] outArr = new double[numBkts];
@@ -285,11 +289,11 @@ public class ReqSketch extends BaseReqSketch {
@Override
public float getQuantile(final double normRank) {
- return getQuantile(normRank, ltEq);
+ return getQuantile(normRank, ltEq == true ? INCLUSIVE : NON_INCLUSIVE );
}
@Override
- public float getQuantile(final double normRank, final boolean inclusive) {
+ public float getQuantile(final double normRank, final QuantileSearchCriteria inclusive) {
if (isEmpty()) { return Float.NaN; }
if (normRank < 0 || normRank > 1.0) {
throw new SketchesArgumentException(
@@ -303,11 +307,11 @@ public class ReqSketch extends BaseReqSketch {
@Override
public float[] getQuantiles(final double[] normRanks) {
- return getQuantiles(normRanks, ltEq);
+ return getQuantiles(normRanks, ltEq == true ? INCLUSIVE : NON_INCLUSIVE);
}
@Override
- public float[] getQuantiles(final double[] normRanks, final boolean inclusive) {
+ public float[] getQuantiles(final double[] normRanks, final QuantileSearchCriteria inclusive) {
if (isEmpty()) { return null; }
final int len = normRanks.length;
final float[] qArr = new float[len];
@@ -319,11 +323,11 @@ public class ReqSketch extends BaseReqSketch {
@Override
public double getRank(final float value) {
- return getRank(value, ltEq);
+ return getRank(value, ltEq == true ? INCLUSIVE : NON_INCLUSIVE);
}
@Override
- public double getRank(final float value, final boolean inclusive) {
+ public double getRank(final float value, final QuantileSearchCriteria inclusive) {
if (isEmpty()) { return Double.NaN; }
final long nnCount = getCount(value, inclusive);
return (double)nnCount / totalN;
@@ -336,11 +340,11 @@ public class ReqSketch extends BaseReqSketch {
@Override
public double[] getRanks(final float[] values) {
- return getRanks(values, ltEq);
+ return getRanks(values, ltEq == true ? INCLUSIVE : NON_INCLUSIVE);
}
@Override
- public double[] getRanks(final float[] values, final boolean inclusive) {
+ public double[] getRanks(final float[] values, final QuantileSearchCriteria inclusive) {
if (isEmpty()) { return null; }
final int numValues = values.length;
final double[] retArr = new double[numValues];
@@ -586,7 +590,7 @@ public class ReqSketch extends BaseReqSketch {
if (reqDebug != null) { reqDebug.emitCompressDone(); }
}
- private long getCount(final float value, final boolean inclusive) {
+ private long getCount(final float value, final QuantileSearchCriteria inclusive) {
if (isEmpty()) { return 0; }
final int numComp = compactors.size();
long cumNnr = 0;
@@ -599,7 +603,7 @@ public class ReqSketch extends BaseReqSketch {
return cumNnr;
}
- private long[] getCounts(final float[] values, final boolean inclusive) {
+ private long[] getCounts(final float[] values, final QuantileSearchCriteria inclusive) {
final int numValues = values.length;
final int numComp = compactors.size();
final long[] cumNnrArr = new long[numValues];
@@ -621,7 +625,7 @@ public class ReqSketch extends BaseReqSketch {
* @param inclusive if true the weight of a given value is included into its rank
* @return a CDF in raw counts
*/
- private long[] getPMForCDF(final float[] splits, final boolean inclusive) {
+ private long[] getPMForCDF(final float[] splits, final QuantileSearchCriteria inclusive) {
validateSplits(splits);
final int numSplits = splits.length;
final long[] splitCounts = getCounts(splits, inclusive);
diff --git a/src/main/java/org/apache/datasketches/req/ReqSketchSortedView.java b/src/main/java/org/apache/datasketches/req/ReqSketchSortedView.java
index 4ee07afd..96af7a44 100644
--- a/src/main/java/org/apache/datasketches/req/ReqSketchSortedView.java
+++ b/src/main/java/org/apache/datasketches/req/ReqSketchSortedView.java
@@ -19,10 +19,15 @@
package org.apache.datasketches.req;
+import static org.apache.datasketches.QuantileSearchCriteria.INCLUSIVE;
+import static org.apache.datasketches.QuantileSearchCriteria.NON_INCLUSIVE;
+import static org.apache.datasketches.QuantileSearchCriteria.NON_INCLUSIVE_STRICT;
+
import java.util.Arrays;
import java.util.List;
import org.apache.datasketches.InequalitySearch;
+import org.apache.datasketches.QuantileSearchCriteria;
/**
* The Sorted View provides a view of the data retained by the sketch that would be cumbersome to get any other way.
@@ -81,13 +86,14 @@ public class ReqSketchSortedView {
* @param inclusive determines the search criterion used.
* @return the quantile
*/
- public float getQuantile(final double normRank, final boolean inclusive) {
+ public float getQuantile(final double normRank, final QuantileSearchCriteria inclusive) {
final int len = cumWeights.length;
final long rank = (int)(normRank * N);
- final InequalitySearch crit = inclusive ? InequalitySearch.GE : InequalitySearch.GT;
+ final InequalitySearch crit = (inclusive == INCLUSIVE) ? InequalitySearch.GE : InequalitySearch.GT;
final int index = InequalitySearch.find(cumWeights, 0, len - 1, rank, crit);
if (index == -1) {
- return values[len - 1]; //GT: normRank >= 1.0; GE: normRank > 1.0
+ if (inclusive == NON_INCLUSIVE_STRICT) { return Float.NaN; } //GT: normRank == 1.0;
+ if (inclusive == NON_INCLUSIVE) { return values[len - 1]; }
}
return values[index];
}
@@ -98,9 +104,9 @@ public class ReqSketchSortedView {
* @param inclusive determines the search criterion used.
* @return the normalized rank
*/
- public double getRank(final float value, final boolean inclusive) {
+ public double getRank(final float value, final QuantileSearchCriteria inclusive) {
final int len = values.length;
- final InequalitySearch crit = inclusive ? InequalitySearch.LE : InequalitySearch.LT;
+ final InequalitySearch crit = (inclusive == INCLUSIVE) ? InequalitySearch.LE : InequalitySearch.LT;
final int index = InequalitySearch.find(values, 0, len - 1, value, crit);
if (index == -1) {
return 0; //LT: value <= minValue; LE: value < minValue
diff --git a/src/main/java/org/apache/datasketches/req/ReqSketchSortedViewIterator.java b/src/main/java/org/apache/datasketches/req/ReqSketchSortedViewIterator.java
index 19437ae8..1bc81146 100644
--- a/src/main/java/org/apache/datasketches/req/ReqSketchSortedViewIterator.java
+++ b/src/main/java/org/apache/datasketches/req/ReqSketchSortedViewIterator.java
@@ -20,6 +20,8 @@
package org.apache.datasketches.req;
+import static org.apache.datasketches.QuantileSearchCriteria.INCLUSIVE;
+import org.apache.datasketches.QuantileSearchCriteria;
/**
* Iterator over KllDoublesSketchSortedView.
*
@@ -66,9 +68,9 @@ public class ReqSketchSortedViewIterator {
* Otherwise, returns the cumulative weightof the previous value.
* @return cumulative weight for the current value.
*/
- public long getCumulativeWeight(final boolean inclusive) {
- return inclusive ? cumWeights[index]
- : (index == 0) ? 0 : cumWeights[index - 1];
+ public long getCumulativeWeight(final QuantileSearchCriteria inclusive) {
+ if (inclusive == INCLUSIVE) { return cumWeights[index]; }
+ return (index == 0) ? 0 : cumWeights[index - 1];
}
/**
@@ -80,7 +82,7 @@ public class ReqSketchSortedViewIterator {
* Otherwise, returns the normalized rank of the previous value.
* @return normalized rank for the current value or previous value.
*/
- public double getNormalizedRank(final boolean inclusive) {
+ public double getNormalizedRank(final QuantileSearchCriteria inclusive) {
return (double) getCumulativeWeight(inclusive) / totalN;
}
diff --git a/src/test/java/org/apache/datasketches/CrossCheckQuantilesTest.java b/src/test/java/org/apache/datasketches/CrossCheckQuantilesTest.java
index 945e9f98..545a40d6 100644
--- a/src/test/java/org/apache/datasketches/CrossCheckQuantilesTest.java
+++ b/src/test/java/org/apache/datasketches/CrossCheckQuantilesTest.java
@@ -26,6 +26,9 @@ import static org.apache.datasketches.CrossCheckQuantilesTest.SkType.KLL;
import static org.apache.datasketches.CrossCheckQuantilesTest.SkType.REQ;
import static org.apache.datasketches.CrossCheckQuantilesTest.SkType.REQ_NO_DEDUP;
import static org.apache.datasketches.CrossCheckQuantilesTest.SkType.REQ_SV;
+import static org.apache.datasketches.QuantileSearchCriteria.INCLUSIVE;
+import static org.apache.datasketches.QuantileSearchCriteria.NON_INCLUSIVE;
+import static org.apache.datasketches.QuantileSearchCriteria.NON_INCLUSIVE_STRICT;
import static org.testng.Assert.assertEquals;
import org.apache.datasketches.kll.KllFloatsSketch;
@@ -44,8 +47,6 @@ public class CrossCheckQuantilesTest {
final int k = 32; //all sketches are in exact mode
final boolean hra = false; //for the REQ sketch
- final boolean INCLUSIVE = true;
- final boolean NON_INCLUSIVE = false;
@Test
public void checkQuantileSketches() {
@@ -73,14 +74,14 @@ public class CrossCheckQuantilesTest {
double[] testQuantileDResults_NI = null;
double[] testQuantileDResults_I = null;
- private void checkQAndR(final SkType skType, final PrimType type, final boolean inclusive) {
+ private void checkQAndR(final SkType skType, final PrimType type, final QuantileSearchCriteria inclusive) {
String head = "CHECK " + skType.toString();
- if (inclusive) { println("\n---------- " + head + " INCLUSIVE ----------\n"); }
+ if (inclusive == INCLUSIVE) { println("\n---------- " + head + " INCLUSIVE ----------\n"); }
else { println("\n########## " + head + " NON-INCLUSIVE ##########\n"); }
//CREATE EMPTY SKETCHES
ReqSketchBuilder reqBldr = ReqSketch.builder();
- reqBldr.setK(k).setHighRankAccuracy(hra).setLessThanOrEqual(inclusive);
+ reqBldr.setK(k).setHighRankAccuracy(hra).setLessThanOrEqual(inclusive == INCLUSIVE ? true : false);
ReqSketch reqSk = reqBldr.build();
KllFloatsSketch kllSk = KllFloatsSketch.newHeapInstance(k);
@@ -88,14 +89,14 @@ public class CrossCheckQuantilesTest {
UpdateDoublesSketch udSk = DoublesSketch.builder().setK(k).build();
//SKETCH INPUT.
- float[] baseFVals = {10,20,30,40,50};
+ float[] baseFVals = {10,20,30,40,50};
double[] baseDVals = {10,20,30,40,50};
- int[] baseDups = { 1, 4, 6, 2, 1}; //number of duplicates per base value
+ int[] baseDups = { 1, 4, 6, 2, 1}; //number of duplicates per base value
//RAW TEST INPUT. This simulates what the Sorted View might see.
//This checks the search algos without the sketch and created by hand
float[] rawFVals = {10,20,20,30,30, 30, 40, 50}; //note grouping by twos
- long[] rawCumWts = { 1, 3, 5, 7, 9, 11, 13, 14};
+ long[] rawCumWts = { 1, 3, 5, 7, 9, 11, 13, 14};
//COMPUTE N
int N = 0;
@@ -169,7 +170,7 @@ public class CrossCheckQuantilesTest {
/**************************************/
- println(skType.toString() + " GetQuantile(NormalizedRank), INCLUSIVE = " + inclusive);
+ println(skType.toString() + " GetQuantile(NormalizedRank), INCLUSIVE = " + inclusive.toString());
println(" CumWeight is for illustration");
println(" Convert NormalizedRank to CumWeight (CW).");
println(" Search RSSV CumWeights[] array:");
@@ -186,11 +187,11 @@ public class CrossCheckQuantilesTest {
case REQ: { //this case creates the expected values for all the others
qF = reqSk.getQuantile(testRank, inclusive);
qD = qF;
- if (inclusive) {
+ if (inclusive == INCLUSIVE) {
testQuantileFResults_I[i] = qF;
testQuantileDResults_I[i] = qD;
}
- else {
+ else if (inclusive == NON_INCLUSIVE){
testQuantileFResults_NI[i] = qF;
testQuantileDResults_NI[i] = qD;
}
@@ -199,40 +200,40 @@ public class CrossCheckQuantilesTest {
case REQ_SV: {
qF = rssv.getQuantile(testRank, inclusive);
qD = 0;
- if (inclusive) { assertEquals(qF, testQuantileFResults_I[i]); }
+ if (inclusive == INCLUSIVE) { assertEquals(qF, testQuantileFResults_I[i]); }
else { assertEquals(qF, testQuantileFResults_NI[i]); };
break;
}
case REQ_NO_DEDUP: {
qF = this.getQuantile(rawCumWts, rawFVals, testRank, inclusive);
qD = 0;
- if (inclusive) { assertEquals(qF, testQuantileFResults_I[i]); }
+ if (inclusive == INCLUSIVE) { assertEquals(qF, testQuantileFResults_I[i]); }
else { assertEquals(qF, testQuantileFResults_NI[i]); };
break;
}
- case KLL: {
- qF = kllSk.getQuantile(testRank, inclusive);
- qD = 0;
- if (inclusive) { assertEquals(qF, testQuantileFResults_I[i]); }
- else { assertEquals(qF, testQuantileFResults_NI[i]); };
- break;
- }
- case CLASSIC: {
- qF = 0;
- qD = kllSk.getQuantile(testRank, inclusive);
- if (inclusive) { assertEquals(qD, testQuantileDResults_I[i]); }
- else { assertEquals(qD, testQuantileDResults_NI[i]); };
- break;
- }
+// case KLL: {
+// qF = kllSk.getQuantile(testRank, inclusive);
+// qD = 0;
+// if (inclusive) { assertEquals(qF, testQuantileFResults_I[i]); }
+// else { assertEquals(qF, testQuantileFResults_NI[i]); };
+// break;
+// }
+// case CLASSIC: {
+// qF = 0;
+// qD = kllSk.getQuantile(testRank, inclusive);
+// if (inclusive) { assertEquals(qD, testQuantileDResults_I[i]); }
+// else { assertEquals(qD, testQuantileDResults_NI[i]); };
+// break;
+// }
default: qD = qF = 0; break;
}
- if (inclusive) {
+ if (inclusive == INCLUSIVE) {
if (type == PrimType.DOUBLE) {
printf("%16.3f%16.3f%16.1f%16.1f\n", testRank, testRank * N, qD, testQuantileDResults_I[i]);
} else { //float
printf("%16.3f%16.3f%16.1f%16.1f\n", testRank, testRank * N, qF, testQuantileFResults_I[i]);
}
- } else { //else NI
+ } else if (inclusive == NON_INCLUSIVE){
if (type == PrimType.DOUBLE) {
printf("%16.3f%16.3f%16.1f%16.1f\n", testRank, testRank * N, qD, testQuantileDResults_NI[i]);
} else { //float
@@ -257,39 +258,39 @@ public class CrossCheckQuantilesTest {
switch (skType) {
case REQ: {
r = reqSk.getRank(testValue, inclusive);
- if (inclusive) { testRankResults_I[i] = r; }
- else { testRankResults_NI[i] = r; }
+ if (inclusive == INCLUSIVE) { testRankResults_I[i] = r; }
+ else if (inclusive == NON_INCLUSIVE) { testRankResults_NI[i] = r; }
break;
}
case REQ_SV: {
r = rssv.getRank(testValue, inclusive);
- if (inclusive) { assertEquals(r, testRankResults_I[i]); }
- else { assertEquals(r, testRankResults_NI[i]); };
+ if (inclusive == INCLUSIVE) { assertEquals(r, testRankResults_I[i]); }
+ else if (inclusive == NON_INCLUSIVE) { assertEquals(r, testRankResults_NI[i]); };
break;
}
case REQ_NO_DEDUP: {
r = getRank(rawCumWts, rawFVals, testValue, inclusive);
- if (inclusive) { assertEquals(r, testRankResults_I[i]); }
- else { assertEquals(r, testRankResults_NI[i]); };
- break;
- }
- case KLL: {
- r = kllSk.getRank(testValue, inclusive);
- if (inclusive) { assertEquals(r, testRankResults_I[i]); }
- else { assertEquals(r, testRankResults_NI[i]); };
- break;
- }
- case CLASSIC: {
- r = udSk.getRank(testValue, inclusive);
- if (inclusive) { assertEquals(r, testRankResults_I[i]); }
- else { assertEquals(r, testRankResults_NI[i]); };
+ if (inclusive == INCLUSIVE) { assertEquals(r, testRankResults_I[i]); }
+ else if (inclusive == NON_INCLUSIVE) { assertEquals(r, testRankResults_NI[i]); };
break;
}
+// case KLL: {
+// r = kllSk.getRank(testValue, inclusive);
+// if (inclusive == INCLUSIVE) { assertEquals(r, testRankResults_I[i]); }
+// else if (inclusive == NON_INCLUSIVE) { assertEquals(r, testRankResults_NI[i]); };
+// break;
+// }
+// case CLASSIC: {
+// r = udSk.getRank(testValue, inclusive);
+// if (inclusive == INCLUSIVE) { assertEquals(r, testRankResults_I[i]); }
+// else if (inclusive == NON_INCLUSIVE) { assertEquals(r, testRankResults_NI[i]); };
+// break;
+// }
default: r = 0; break;
}
- if (inclusive) {
+ if (inclusive == INCLUSIVE) {
printf("%16.1f%16.3f%16.3f\n", testValue, r, testRankResults_I[i]);
- } else {
+ } else if (inclusive == NON_INCLUSIVE) {
printf("%16.1f%16.3f%16.3f\n", testValue, r, testRankResults_NI[i]);
}
}
@@ -305,14 +306,15 @@ public class CrossCheckQuantilesTest {
* @return the quantile
*/
public float getQuantile(final long[] cumWeights, final float[] values, final double normRank,
- final boolean inclusive) {
+ final QuantileSearchCriteria inclusive) {
final int len = cumWeights.length;
final long N = cumWeights[len -1];
- final long rank = (int)(normRank * N);
- final InequalitySearch crit = inclusive ? InequalitySearch.GE : InequalitySearch.GT;
+ final long rank = (int)(normRank * N); //denormalize
+ final InequalitySearch crit = inclusive == INCLUSIVE ? InequalitySearch.GE : InequalitySearch.GT;
final int index = InequalitySearch.find(cumWeights, 0, len - 1, rank, crit);
if (index == -1) {
- return values[len - 1]; //GT: normRank >= 1.0; GE: normRank > 1.0
+ if (inclusive == NON_INCLUSIVE_STRICT) { return Float.NaN; } //GT: normRank == 1.0;
+ if (inclusive == NON_INCLUSIVE) { return values[len - 1]; }
}
return values[index];
}
@@ -322,18 +324,19 @@ public class CrossCheckQuantilesTest {
* @param cumWeights the given cumulative weights
* @param values the given values
* @param value the given value
- * @param ltEq determines the search criterion used.
+ * @param inclusive determines the search criterion used.
* @return the normalized rank
*/
- public double getRank(final long[] cumWeights, final float[] values, final float value, final boolean ltEq) {
+ public double getRank(final long[] cumWeights, final float[] values, final float value,
+ final QuantileSearchCriteria inclusive) {
final int len = values.length;
final long N = cumWeights[len -1];
- final InequalitySearch crit = ltEq ? InequalitySearch.LE : InequalitySearch.LT;
+ final InequalitySearch crit = inclusive == INCLUSIVE ? InequalitySearch.LE : InequalitySearch.LT;
final int index = InequalitySearch.find(values, 0, len - 1, value, crit);
if (index == -1) {
return 0; //LT: value <= minValue; LE: value < minValue
}
- return (double)cumWeights[index] / N;
+ return (double)cumWeights[index] / N; //normalize
}
private final static boolean enablePrinting = false;
diff --git a/src/test/java/org/apache/datasketches/req/FloatBufferTest.java b/src/test/java/org/apache/datasketches/req/ReqFloatBufferTest.java
similarity index 93%
rename from src/test/java/org/apache/datasketches/req/FloatBufferTest.java
rename to src/test/java/org/apache/datasketches/req/ReqFloatBufferTest.java
index 54c27a61..7594f90f 100644
--- a/src/test/java/org/apache/datasketches/req/FloatBufferTest.java
+++ b/src/test/java/org/apache/datasketches/req/ReqFloatBufferTest.java
@@ -19,6 +19,8 @@
package org.apache.datasketches.req;
+import static org.apache.datasketches.QuantileSearchCriteria.INCLUSIVE;
+import static org.apache.datasketches.QuantileSearchCriteria.NON_INCLUSIVE;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;
import static org.testng.Assert.fail;
@@ -30,7 +32,7 @@ import org.testng.annotations.Test;
/**
* @author Lee Rhodes
*/
-public class FloatBufferTest {
+public class ReqFloatBufferTest {
@Test
public void checkTrimCount() {
@@ -125,9 +127,9 @@ public class FloatBufferTest {
final FloatBuffer buf = FloatBuffer.wrap(sortedArr, true, spaceAtBottom);
final FloatBuffer buf2 = new FloatBuffer(7,0, spaceAtBottom);
buf2.mergeSortIn(buf);
- assertEquals(buf2.getCountWithCriterion(4, false), 3);
+ assertEquals(buf2.getCountWithCriterion(4, NON_INCLUSIVE), 3);
buf2.mergeSortIn(buf);
- assertEquals(buf2.getCountWithCriterion(4, false), 6);
+ assertEquals(buf2.getCountWithCriterion(4, NON_INCLUSIVE), 6);
assertEquals(buf2.getCount(), 14);
buf2.trimCount(12);
assertEquals(buf2.getCount(), 12);
@@ -153,17 +155,17 @@ public class FloatBufferTest {
//@Test
public void checkCount() {
final FloatBuffer buf = createSortedFloatBuffer(120, 0, true, 100);
- println("LT: " + buf.getCountWithCriterion(100, false));
- println("LE: " + buf.getCountWithCriterion(100, true));
+ println("LT: " + buf.getCountWithCriterion(100, NON_INCLUSIVE));
+ println("LE: " + buf.getCountWithCriterion(100, INCLUSIVE));
}
private static void checkCountWithCriteria(final FloatBuffer buf, final float v) {
int count;
final int len = buf.getCount();
final int iv = (int) v;
- count = buf.getCountWithCriterion(v, false);
+ count = buf.getCountWithCriterion(v, NON_INCLUSIVE);
assertEquals(count, v > len ? len : v <= 1 ? 0 : iv == v? iv - 1 : iv);
- count = buf.getCountWithCriterion(v, true);
+ count = buf.getCountWithCriterion(v, INCLUSIVE);
assertEquals(count, v >= len ? len : v < 1 ? 0 : iv);
}
@@ -230,7 +232,7 @@ public class FloatBufferTest {
buf.append(3); buf.append(2); buf.append(1);
buf.trimCount(4);
assertEquals(buf.getCount(), 3);
- final int cnt = buf.getCountWithCriterion(3.0f, true);
+ final int cnt = buf.getCountWithCriterion(3.0f, INCLUSIVE);
assertEquals(cnt, 3);
assertEquals(buf.getItemFromIndex(2), 3.0f);
try { buf.getEvensOrOdds(0, 3, false); fail(); } catch (final SketchesArgumentException e) {}
diff --git a/src/test/java/org/apache/datasketches/req/ReqSketchSortedViewTest.java b/src/test/java/org/apache/datasketches/req/ReqSketchSortedViewTest.java
index 82811264..69a02f5c 100644
--- a/src/test/java/org/apache/datasketches/req/ReqSketchSortedViewTest.java
+++ b/src/test/java/org/apache/datasketches/req/ReqSketchSortedViewTest.java
@@ -19,6 +19,9 @@
package org.apache.datasketches.req;
+import static org.apache.datasketches.QuantileSearchCriteria.INCLUSIVE;
+import static org.apache.datasketches.QuantileSearchCriteria.NON_INCLUSIVE;
+
import org.testng.annotations.Test;
/**
@@ -79,10 +82,10 @@ public class ReqSketchSortedViewTest {
while (itr.next()) {
float v = itr.getValue();
long wt = itr.getWeight();
- long cumWtNotInc = itr.getCumulativeWeight(false);
- double nRankNotInc = itr.getNormalizedRank(false);
- long cumWtInc = itr.getCumulativeWeight(true);
- double nRankInc = itr.getNormalizedRank(true);
+ long cumWtNotInc = itr.getCumulativeWeight(NON_INCLUSIVE);
+ double nRankNotInc = itr.getNormalizedRank(NON_INCLUSIVE);
+ long cumWtInc = itr.getCumulativeWeight(INCLUSIVE);
+ double nRankInc = itr.getNormalizedRank(INCLUSIVE);
printf(fmt, v, wt, cumWtNotInc, nRankNotInc, cumWtInc, nRankInc);
}
}
diff --git a/src/test/java/org/apache/datasketches/req/ReqSketchTest.java b/src/test/java/org/apache/datasketches/req/ReqSketchTest.java
index cdd3a8dd..3ba2700c 100644
--- a/src/test/java/org/apache/datasketches/req/ReqSketchTest.java
+++ b/src/test/java/org/apache/datasketches/req/ReqSketchTest.java
@@ -19,6 +19,11 @@
package org.apache.datasketches.req;
+import static org.apache.datasketches.QuantileSearchCriteria.INCLUSIVE;
+import static org.apache.datasketches.QuantileSearchCriteria.NON_INCLUSIVE;
+import static org.apache.datasketches.QuantileSearchCriteria.NON_INCLUSIVE_STRICT;
+
+
import static org.apache.datasketches.Util.evenlySpacedFloats;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
@@ -29,6 +34,7 @@ import org.apache.datasketches.SketchesArgumentException;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.req.ReqSketchSortedView.Row;
import org.testng.annotations.Test;
+import org.apache.datasketches.QuantileSearchCriteria;
/**
* @author Lee Rhodes
@@ -340,8 +346,8 @@ public class ReqSketchTest {
assertEquals(sketch.getRetainedItems(), 10);
for (int i = 1; i <= 10; i++) {
assertEquals(sketch.getRank(i), (i - 1) / 10.0);
- assertEquals(sketch.getRank(i, false), (i - 1) / 10.0);
- assertEquals(sketch.getRank(i, true), (i) / 10.0);
+ assertEquals(sketch.getRank(i, NON_INCLUSIVE), (i - 1) / 10.0);
+ assertEquals(sketch.getRank(i, INCLUSIVE), (i) / 10.0);
}
// inclusive = false (default)
assertEquals(sketch.getQuantile(0), 1); // always min value
@@ -356,17 +362,17 @@ public class ReqSketchTest {
assertEquals(sketch.getQuantile(0.9), 10);
assertEquals(sketch.getQuantile(1), 10); // always max value
// inclusive = true
- assertEquals(sketch.getQuantile(0, true), 1); // always min value
- assertEquals(sketch.getQuantile(0.1, true), 1);
- assertEquals(sketch.getQuantile(0.2, true), 2);
- assertEquals(sketch.getQuantile(0.3, true), 3);
- assertEquals(sketch.getQuantile(0.4, true), 4);
- assertEquals(sketch.getQuantile(0.5, true), 5);
- assertEquals(sketch.getQuantile(0.6, true), 6);
- assertEquals(sketch.getQuantile(0.7, true), 7);
- assertEquals(sketch.getQuantile(0.8, true), 8);
- assertEquals(sketch.getQuantile(0.9, true), 9);
- assertEquals(sketch.getQuantile(1, true), 10); // always max value
+ assertEquals(sketch.getQuantile(0, INCLUSIVE), 1); // always min value
+ assertEquals(sketch.getQuantile(0.1, INCLUSIVE), 1);
+ assertEquals(sketch.getQuantile(0.2, INCLUSIVE), 2);
+ assertEquals(sketch.getQuantile(0.3, INCLUSIVE), 3);
+ assertEquals(sketch.getQuantile(0.4, INCLUSIVE), 4);
+ assertEquals(sketch.getQuantile(0.5, INCLUSIVE), 5);
+ assertEquals(sketch.getQuantile(0.6, INCLUSIVE), 6);
+ assertEquals(sketch.getQuantile(0.7, INCLUSIVE), 7);
+ assertEquals(sketch.getQuantile(0.8, INCLUSIVE), 8);
+ assertEquals(sketch.getQuantile(0.9, INCLUSIVE), 9);
+ assertEquals(sketch.getQuantile(1, INCLUSIVE), 10); // always max value
// getQuantile() and getQuantiles() equivalence
{
@@ -380,9 +386,9 @@ public class ReqSketchTest {
{
// inclusive = true
final float[] quantiles =
- sketch.getQuantiles(new double[] {0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1}, true);
+ sketch.getQuantiles(new double[] {0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1}, INCLUSIVE);
for (int i = 0; i <= 10; i++) {
- assertEquals(sketch.getQuantile(i / 10.0, true), quantiles[i]);
+ assertEquals(sketch.getQuantile(i / 10.0, INCLUSIVE), quantiles[i]);
}
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org