You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by le...@apache.org on 2024/02/14 02:29:32 UTC
(datasketches-java) 05/06: This fixes a bug discovered in recent Druid testing.
This is an automated email from the ASF dual-hosted git repository.
leerho pushed a commit to branch improve_partition_boundaries
in repository https://gitbox.apache.org/repos/asf/datasketches-java.git
commit 3a9402a9637e469a25d09c3173c9730307ab28d0
Author: Lee Rhodes <le...@users.noreply.github.com>
AuthorDate: Tue Feb 13 18:26:37 2024 -0800
This fixes a bug discovered in recent Druid testing.
Makes other improvements to the getPartionBoundaries code.
---
.../quantiles/ItemsSketchSortedView.java | 28 ++++++++++++++++------
.../ItemsSketchPartitionBoundariesTest.java | 3 ++-
.../quantiles/ItemsSketchSortedViewString.java | 5 ++--
.../datasketches/quantiles/ItemsSketchTest.java | 3 ++-
.../quantilescommon/CrossCheckQuantilesTest.java | 2 +-
5 files changed, 29 insertions(+), 12 deletions(-)
diff --git a/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java b/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java
index fa6d9541..862f54d7 100644
--- a/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java
+++ b/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java
@@ -19,6 +19,7 @@
package org.apache.datasketches.quantiles;
+import static org.apache.datasketches.quantiles.ClassicUtil.getNormalizedRankError;
import static org.apache.datasketches.quantilescommon.GenericInequalitySearch.find;
import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE;
import static org.apache.datasketches.quantilescommon.QuantilesAPI.EMPTY_MSG;
@@ -55,6 +56,7 @@ public class ItemsSketchSortedView<T> implements GenericSortedView<T>, Partition
private final T maxItem;
private final T minItem;
private final Class<T> clazz;
+ private final int k;
/**
* Construct from elements for testing.
@@ -70,7 +72,8 @@ public class ItemsSketchSortedView<T> implements GenericSortedView<T>, Partition
final long totalN,
final Comparator<T> comparator,
final T maxItem,
- final T minItem) {
+ final T minItem,
+ final int k) {
this.quantiles = quantiles;
this.cumWeights = cumWeights;
this.totalN = totalN;
@@ -78,6 +81,7 @@ public class ItemsSketchSortedView<T> implements GenericSortedView<T>, Partition
this.maxItem = maxItem;
this.minItem = minItem;
this.clazz = (Class<T>)quantiles[0].getClass();
+ this.k = k;
}
/**
@@ -93,9 +97,10 @@ public class ItemsSketchSortedView<T> implements GenericSortedView<T>, Partition
this.quantiles = (T[]) Array.newInstance(sketch.clazz, numQuantiles);
this.minItem = sketch.minItem_;
this.maxItem = sketch.maxItem_;
- cumWeights = new long[numQuantiles];
- comparator = sketch.getComparator();
- clazz = sketch.clazz;
+ this.cumWeights = new long[numQuantiles];
+ this.comparator = sketch.getComparator();
+ this.clazz = sketch.clazz;
+ this.k = sketch.getK();
final Object[] combinedBuffer = sketch.getCombinedBuffer();
final int baseBufferCount = sketch.getBaseBufferCount();
@@ -155,12 +160,21 @@ public class ItemsSketchSortedView<T> implements GenericSortedView<T>, Partition
final QuantileSearchCriteria searchCrit) {
if (isEmpty()) { throw new IllegalArgumentException(QuantilesAPI.EMPTY_MSG); }
final long totalN = this.totalN;
+ final double delta = getNormalizedRankError(k, true) * totalN;
+ final int maxParts = (int) (totalN / Math.ceil(delta * 2) );
final int svLen = cumWeights.length;
+ if (numEquallySized > maxParts) {
+ throw new SketchesArgumentException(QuantilesAPI.UNSUPPORTED_MSG
+ + "Requested number of partitions is too large for this sized sketch. "
+ + "It exceeds maximum number of partitions allowed by the error threshold for this size sketch."
+ + "Requested Partitions: " + numEquallySized + " > " + maxParts);
+ }
if (numEquallySized > svLen / 2) {
- throw new IllegalArgumentException(QuantilesAPI.UNSUPPORTED_MSG
- + " Number of requested partitions is too large for this sized sketch. "
+ throw new SketchesArgumentException(QuantilesAPI.UNSUPPORTED_MSG
+ + "Requested number of partitions is too large for this sized sketch. "
+ + "It exceeds maximum number of retained items divided by 2."
+ "Requested Partitions: " + numEquallySized + " > "
- + "Retained Items: " + svLen + " / 2.");
+ + "Retained Items / 2: " + (svLen / 2));
}
final double[] searchNormRanks = evenlySpacedDoubles(0, 1.0, numEquallySized + 1);
diff --git a/src/test/java/org/apache/datasketches/quantiles/ItemsSketchPartitionBoundariesTest.java b/src/test/java/org/apache/datasketches/quantiles/ItemsSketchPartitionBoundariesTest.java
index 7cafbc39..03c41776 100644
--- a/src/test/java/org/apache/datasketches/quantiles/ItemsSketchPartitionBoundariesTest.java
+++ b/src/test/java/org/apache/datasketches/quantiles/ItemsSketchPartitionBoundariesTest.java
@@ -29,6 +29,7 @@ import org.apache.datasketches.quantilescommon.GenericSortedViewIterator;
import org.testng.annotations.Test;
public class ItemsSketchPartitionBoundariesTest {
+ private static final int k = 128;
@Test
public void checkSimpleEndsAdjustment() {
@@ -39,7 +40,7 @@ public class ItemsSketchPartitionBoundariesTest {
final String maxItem = "8";
final String minItem = "1";
ItemsSketchSortedViewString sv = new ItemsSketchSortedViewString(
- quantiles, cumWeights, totalN, comparator, maxItem, minItem);
+ quantiles, cumWeights, totalN, comparator, maxItem, minItem, k);
GenericSortedViewIterator<String> itr = sv.iterator();
while (itr.next()) {
diff --git a/src/test/java/org/apache/datasketches/quantiles/ItemsSketchSortedViewString.java b/src/test/java/org/apache/datasketches/quantiles/ItemsSketchSortedViewString.java
index 0dbf5bde..09a8accd 100644
--- a/src/test/java/org/apache/datasketches/quantiles/ItemsSketchSortedViewString.java
+++ b/src/test/java/org/apache/datasketches/quantiles/ItemsSketchSortedViewString.java
@@ -32,7 +32,8 @@ public class ItemsSketchSortedViewString extends ItemsSketchSortedView<String> {
final long totalN,
final Comparator<String> comparator,
final String maxItem,
- final String minItem) {
- super(quantiles, cumWeights, totalN, comparator, maxItem, minItem);
+ final String minItem,
+ final int k) {
+ super(quantiles, cumWeights, totalN, comparator, maxItem, minItem, k);
}
}
diff --git a/src/test/java/org/apache/datasketches/quantiles/ItemsSketchTest.java b/src/test/java/org/apache/datasketches/quantiles/ItemsSketchTest.java
index 6d1b0d0c..3b458a56 100644
--- a/src/test/java/org/apache/datasketches/quantiles/ItemsSketchTest.java
+++ b/src/test/java/org/apache/datasketches/quantiles/ItemsSketchTest.java
@@ -19,6 +19,7 @@
package org.apache.datasketches.quantiles;
+import static org.apache.datasketches.quantiles.PreambleUtil.DEFAULT_K;
import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.EXCLUSIVE;
import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE;
import static org.testng.Assert.assertEquals;
@@ -618,7 +619,7 @@ public class ItemsSketchTest {
Double[] qArr = {8.0, 10.0, 10.0, 20.0};
long[] cwArr = {1, 3, 4, 5};
Comparator<Double> comp = Comparator.naturalOrder();
- ItemsSketchSortedView<Double> sv = new ItemsSketchSortedView<>(qArr, cwArr, 5L, comp, 20.0, 8.0);
+ ItemsSketchSortedView<Double> sv = new ItemsSketchSortedView<>(qArr, cwArr, 5L, comp, 20.0, 8.0, DEFAULT_K);
double[] ranks = {0, .1, .2, .3, .6, .7, .8, .9, 1.0};
Double[] qOut = new Double[9];
for (int i = 0; i < ranks.length; i++) {
diff --git a/src/test/java/org/apache/datasketches/quantilescommon/CrossCheckQuantilesTest.java b/src/test/java/org/apache/datasketches/quantilescommon/CrossCheckQuantilesTest.java
index 2cbc006b..d66bf5f3 100644
--- a/src/test/java/org/apache/datasketches/quantilescommon/CrossCheckQuantilesTest.java
+++ b/src/test/java/org/apache/datasketches/quantilescommon/CrossCheckQuantilesTest.java
@@ -349,7 +349,7 @@ public class CrossCheckQuantilesTest {
kllItemsSV = new KllItemsSketchSortedViewString(svIValues[set], svCumWeights[set], totalN[set],
comparator, svImax, svImin);
itemsSV = new ItemsSketchSortedViewString(svIValues[set], svCumWeights[set], totalN[set],
- comparator, svImax, svImin);
+ comparator, svImax, svImin, k);
}
private final static ReqSketchSortedView getRawReqSV(
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org