You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by le...@apache.org on 2024/02/14 02:29:32 UTC

(datasketches-java) 05/06: This fixes a bug discovered in recent Druid testing.

This is an automated email from the ASF dual-hosted git repository.

leerho pushed a commit to branch improve_partition_boundaries
in repository https://gitbox.apache.org/repos/asf/datasketches-java.git

commit 3a9402a9637e469a25d09c3173c9730307ab28d0
Author: Lee Rhodes <le...@users.noreply.github.com>
AuthorDate: Tue Feb 13 18:26:37 2024 -0800

    This fixes a bug discovered in recent Druid testing.
    
    Makes other improvements to the getPartionBoundaries code.
---
 .../quantiles/ItemsSketchSortedView.java           | 28 ++++++++++++++++------
 .../ItemsSketchPartitionBoundariesTest.java        |  3 ++-
 .../quantiles/ItemsSketchSortedViewString.java     |  5 ++--
 .../datasketches/quantiles/ItemsSketchTest.java    |  3 ++-
 .../quantilescommon/CrossCheckQuantilesTest.java   |  2 +-
 5 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java b/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java
index fa6d9541..862f54d7 100644
--- a/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java
+++ b/src/main/java/org/apache/datasketches/quantiles/ItemsSketchSortedView.java
@@ -19,6 +19,7 @@
 
 package org.apache.datasketches.quantiles;
 
+import static org.apache.datasketches.quantiles.ClassicUtil.getNormalizedRankError;
 import static org.apache.datasketches.quantilescommon.GenericInequalitySearch.find;
 import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE;
 import static org.apache.datasketches.quantilescommon.QuantilesAPI.EMPTY_MSG;
@@ -55,6 +56,7 @@ public class ItemsSketchSortedView<T> implements GenericSortedView<T>, Partition
   private final T maxItem;
   private final T minItem;
   private final Class<T> clazz;
+  private final int k;
 
   /**
    * Construct from elements for testing.
@@ -70,7 +72,8 @@ public class ItemsSketchSortedView<T> implements GenericSortedView<T>, Partition
       final long totalN,
       final Comparator<T> comparator,
       final T maxItem,
-      final T minItem) {
+      final T minItem,
+      final int k) {
     this.quantiles = quantiles;
     this.cumWeights = cumWeights;
     this.totalN = totalN;
@@ -78,6 +81,7 @@ public class ItemsSketchSortedView<T> implements GenericSortedView<T>, Partition
     this.maxItem = maxItem;
     this.minItem = minItem;
     this.clazz = (Class<T>)quantiles[0].getClass();
+    this.k = k;
   }
 
   /**
@@ -93,9 +97,10 @@ public class ItemsSketchSortedView<T> implements GenericSortedView<T>, Partition
     this.quantiles = (T[]) Array.newInstance(sketch.clazz, numQuantiles);
     this.minItem = sketch.minItem_;
     this.maxItem = sketch.maxItem_;
-    cumWeights = new long[numQuantiles];
-    comparator = sketch.getComparator();
-    clazz = sketch.clazz;
+    this.cumWeights = new long[numQuantiles];
+    this.comparator = sketch.getComparator();
+    this.clazz = sketch.clazz;
+    this.k = sketch.getK();
 
     final Object[] combinedBuffer = sketch.getCombinedBuffer();
     final int baseBufferCount = sketch.getBaseBufferCount();
@@ -155,12 +160,21 @@ public class ItemsSketchSortedView<T> implements GenericSortedView<T>, Partition
       final QuantileSearchCriteria searchCrit) {
     if (isEmpty()) { throw new IllegalArgumentException(QuantilesAPI.EMPTY_MSG); }
     final long totalN = this.totalN;
+    final double delta = getNormalizedRankError(k, true) * totalN;
+    final int maxParts = (int) (totalN / Math.ceil(delta * 2) );
     final int svLen = cumWeights.length;
+    if (numEquallySized > maxParts) {
+      throw new SketchesArgumentException(QuantilesAPI.UNSUPPORTED_MSG
+          + "Requested number of partitions is too large for this sized sketch. "
+          + "It exceeds maximum number of partitions allowed by the error threshold for this size sketch."
+          + "Requested Partitions: " + numEquallySized + " > " + maxParts);
+    }
     if (numEquallySized > svLen / 2) {
-      throw new IllegalArgumentException(QuantilesAPI.UNSUPPORTED_MSG
-          + " Number of requested partitions is too large for this sized sketch. "
+      throw new SketchesArgumentException(QuantilesAPI.UNSUPPORTED_MSG
+          + "Requested number of partitions is too large for this sized sketch. "
+          + "It exceeds maximum number of retained items divided by 2."
           + "Requested Partitions: " + numEquallySized + " > "
-          + "Retained Items: " + svLen + " / 2.");
+          + "Retained Items / 2: " + (svLen / 2));
     }
 
     final double[] searchNormRanks = evenlySpacedDoubles(0, 1.0, numEquallySized + 1);
diff --git a/src/test/java/org/apache/datasketches/quantiles/ItemsSketchPartitionBoundariesTest.java b/src/test/java/org/apache/datasketches/quantiles/ItemsSketchPartitionBoundariesTest.java
index 7cafbc39..03c41776 100644
--- a/src/test/java/org/apache/datasketches/quantiles/ItemsSketchPartitionBoundariesTest.java
+++ b/src/test/java/org/apache/datasketches/quantiles/ItemsSketchPartitionBoundariesTest.java
@@ -29,6 +29,7 @@ import org.apache.datasketches.quantilescommon.GenericSortedViewIterator;
 import org.testng.annotations.Test;
 
 public class ItemsSketchPartitionBoundariesTest {
+  private static final int k = 128;
 
   @Test
   public void checkSimpleEndsAdjustment() {
@@ -39,7 +40,7 @@ public class ItemsSketchPartitionBoundariesTest {
     final String maxItem = "8";
     final String minItem = "1";
     ItemsSketchSortedViewString sv = new ItemsSketchSortedViewString(
-        quantiles, cumWeights, totalN, comparator, maxItem, minItem);
+        quantiles, cumWeights, totalN, comparator, maxItem, minItem, k);
 
     GenericSortedViewIterator<String> itr = sv.iterator();
     while (itr.next()) {
diff --git a/src/test/java/org/apache/datasketches/quantiles/ItemsSketchSortedViewString.java b/src/test/java/org/apache/datasketches/quantiles/ItemsSketchSortedViewString.java
index 0dbf5bde..09a8accd 100644
--- a/src/test/java/org/apache/datasketches/quantiles/ItemsSketchSortedViewString.java
+++ b/src/test/java/org/apache/datasketches/quantiles/ItemsSketchSortedViewString.java
@@ -32,7 +32,8 @@ public class ItemsSketchSortedViewString extends ItemsSketchSortedView<String> {
       final long totalN,
       final Comparator<String> comparator,
       final String maxItem,
-      final String minItem) {
-    super(quantiles, cumWeights, totalN, comparator, maxItem, minItem);
+      final String minItem,
+      final int k) {
+    super(quantiles, cumWeights, totalN, comparator, maxItem, minItem, k);
   }
 }
diff --git a/src/test/java/org/apache/datasketches/quantiles/ItemsSketchTest.java b/src/test/java/org/apache/datasketches/quantiles/ItemsSketchTest.java
index 6d1b0d0c..3b458a56 100644
--- a/src/test/java/org/apache/datasketches/quantiles/ItemsSketchTest.java
+++ b/src/test/java/org/apache/datasketches/quantiles/ItemsSketchTest.java
@@ -19,6 +19,7 @@
 
 package org.apache.datasketches.quantiles;
 
+import static org.apache.datasketches.quantiles.PreambleUtil.DEFAULT_K;
 import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.EXCLUSIVE;
 import static org.apache.datasketches.quantilescommon.QuantileSearchCriteria.INCLUSIVE;
 import static org.testng.Assert.assertEquals;
@@ -618,7 +619,7 @@ public class ItemsSketchTest {
     Double[] qArr = {8.0, 10.0, 10.0, 20.0};
     long[] cwArr =  {1,  3,  4,  5};
     Comparator<Double> comp = Comparator.naturalOrder();
-    ItemsSketchSortedView<Double> sv = new ItemsSketchSortedView<>(qArr, cwArr, 5L, comp, 20.0, 8.0);
+    ItemsSketchSortedView<Double> sv = new ItemsSketchSortedView<>(qArr, cwArr, 5L, comp, 20.0, 8.0, DEFAULT_K);
     double[] ranks = {0, .1, .2, .3, .6, .7, .8, .9, 1.0};
     Double[] qOut = new Double[9];
     for (int i = 0; i < ranks.length; i++) {
diff --git a/src/test/java/org/apache/datasketches/quantilescommon/CrossCheckQuantilesTest.java b/src/test/java/org/apache/datasketches/quantilescommon/CrossCheckQuantilesTest.java
index 2cbc006b..d66bf5f3 100644
--- a/src/test/java/org/apache/datasketches/quantilescommon/CrossCheckQuantilesTest.java
+++ b/src/test/java/org/apache/datasketches/quantilescommon/CrossCheckQuantilesTest.java
@@ -349,7 +349,7 @@ public class CrossCheckQuantilesTest {
     kllItemsSV = new KllItemsSketchSortedViewString(svIValues[set], svCumWeights[set], totalN[set],
         comparator, svImax, svImin);
     itemsSV = new ItemsSketchSortedViewString(svIValues[set], svCumWeights[set], totalN[set],
-        comparator, svImax, svImin);
+        comparator, svImax, svImin, k);
   }
 
   private final static ReqSketchSortedView getRawReqSV(


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org