You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2022/07/15 20:01:34 UTC

[datasketches-java] 01/01: KllFloatsSketchSortedView

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch quantiles_sketch_sorted_view
in repository https://gitbox.apache.org/repos/asf/datasketches-java.git

commit 42507c908c4de8fa0238235af3facfeab19967f6
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Fri Jul 15 13:01:28 2022 -0700

    KllFloatsSketchSortedView
---
 .../org/apache/datasketches/QuantilesHelper.java   |  2 +-
 .../kll/KllDoublesQuantileCalculator.java          |  2 +-
 .../apache/datasketches/kll/KllFloatsHelper.java   | 13 +++--
 .../apache/datasketches/kll/KllFloatsSketch.java   | 13 ++++-
 ...culator.java => KllFloatsSketchSortedView.java} | 30 ++++++----
 .../kll/KllFloatsSketchSortedViewIterator.java     | 68 ++++++++++++++++++++++
 .../datasketches/quantiles/DoublesAuxiliary.java   |  2 +-
 .../datasketches/kll/KllFloatsSketchTest.java      | 54 +++++++++++++++++
 8 files changed, 164 insertions(+), 20 deletions(-)

diff --git a/src/main/java/org/apache/datasketches/QuantilesHelper.java b/src/main/java/org/apache/datasketches/QuantilesHelper.java
index 45ccaf1e..bbb7c489 100644
--- a/src/main/java/org/apache/datasketches/QuantilesHelper.java
+++ b/src/main/java/org/apache/datasketches/QuantilesHelper.java
@@ -31,7 +31,7 @@ public class QuantilesHelper {
    * @param inclusive for treating rank as including weight of an item
    * @return total weight
    */ //used by classic Quantiles and KLL
-  public static long convertToPrecedingCummulative(final long[] array, final boolean inclusive) {
+  public static long convertToPrecedingCumulative(final long[] array, final boolean inclusive) {
     long subtotal = 0;
     for (int i = 0; i < array.length; i++) {
       final long newSubtotal = subtotal + array[i];
diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesQuantileCalculator.java b/src/main/java/org/apache/datasketches/kll/KllDoublesQuantileCalculator.java
index 3368afea..29252563 100644
--- a/src/main/java/org/apache/datasketches/kll/KllDoublesQuantileCalculator.java
+++ b/src/main/java/org/apache/datasketches/kll/KllDoublesQuantileCalculator.java
@@ -46,7 +46,7 @@ final class KllDoublesQuantileCalculator {
     levels_ = new int[numLevels + 1];
     populateFromSketch(items, levels, numLevels, numItems);
     blockyTandemMergeSort(items_, weights_, levels_, numLevels_);
-    QuantilesHelper.convertToPrecedingCummulative(weights_, inclusive);
+    QuantilesHelper.convertToPrecedingCumulative(weights_, inclusive);
   }
 
   //For testing only. Allows testing of getQuantile without a sketch.
diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java b/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java
index dc202240..c0862f09 100644
--- a/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java
+++ b/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java
@@ -104,13 +104,13 @@ final class KllFloatsHelper {
     //These two assumptions make KLL compatible with the previous classic Quantiles Sketch
     if (fraction == 0.0) { return mine.getMinFloatValue(); }
     if (fraction == 1.0) { return mine.getMaxFloatValue(); }
-    final KllFloatsQuantileCalculator quant = KllFloatsHelper.getFloatsQuantileCalculator(mine, inclusive);
+    final KllFloatsSketchSortedView quant = KllFloatsHelper.getFloatsSortedView(mine, true, inclusive);
     return quant.getQuantile(fraction);
   }
 
   static float[] getFloatsQuantiles(final KllSketch mine, final double[] fractions, final boolean inclusive) {
     if (mine.isEmpty()) { return null; }
-    KllFloatsQuantileCalculator quant = null;
+    KllFloatsSketchSortedView quant = null;
     final float[] quantiles = new float[fractions.length];
     for (int i = 0; i < fractions.length; i++) {
       final double fraction = fractions[i];
@@ -121,7 +121,7 @@ final class KllFloatsHelper {
       else if (fraction == 1.0) { quantiles[i] = mine.getMaxFloatValue(); }
       else {
         if (quant == null) {
-          quant = KllFloatsHelper.getFloatsQuantileCalculator(mine, inclusive);
+          quant = KllFloatsHelper.getFloatsSortedView(mine, true, inclusive);
         }
         quantiles[i] = quant.getQuantile(fraction);
       }
@@ -436,15 +436,16 @@ final class KllFloatsHelper {
     return new int[] {numLevels, targetItemCount, currentItemCount};
   }
 
-  private static KllFloatsQuantileCalculator getFloatsQuantileCalculator(final KllSketch mine,
-      final boolean inclusive) {
+  static KllFloatsSketchSortedView getFloatsSortedView(final KllSketch mine,
+      final boolean cumulative, final boolean inclusive) {
     final int[] myLevelsArr = mine.getLevelsArray();
     final float[] myFloatItemsArr = mine.getFloatItemsArray();
     if (!mine.isLevelZeroSorted()) {
       Arrays.sort(myFloatItemsArr, myLevelsArr[0], myLevelsArr[1]);
       if (!mine.hasMemory()) { mine.setLevelZeroSorted(true); }
     }
-    return new KllFloatsQuantileCalculator(myFloatItemsArr, myLevelsArr, mine.getNumLevels(), mine.getN(), inclusive);
+    return new KllFloatsSketchSortedView(myFloatItemsArr, myLevelsArr, mine.getNumLevels(), mine.getN(),
+        cumulative, inclusive);
   }
 
   private static void incrementFloatBucketsSortedLevel(
diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java
index 912b9701..b49788af 100644
--- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java
+++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java
@@ -102,7 +102,7 @@ public abstract class KllFloatsSketch extends KllSketch {
    * This will have a rank error of about 1.65%.
    * @return new KllFloatsSketch on the heap.
    */
-  public static KllFloatsSketch  newHeapInstance() {
+  public static KllFloatsSketch newHeapInstance() {
     return new KllHeapFloatsSketch(DEFAULT_K, DEFAULT_M);
   }
 
@@ -407,6 +407,17 @@ public abstract class KllFloatsSketch extends KllSketch {
     KllFloatsHelper.updateFloat(this, value);
   }
 
+  /**
+   * Sorted view of the sketch.
+   * Complexity: linear merge of sorted levels plus sorting of the level 0.
+   * @param cumulative if true weights are cumulative
+   * @param inclusive if true cumulative weight of an item includes its own weight
+   * @return sorted view object
+   */
+  public KllFloatsSketchSortedView getSortedView(final boolean cumulative, final boolean inclusive) {
+    return KllFloatsHelper.getFloatsSortedView(this, cumulative, inclusive);
+  }
+
   @Override //Artifact of inheritance
   double[] getDoubleItemsArray() { kllSketchThrow(MUST_NOT_CALL); return null; }
 
diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsQuantileCalculator.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketchSortedView.java
similarity index 88%
rename from src/main/java/org/apache/datasketches/kll/KllFloatsQuantileCalculator.java
rename to src/main/java/org/apache/datasketches/kll/KllFloatsSketchSortedView.java
index 761e8dd0..250e64d0 100644
--- a/src/main/java/org/apache/datasketches/kll/KllFloatsQuantileCalculator.java
+++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketchSortedView.java
@@ -22,13 +22,14 @@ package org.apache.datasketches.kll;
 import java.util.Arrays;
 
 import org.apache.datasketches.QuantilesHelper;
+import org.apache.datasketches.SketchesStateException;
 
 /**
  * Data structure for answering quantile queries based on the samples from KllSketch
  * @author Kevin Lang
  * @author Alexander Saydakov
  */
-final class KllFloatsQuantileCalculator {
+public final class KllFloatsSketchSortedView {
 
   private final long n_;
   private final float[] items_;
@@ -37,8 +38,8 @@ final class KllFloatsQuantileCalculator {
   private int numLevels_;
 
   // assumes that all levels are sorted including level 0
-  KllFloatsQuantileCalculator(final float[] items, final int[] levels, final int numLevels,
-      final long n, final boolean inclusive) {
+  KllFloatsSketchSortedView(final float[] items, final int[] levels, final int numLevels,
+      final long n, final boolean cumulative, final boolean inclusive) {
     n_ = n;
     final int numItems = levels[numLevels] - levels[0];
     items_ = new float[numItems];
@@ -46,11 +47,13 @@ final class KllFloatsQuantileCalculator {
     levels_ = new int[numLevels + 1];
     populateFromSketch(items, levels, numLevels, numItems);
     blockyTandemMergeSort(items_, weights_, levels_, numLevels_);
-    QuantilesHelper.convertToPrecedingCummulative(weights_, inclusive);
+    if (cumulative) {
+      QuantilesHelper.convertToPrecedingCumulative(weights_, inclusive);
+    }
   }
 
   //For testing only. Allows testing of getQuantile without a sketch.
-  KllFloatsQuantileCalculator(final float[] items, final long[] weights, final long n) {
+  KllFloatsSketchSortedView(final float[] items, final long[] weights, final long n) {
     n_ = n;
     items_ = items;
     weights_ = weights; //must be size of items + 1
@@ -58,6 +61,18 @@ final class KllFloatsQuantileCalculator {
     numLevels_ = 0;  //not used by test
   }
 
+  public float getQuantile(final double rank) {
+    if (weights_[items_.length] != n_) {
+      throw new SketchesStateException("getQuantile must be used with cumulative view only");
+    }
+    final long pos = QuantilesHelper.posOfRank(rank, n_);
+    return approximatelyAnswerPositonalQuery(pos);
+  }
+
+  public KllFloatsSketchSortedViewIterator iterator() {
+    return new KllFloatsSketchSortedViewIterator(items_, weights_);
+  }
+
   private static void blockyTandemMergeSort(final float[] items, final long[] weights,
       final int[] levels, final int numLevels) {
     if (numLevels == 1) { return; }
@@ -132,11 +147,6 @@ final class KllFloatsQuantileCalculator {
     }
   }
 
-  float getQuantile(final double rank) {
-    final long pos = QuantilesHelper.posOfRank(rank, n_);
-    return approximatelyAnswerPositonalQuery(pos);
-  }
-
   private float approximatelyAnswerPositonalQuery(final long pos) {
     assert pos >= 0;
     assert pos < n_;
diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketchSortedViewIterator.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketchSortedViewIterator.java
new file mode 100644
index 00000000..62fffe18
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketchSortedViewIterator.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.kll;
+
+/**
+ * Iterator over KllFloatsSketchSortedView
+ */
+public class KllFloatsSketchSortedViewIterator {
+
+  private final float[] items_;
+  private final long[] weights_;
+  private int i_;
+
+  KllFloatsSketchSortedViewIterator(final float[] items, final long[] weights) {
+    items_ = items;
+    weights_ = weights;
+    i_ = -1;
+  }
+
+  /**
+   * Gets a value from the current entry.
+   * Don't call this before calling next() for the first time
+   * or after getting false from next().
+   * @return value from the current entry
+   */
+  public float getValue() {
+    return items_[i_];
+  }
+
+  /**
+   * Gets a weight for the value from the current entry.
+   * Don't call this before calling next() for the first time
+   * or after getting false from next().
+   * @return weight for the value from the current entry
+   */
+  public long getWeight() {
+    return weights_[i_];
+  }
+
+  /**
+   * Advancing the iterator and checking existence of the next entry
+   * is combined here for efficiency. This results in an undefined
+   * state of the iterator before the first call of this method.
+   * @return true if the next element exists
+   */
+  public boolean next() {
+    i_++;
+    return i_ < items_.length;
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/quantiles/DoublesAuxiliary.java b/src/main/java/org/apache/datasketches/quantiles/DoublesAuxiliary.java
index 96e3afa9..684a66fb 100644
--- a/src/main/java/org/apache/datasketches/quantiles/DoublesAuxiliary.java
+++ b/src/main/java/org/apache/datasketches/quantiles/DoublesAuxiliary.java
@@ -61,7 +61,7 @@ final class DoublesAuxiliary {
     //  taking advantage of the already sorted blocks of length k
     blockyTandemMergeSort(itemsArr, cumWtsArr, numSamples, k);
 
-    final long total = QuantilesHelper.convertToPrecedingCummulative(cumWtsArr, inclusive);
+    final long total = QuantilesHelper.convertToPrecedingCumulative(cumWtsArr, inclusive);
     assert total == n;
 
     auxN_ = n;
diff --git a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java
index e121f93d..7d617233 100644
--- a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java
+++ b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java
@@ -28,6 +28,7 @@ import static org.testng.Assert.assertTrue;
 import static org.testng.Assert.fail;
 
 import org.apache.datasketches.SketchesArgumentException;
+import org.apache.datasketches.SketchesStateException;
 import org.apache.datasketches.memory.DefaultMemoryRequestServer;
 import org.apache.datasketches.memory.Memory;
 import org.apache.datasketches.memory.WritableMemory;
@@ -539,4 +540,57 @@ public class KllFloatsSketchTest {
     assertEquals(bytes, 832);
   }
 
+  @Test
+  public void sortedView() {
+    KllFloatsSketch sk = KllFloatsSketch.newHeapInstance();
+    sk.update(3);
+    sk.update(1);
+    sk.update(2);
+    { // non-cumulative (inclusive does not matter in this case)
+      KllFloatsSketchSortedView view = sk.getSortedView(false, false);
+      KllFloatsSketchSortedViewIterator it = view.iterator();
+      assertEquals(it.next(), true);
+      assertEquals(it.getValue(), 1);
+      assertEquals(it.getWeight(), 1);
+      assertEquals(it.next(), true);
+      assertEquals(it.getValue(), 2);
+      assertEquals(it.getWeight(), 1);
+      assertEquals(it.next(), true);
+      assertEquals(it.getValue(), 3);
+      assertEquals(it.getWeight(), 1);
+      assertEquals(it.next(), false);
+      try {
+        view.getQuantile(0);
+        fail();
+      } catch(SketchesStateException e) {}
+    }
+    { // cumulative, non-inclusive
+      KllFloatsSketchSortedView view = sk.getSortedView(true, false);
+      KllFloatsSketchSortedViewIterator it = view.iterator();
+      assertEquals(it.next(), true);
+      assertEquals(it.getValue(), 1);
+      assertEquals(it.getWeight(), 0);
+      assertEquals(it.next(), true);
+      assertEquals(it.getValue(), 2);
+      assertEquals(it.getWeight(), 1);
+      assertEquals(it.next(), true);
+      assertEquals(it.getValue(), 3);
+      assertEquals(it.getWeight(), 2);
+      assertEquals(it.next(), false);
+    }
+    { // cumulative, inclusive
+      KllFloatsSketchSortedView view = sk.getSortedView(true, true);
+      KllFloatsSketchSortedViewIterator it = view.iterator();
+      assertEquals(it.next(), true);
+      assertEquals(it.getValue(), 1);
+      assertEquals(it.getWeight(), 1);
+      assertEquals(it.next(), true);
+      assertEquals(it.getValue(), 2);
+      assertEquals(it.getWeight(), 2);
+      assertEquals(it.next(), true);
+      assertEquals(it.getValue(), 3);
+      assertEquals(it.getWeight(), 3);
+      assertEquals(it.next(), false);
+    }
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org