You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2022/07/15 20:01:34 UTC
[datasketches-java] 01/01: KllFloatsSketchSortedView
This is an automated email from the ASF dual-hosted git repository.
alsay pushed a commit to branch quantiles_sketch_sorted_view
in repository https://gitbox.apache.org/repos/asf/datasketches-java.git
commit 42507c908c4de8fa0238235af3facfeab19967f6
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Fri Jul 15 13:01:28 2022 -0700
KllFloatsSketchSortedView
---
.../org/apache/datasketches/QuantilesHelper.java | 2 +-
.../kll/KllDoublesQuantileCalculator.java | 2 +-
.../apache/datasketches/kll/KllFloatsHelper.java | 13 +++--
.../apache/datasketches/kll/KllFloatsSketch.java | 13 ++++-
...culator.java => KllFloatsSketchSortedView.java} | 30 ++++++----
.../kll/KllFloatsSketchSortedViewIterator.java | 68 ++++++++++++++++++++++
.../datasketches/quantiles/DoublesAuxiliary.java | 2 +-
.../datasketches/kll/KllFloatsSketchTest.java | 54 +++++++++++++++++
8 files changed, 164 insertions(+), 20 deletions(-)
diff --git a/src/main/java/org/apache/datasketches/QuantilesHelper.java b/src/main/java/org/apache/datasketches/QuantilesHelper.java
index 45ccaf1e..bbb7c489 100644
--- a/src/main/java/org/apache/datasketches/QuantilesHelper.java
+++ b/src/main/java/org/apache/datasketches/QuantilesHelper.java
@@ -31,7 +31,7 @@ public class QuantilesHelper {
* @param inclusive for treating rank as including weight of an item
* @return total weight
*/ //used by classic Quantiles and KLL
- public static long convertToPrecedingCummulative(final long[] array, final boolean inclusive) {
+ public static long convertToPrecedingCumulative(final long[] array, final boolean inclusive) {
long subtotal = 0;
for (int i = 0; i < array.length; i++) {
final long newSubtotal = subtotal + array[i];
diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesQuantileCalculator.java b/src/main/java/org/apache/datasketches/kll/KllDoublesQuantileCalculator.java
index 3368afea..29252563 100644
--- a/src/main/java/org/apache/datasketches/kll/KllDoublesQuantileCalculator.java
+++ b/src/main/java/org/apache/datasketches/kll/KllDoublesQuantileCalculator.java
@@ -46,7 +46,7 @@ final class KllDoublesQuantileCalculator {
levels_ = new int[numLevels + 1];
populateFromSketch(items, levels, numLevels, numItems);
blockyTandemMergeSort(items_, weights_, levels_, numLevels_);
- QuantilesHelper.convertToPrecedingCummulative(weights_, inclusive);
+ QuantilesHelper.convertToPrecedingCumulative(weights_, inclusive);
}
//For testing only. Allows testing of getQuantile without a sketch.
diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java b/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java
index dc202240..c0862f09 100644
--- a/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java
+++ b/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java
@@ -104,13 +104,13 @@ final class KllFloatsHelper {
//These two assumptions make KLL compatible with the previous classic Quantiles Sketch
if (fraction == 0.0) { return mine.getMinFloatValue(); }
if (fraction == 1.0) { return mine.getMaxFloatValue(); }
- final KllFloatsQuantileCalculator quant = KllFloatsHelper.getFloatsQuantileCalculator(mine, inclusive);
+ final KllFloatsSketchSortedView quant = KllFloatsHelper.getFloatsSortedView(mine, true, inclusive);
return quant.getQuantile(fraction);
}
static float[] getFloatsQuantiles(final KllSketch mine, final double[] fractions, final boolean inclusive) {
if (mine.isEmpty()) { return null; }
- KllFloatsQuantileCalculator quant = null;
+ KllFloatsSketchSortedView quant = null;
final float[] quantiles = new float[fractions.length];
for (int i = 0; i < fractions.length; i++) {
final double fraction = fractions[i];
@@ -121,7 +121,7 @@ final class KllFloatsHelper {
else if (fraction == 1.0) { quantiles[i] = mine.getMaxFloatValue(); }
else {
if (quant == null) {
- quant = KllFloatsHelper.getFloatsQuantileCalculator(mine, inclusive);
+ quant = KllFloatsHelper.getFloatsSortedView(mine, true, inclusive);
}
quantiles[i] = quant.getQuantile(fraction);
}
@@ -436,15 +436,16 @@ final class KllFloatsHelper {
return new int[] {numLevels, targetItemCount, currentItemCount};
}
- private static KllFloatsQuantileCalculator getFloatsQuantileCalculator(final KllSketch mine,
- final boolean inclusive) {
+ static KllFloatsSketchSortedView getFloatsSortedView(final KllSketch mine,
+ final boolean cumulative, final boolean inclusive) {
final int[] myLevelsArr = mine.getLevelsArray();
final float[] myFloatItemsArr = mine.getFloatItemsArray();
if (!mine.isLevelZeroSorted()) {
Arrays.sort(myFloatItemsArr, myLevelsArr[0], myLevelsArr[1]);
if (!mine.hasMemory()) { mine.setLevelZeroSorted(true); }
}
- return new KllFloatsQuantileCalculator(myFloatItemsArr, myLevelsArr, mine.getNumLevels(), mine.getN(), inclusive);
+ return new KllFloatsSketchSortedView(myFloatItemsArr, myLevelsArr, mine.getNumLevels(), mine.getN(),
+ cumulative, inclusive);
}
private static void incrementFloatBucketsSortedLevel(
diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java
index 912b9701..b49788af 100644
--- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java
+++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java
@@ -102,7 +102,7 @@ public abstract class KllFloatsSketch extends KllSketch {
* This will have a rank error of about 1.65%.
* @return new KllFloatsSketch on the heap.
*/
- public static KllFloatsSketch newHeapInstance() {
+ public static KllFloatsSketch newHeapInstance() {
return new KllHeapFloatsSketch(DEFAULT_K, DEFAULT_M);
}
@@ -407,6 +407,17 @@ public abstract class KllFloatsSketch extends KllSketch {
KllFloatsHelper.updateFloat(this, value);
}
+ /**
+ * Sorted view of the sketch.
+ * Complexity: linear merge of sorted levels plus sorting of the level 0.
+ * @param cumulative if true weights are cumulative
+ * @param inclusive if true cumulative weight of an item includes its own weight
+ * @return sorted view object
+ */
+ public KllFloatsSketchSortedView getSortedView(final boolean cumulative, final boolean inclusive) {
+ return KllFloatsHelper.getFloatsSortedView(this, cumulative, inclusive);
+ }
+
@Override //Artifact of inheritance
double[] getDoubleItemsArray() { kllSketchThrow(MUST_NOT_CALL); return null; }
diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsQuantileCalculator.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketchSortedView.java
similarity index 88%
rename from src/main/java/org/apache/datasketches/kll/KllFloatsQuantileCalculator.java
rename to src/main/java/org/apache/datasketches/kll/KllFloatsSketchSortedView.java
index 761e8dd0..250e64d0 100644
--- a/src/main/java/org/apache/datasketches/kll/KllFloatsQuantileCalculator.java
+++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketchSortedView.java
@@ -22,13 +22,14 @@ package org.apache.datasketches.kll;
import java.util.Arrays;
import org.apache.datasketches.QuantilesHelper;
+import org.apache.datasketches.SketchesStateException;
/**
* Data structure for answering quantile queries based on the samples from KllSketch
* @author Kevin Lang
* @author Alexander Saydakov
*/
-final class KllFloatsQuantileCalculator {
+public final class KllFloatsSketchSortedView {
private final long n_;
private final float[] items_;
@@ -37,8 +38,8 @@ final class KllFloatsQuantileCalculator {
private int numLevels_;
// assumes that all levels are sorted including level 0
- KllFloatsQuantileCalculator(final float[] items, final int[] levels, final int numLevels,
- final long n, final boolean inclusive) {
+ KllFloatsSketchSortedView(final float[] items, final int[] levels, final int numLevels,
+ final long n, final boolean cumulative, final boolean inclusive) {
n_ = n;
final int numItems = levels[numLevels] - levels[0];
items_ = new float[numItems];
@@ -46,11 +47,13 @@ final class KllFloatsQuantileCalculator {
levels_ = new int[numLevels + 1];
populateFromSketch(items, levels, numLevels, numItems);
blockyTandemMergeSort(items_, weights_, levels_, numLevels_);
- QuantilesHelper.convertToPrecedingCummulative(weights_, inclusive);
+ if (cumulative) {
+ QuantilesHelper.convertToPrecedingCumulative(weights_, inclusive);
+ }
}
//For testing only. Allows testing of getQuantile without a sketch.
- KllFloatsQuantileCalculator(final float[] items, final long[] weights, final long n) {
+ KllFloatsSketchSortedView(final float[] items, final long[] weights, final long n) {
n_ = n;
items_ = items;
weights_ = weights; //must be size of items + 1
@@ -58,6 +61,18 @@ final class KllFloatsQuantileCalculator {
numLevels_ = 0; //not used by test
}
+ public float getQuantile(final double rank) {
+ if (weights_[items_.length] != n_) {
+ throw new SketchesStateException("getQuantile must be used with cumulative view only");
+ }
+ final long pos = QuantilesHelper.posOfRank(rank, n_);
+ return approximatelyAnswerPositonalQuery(pos);
+ }
+
+ public KllFloatsSketchSortedViewIterator iterator() {
+ return new KllFloatsSketchSortedViewIterator(items_, weights_);
+ }
+
private static void blockyTandemMergeSort(final float[] items, final long[] weights,
final int[] levels, final int numLevels) {
if (numLevels == 1) { return; }
@@ -132,11 +147,6 @@ final class KllFloatsQuantileCalculator {
}
}
- float getQuantile(final double rank) {
- final long pos = QuantilesHelper.posOfRank(rank, n_);
- return approximatelyAnswerPositonalQuery(pos);
- }
-
private float approximatelyAnswerPositonalQuery(final long pos) {
assert pos >= 0;
assert pos < n_;
diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketchSortedViewIterator.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketchSortedViewIterator.java
new file mode 100644
index 00000000..62fffe18
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketchSortedViewIterator.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.kll;
+
+/**
+ * Iterator over KllFloatsSketchSortedView
+ */
+public class KllFloatsSketchSortedViewIterator {
+
+ private final float[] items_;
+ private final long[] weights_;
+ private int i_;
+
+ KllFloatsSketchSortedViewIterator(final float[] items, final long[] weights) {
+ items_ = items;
+ weights_ = weights;
+ i_ = -1;
+ }
+
+ /**
+ * Gets a value from the current entry.
+ * Don't call this before calling next() for the first time
+ * or after getting false from next().
+ * @return value from the current entry
+ */
+ public float getValue() {
+ return items_[i_];
+ }
+
+ /**
+ * Gets a weight for the value from the current entry.
+ * Don't call this before calling next() for the first time
+ * or after getting false from next().
+ * @return weight for the value from the current entry
+ */
+ public long getWeight() {
+ return weights_[i_];
+ }
+
+ /**
+ * Advancing the iterator and checking existence of the next entry
+ * is combined here for efficiency. This results in an undefined
+ * state of the iterator before the first call of this method.
+ * @return true if the next element exists
+ */
+ public boolean next() {
+ i_++;
+ return i_ < items_.length;
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/quantiles/DoublesAuxiliary.java b/src/main/java/org/apache/datasketches/quantiles/DoublesAuxiliary.java
index 96e3afa9..684a66fb 100644
--- a/src/main/java/org/apache/datasketches/quantiles/DoublesAuxiliary.java
+++ b/src/main/java/org/apache/datasketches/quantiles/DoublesAuxiliary.java
@@ -61,7 +61,7 @@ final class DoublesAuxiliary {
// taking advantage of the already sorted blocks of length k
blockyTandemMergeSort(itemsArr, cumWtsArr, numSamples, k);
- final long total = QuantilesHelper.convertToPrecedingCummulative(cumWtsArr, inclusive);
+ final long total = QuantilesHelper.convertToPrecedingCumulative(cumWtsArr, inclusive);
assert total == n;
auxN_ = n;
diff --git a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java
index e121f93d..7d617233 100644
--- a/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java
+++ b/src/test/java/org/apache/datasketches/kll/KllFloatsSketchTest.java
@@ -28,6 +28,7 @@ import static org.testng.Assert.assertTrue;
import static org.testng.Assert.fail;
import org.apache.datasketches.SketchesArgumentException;
+import org.apache.datasketches.SketchesStateException;
import org.apache.datasketches.memory.DefaultMemoryRequestServer;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;
@@ -539,4 +540,57 @@ public class KllFloatsSketchTest {
assertEquals(bytes, 832);
}
+ @Test
+ public void sortedView() {
+ KllFloatsSketch sk = KllFloatsSketch.newHeapInstance();
+ sk.update(3);
+ sk.update(1);
+ sk.update(2);
+ { // non-cumulative (inclusive does not matter in this case)
+ KllFloatsSketchSortedView view = sk.getSortedView(false, false);
+ KllFloatsSketchSortedViewIterator it = view.iterator();
+ assertEquals(it.next(), true);
+ assertEquals(it.getValue(), 1);
+ assertEquals(it.getWeight(), 1);
+ assertEquals(it.next(), true);
+ assertEquals(it.getValue(), 2);
+ assertEquals(it.getWeight(), 1);
+ assertEquals(it.next(), true);
+ assertEquals(it.getValue(), 3);
+ assertEquals(it.getWeight(), 1);
+ assertEquals(it.next(), false);
+ try {
+ view.getQuantile(0);
+ fail();
+ } catch(SketchesStateException e) {}
+ }
+ { // cumulative, non-inclusive
+ KllFloatsSketchSortedView view = sk.getSortedView(true, false);
+ KllFloatsSketchSortedViewIterator it = view.iterator();
+ assertEquals(it.next(), true);
+ assertEquals(it.getValue(), 1);
+ assertEquals(it.getWeight(), 0);
+ assertEquals(it.next(), true);
+ assertEquals(it.getValue(), 2);
+ assertEquals(it.getWeight(), 1);
+ assertEquals(it.next(), true);
+ assertEquals(it.getValue(), 3);
+ assertEquals(it.getWeight(), 2);
+ assertEquals(it.next(), false);
+ }
+ { // cumulative, inclusive
+ KllFloatsSketchSortedView view = sk.getSortedView(true, true);
+ KllFloatsSketchSortedViewIterator it = view.iterator();
+ assertEquals(it.next(), true);
+ assertEquals(it.getValue(), 1);
+ assertEquals(it.getWeight(), 1);
+ assertEquals(it.next(), true);
+ assertEquals(it.getValue(), 2);
+ assertEquals(it.getWeight(), 2);
+ assertEquals(it.next(), true);
+ assertEquals(it.getValue(), 3);
+ assertEquals(it.getWeight(), 3);
+ assertEquals(it.next(), false);
+ }
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org