You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by le...@apache.org on 2019/09/26 20:34:00 UTC
[incubator-datasketches-java] 01/01: Fix Theta bug wrt
SingleItemSketch merge from Memory.
This is an automated email from the ASF dual-hosted git repository.
leerho pushed a commit to branch BugFixPlusMisc
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-java.git
commit ed3c4f6d510faafaa63d413983a2e80f7e30bc76
Author: Lee Rhodes <le...@users.noreply.github.com>
AuthorDate: Thu Sep 26 13:33:44 2019 -0700
Fix Theta bug wrt SingleItemSketch merge from Memory.
Added Tuple IntegerSketch, updated DoubleSketch, created engagement
histogram example as test.
Made getThetaLong() public.
---
.../theta/DirectQuickSelectSketchR.java | 4 +-
.../org/apache/datasketches/theta/UnionImpl.java | 3 +-
.../java/org/apache/datasketches/tuple/Sketch.java | 8 +-
.../datasketches/tuple/UpdatableSummary.java | 3 +-
.../datasketches/tuple/adouble/DoubleSketch.java | 60 ++++++++
.../datasketches/tuple/adouble/DoubleSummary.java | 39 ++----
.../tuple/adouble/DoubleSummaryFactory.java | 1 +
.../tuple/adouble/DoubleSummarySetOperations.java | 6 +-
.../tuple/aninteger/IntegerSketch.java | 61 ++++++++
.../IntegerSummary.java} | 90 ++++++------
.../IntegerSummaryDeserializer.java} | 20 +--
.../IntegerSummaryFactory.java} | 25 ++--
.../IntegerSummarySetOperations.java} | 48 ++++---
.../package-info.java} | 14 +-
.../apache/datasketches/theta/UnionImplTest.java | 49 ++++++-
.../tuple/CompactSketchWithDoubleSummaryTest.java | 38 +++--
.../tuple/{ => adouble}/FilterTest.java | 23 +--
.../UpdatableSketchWithDoubleSummaryTest.java | 150 ++++++++++++--------
.../tuple/aninteger/EngagementTest.java | 154 +++++++++++++++++++++
.../tuple/aninteger/IntegerSketchTest.java | 121 ++++++++++++++++
20 files changed, 687 insertions(+), 230 deletions(-)
diff --git a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java
index 6dfc76b..c50e677 100644
--- a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java
+++ b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java
@@ -262,8 +262,8 @@ class DirectQuickSelectSketchR extends UpdateSketch {
* @return the hash table threshold
*/
static final int setHashTableThreshold(final int lgNomLongs, final int lgArrLongs) {
- //FindBugs may complain if DQS_RESIZE_THRESHOLD == REBUILD_THRESHOLD, but this allows us
- // to tune these constants for different sketches.
+ //FindBugs may complain (DB_DUPLICATE_BRANCHES) if DQS_RESIZE_THRESHOLD == REBUILD_THRESHOLD,
+ //but this allows us to tune these constants for different sketches.
final double fraction = (lgArrLongs <= lgNomLongs) ? DQS_RESIZE_THRESHOLD : REBUILD_THRESHOLD;
return (int) Math.floor(fraction * (1 << lgArrLongs));
}
diff --git a/src/main/java/org/apache/datasketches/theta/UnionImpl.java b/src/main/java/org/apache/datasketches/theta/UnionImpl.java
index 7cf48d9..dcb5ccd 100644
--- a/src/main/java/org/apache/datasketches/theta/UnionImpl.java
+++ b/src/main/java/org/apache/datasketches/theta/UnionImpl.java
@@ -369,7 +369,8 @@ final class UnionImpl extends Union {
// OR the above and the SI bit is set
if (SingleItemSketch.testPre0SeedHash(skMem.getLong(0), seedHash_)) {
final long hash = skMem.getLong(8);
- update(hash); //a hash < 1 will be rejected later
+ //backdoor update, hash function is bypassed. A hash < 1 will be rejected later
+ gadget_.hashUpdate(hash);
return;
}
return; //empty
diff --git a/src/main/java/org/apache/datasketches/tuple/Sketch.java b/src/main/java/org/apache/datasketches/tuple/Sketch.java
index 9e4d11f..5e777c9 100644
--- a/src/main/java/org/apache/datasketches/tuple/Sketch.java
+++ b/src/main/java/org/apache/datasketches/tuple/Sketch.java
@@ -116,10 +116,14 @@ public abstract class Sketch<S extends Summary> {
* @return a SketchIterator
*/
public SketchIterator<S> iterator() {
- return new SketchIterator<S>(keys_, summaries_);
+ return new SketchIterator<>(keys_, summaries_);
}
- long getThetaLong() {
+ /**
+ * Returns Theta as a long
+ * @return Theta as a long
+ */
+ public long getThetaLong() {
return theta_;
}
diff --git a/src/main/java/org/apache/datasketches/tuple/UpdatableSummary.java b/src/main/java/org/apache/datasketches/tuple/UpdatableSummary.java
index c66b425..ac5ce93 100644
--- a/src/main/java/org/apache/datasketches/tuple/UpdatableSummary.java
+++ b/src/main/java/org/apache/datasketches/tuple/UpdatableSummary.java
@@ -26,7 +26,8 @@ package org.apache.datasketches.tuple;
public interface UpdatableSummary<U> extends Summary {
/**
- * This is to provide a method of updating summaries
+ * This is to provide a method of updating summaries.
+ * This should not be called by the user.
* @param value update value
*/
public void update(U value);
diff --git a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSketch.java b/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSketch.java
new file mode 100644
index 0000000..57cc8e6
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSketch.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple.adouble;
+
+import org.apache.datasketches.ResizeFactor;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.tuple.UpdatableSketch;
+
+/**
+ * @author Lee Rhodes
+ */
+public class DoubleSketch extends UpdatableSketch<Double, DoubleSummary> {
+
+ /**
+ * Constructs this sketch with given <i>lgK</i>.
+ * @param lgK Log_base2 of <i>Nominal Entries</i>.
+ * <a href="{@docRoot}/resources/dictionary.html#nomEntries">See Nominal Entries</a>
+ * @param mode The DoubleSummary mode to be used
+ */
+ public DoubleSketch(final int lgK, final DoubleSummary.Mode mode) {
+ super(1 << lgK, ResizeFactor.X8.ordinal(), 1.0F, new DoubleSummaryFactory(mode));
+ }
+
+ /**
+ * Constructs this sketch from a Memory image, which must be from a DoubleSketch, and
+ * usually with data.
+ * @param mem the given Memory
+ * @param mode The DoubleSummary mode to be used
+ */
+ public DoubleSketch(final Memory mem, final DoubleSummary.Mode mode) {
+ super(mem, new DoubleSummaryDeserializer(), new DoubleSummaryFactory(mode));
+ }
+
+ @Override
+ public void update(final String key, final Double value) {
+ super.update(key, value);
+ }
+
+ @Override
+ public void update(final long key, final Double value) {
+ super.update(key, value);
+ }
+}
diff --git a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java b/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java
index 017f3a6..ae6b7c0 100644
--- a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java
+++ b/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java
@@ -31,6 +31,8 @@ import org.apache.datasketches.tuple.UpdatableSummary;
* Supported modes: Sum, Min, Max, AlwaysOne, Increment. The default mode is Sum.
*/
public final class DoubleSummary implements UpdatableSummary<Double> {
+ private double value_;
+ private final Mode mode_;
/**
* The aggregation modes for this Summary
@@ -48,6 +50,7 @@ public final class DoubleSummary implements UpdatableSummary<Double> {
* <p>New retained value = min(previous retained value, incoming value)</p>
*/
Min,
+
/**
* The aggregation mode is the maximum function.
* <p>New retained value = max(previous retained value, incoming value)</p>
@@ -58,27 +61,21 @@ public final class DoubleSummary implements UpdatableSummary<Double> {
* The aggregation mode is always one.
* <p>New retained value = 1.0</p>
*/
- AlwaysOne,
-
- /**
- * The aggregation mode is increment by one.
- * <p>New retained value = previous retained value + 1.0</p>
- */
- Increment
+ AlwaysOne
}
- private double value_;
- private final Mode mode_;
-
/**
- * Creates an instance of DoubleSummary with zero starting value and default mode (Sum)
+ * Creates an instance of DoubleSummary with a given starting value and mode
+ * @param value starting value
+ * @param mode update mode
*/
- public DoubleSummary() {
- this(0, Mode.Sum);
+ private DoubleSummary(final double value, final Mode mode) {
+ value_ = value;
+ mode_ = mode;
}
/**
- * Creates an instance of DoubleSummary with a starting value and a given mode (Sum)
+ * Creates an instance of DoubleSummary with a given mode.
* @param mode update mode
*/
public DoubleSummary(final Mode mode) {
@@ -96,21 +93,9 @@ public final class DoubleSummary implements UpdatableSummary<Double> {
case AlwaysOne:
value_ = 1.0;
break;
- case Increment:
- value_ = 0;
}
}
- /**
- * Creates an instance of DoubleSummary with a given starting value and mode
- * @param value starting value
- * @param mode update mode
- */
- public DoubleSummary(final double value, final Mode mode) {
- value_ = value;
- mode_ = mode;
- }
-
@Override
public void update(final Double value) {
switch (mode_) {
@@ -126,8 +111,6 @@ public final class DoubleSummary implements UpdatableSummary<Double> {
case AlwaysOne:
value_ = 1.0;
break;
- case Increment:
- value_++;
}
}
diff --git a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummaryFactory.java b/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummaryFactory.java
index 1856d15..8dd7ea7 100644
--- a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummaryFactory.java
+++ b/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummaryFactory.java
@@ -31,6 +31,7 @@ public final class DoubleSummaryFactory implements SummaryFactory<DoubleSummary>
/**
* Creates an instance of DoubleSummaryFactory with default mode
*/
+ @Deprecated
public DoubleSummaryFactory() {
summaryMode_ = DoubleSummary.Mode.Sum;
}
diff --git a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummarySetOperations.java b/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummarySetOperations.java
index 93e3c35..d859883 100644
--- a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummarySetOperations.java
+++ b/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummarySetOperations.java
@@ -30,9 +30,12 @@ public final class DoubleSummarySetOperations implements SummarySetOperations<Do
private final Mode summaryMode_;
+ //TODO see IntegerSummarySetOperations for better model
+
/**
* Creates an instance with default mode.
*/
+ @Deprecated
public DoubleSummarySetOperations() {
summaryMode_ = DoubleSummary.Mode.Sum;
}
@@ -53,13 +56,14 @@ public final class DoubleSummarySetOperations implements SummarySetOperations<Do
return result;
}
- @Override
+
/**
* Intersection is not well defined or even meaningful between numeric values.
* Nevertheless, this can be defined to be just a different type of aggregation.
* In this case it is defined to be the same as union. It can be overridden to
* be a more meaningful operation.
*/
+ @Override
public DoubleSummary intersection(final DoubleSummary a, final DoubleSummary b) {
return union(a, b);
}
diff --git a/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSketch.java b/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSketch.java
new file mode 100644
index 0000000..9d75912
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSketch.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple.aninteger;
+
+import org.apache.datasketches.ResizeFactor;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.tuple.UpdatableSketch;
+
+/**
+ * @author Lee Rhodes
+ */
+public class IntegerSketch extends UpdatableSketch<Integer, IntegerSummary> {
+
+ /**
+ * Constructs this sketch with given <i>lgK</i>.
+ * @param lgK Log_base2 of <i>Nominal Entries</i>.
+ * <a href="{@docRoot}/resources/dictionary.html#nomEntries">See Nominal Entries</a>
+ * @param mode The IntegerSummary mode to be used
+ */
+ public IntegerSketch(final int lgK, final IntegerSummary.Mode mode) {
+ super(1 << lgK, ResizeFactor.X8.ordinal(), 1.0F, new IntegerSummaryFactory(mode));
+ }
+
+ /**
+ * Constructs this sketch from a Memory image, which must be from an IntegerSketch, and
+ * usually with data.
+ * @param mem the given Memory
+ * @param mode The IntegerSummary mode to be used
+ */
+ public IntegerSketch(final Memory mem, final IntegerSummary.Mode mode) {
+ super(mem, new IntegerSummaryDeserializer(), new IntegerSummaryFactory(mode));
+ }
+
+ @Override
+ public void update(final String key, final Integer value) {
+ super.update(key, value);
+ }
+
+ @Override
+ public void update(final long key, final Integer value) {
+ super.update(key, value);
+ }
+
+}
diff --git a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java b/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummary.java
similarity index 58%
copy from src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java
copy to src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummary.java
index 017f3a6..a0e3e29 100644
--- a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummary.java
+++ b/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummary.java
@@ -17,7 +17,7 @@
* under the License.
*/
-package org.apache.datasketches.tuple.adouble;
+package org.apache.datasketches.tuple.aninteger;
import org.apache.datasketches.ByteArrayUtil;
import org.apache.datasketches.memory.Memory;
@@ -25,12 +25,14 @@ import org.apache.datasketches.tuple.DeserializeResult;
import org.apache.datasketches.tuple.UpdatableSummary;
/**
- * Summary for generic tuple sketches of type Double.
- * This summary keeps a double value. On update a predefined operation is performed depending on
+ * Summary for generic tuple sketches of type Integer.
+ * This summary keeps an Integer value. On update a predefined operation is performed depending on
* the mode.
* Supported modes: Sum, Min, Max, AlwaysOne, Increment. The default mode is Sum.
*/
-public final class DoubleSummary implements UpdatableSummary<Double> {
+public class IntegerSummary implements UpdatableSummary<Integer> {
+ private int value_;
+ private final Mode mode_;
/**
* The aggregation modes for this Summary
@@ -48,6 +50,7 @@ public final class DoubleSummary implements UpdatableSummary<Double> {
* <p>New retained value = min(previous retained value, incoming value)</p>
*/
Min,
+
/**
* The aggregation mode is the maximum function.
* <p>New retained value = max(previous retained value, incoming value)</p>
@@ -56,66 +59,53 @@ public final class DoubleSummary implements UpdatableSummary<Double> {
/**
* The aggregation mode is always one.
- * <p>New retained value = 1.0</p>
- */
- AlwaysOne,
-
- /**
- * The aggregation mode is increment by one.
- * <p>New retained value = previous retained value + 1.0</p>
+ * <p>New retained value = 1</p>
*/
- Increment
+ AlwaysOne
}
- private double value_;
- private final Mode mode_;
-
/**
- * Creates an instance of DoubleSummary with zero starting value and default mode (Sum)
+ * Creates an instance of IntegerSummary with a given starting value and mode.
+ * @param value starting value
+ * @param mode update mode
*/
- public DoubleSummary() {
- this(0, Mode.Sum);
+ private IntegerSummary(final int value, final Mode mode) {
+ value_ = value;
+ mode_ = mode;
}
/**
- * Creates an instance of DoubleSummary with a starting value and a given mode (Sum)
- * @param mode update mode
+ * Creates an instance of IntegerSummary with a given mode.
+ * @param mode update mode. This should not be called by a user.
*/
- public DoubleSummary(final Mode mode) {
+ public IntegerSummary(final Mode mode) {
mode_ = mode;
switch (mode) {
case Sum:
value_ = 0;
break;
case Min:
- value_ = Double.POSITIVE_INFINITY;
+ value_ = Integer.MAX_VALUE;
break;
case Max:
- value_ = Double.NEGATIVE_INFINITY;
+ value_ = Integer.MIN_VALUE;
break;
case AlwaysOne:
- value_ = 1.0;
+ value_ = 1;
break;
- case Increment:
- value_ = 0;
}
}
/**
- * Creates an instance of DoubleSummary with a given starting value and mode
- * @param value starting value
- * @param mode update mode
+ * Updates an instance of IntegerSummary with the given value.
+ * This should not be called by the user.
+ * @param value The given value.
*/
- public DoubleSummary(final double value, final Mode mode) {
- value_ = value;
- mode_ = mode;
- }
-
@Override
- public void update(final Double value) {
+ public void update(final Integer value) {
switch (mode_) {
case Sum:
- value_ += value.doubleValue();
+ value_ += value;
break;
case Min:
if (value < value_) { value_ = value; }
@@ -124,45 +114,43 @@ public final class DoubleSummary implements UpdatableSummary<Double> {
if (value > value_) { value_ = value; }
break;
case AlwaysOne:
- value_ = 1.0;
+ value_ = 1;
break;
- case Increment:
- value_++;
}
}
@Override
- public DoubleSummary copy() {
- return new DoubleSummary(value_, mode_);
+ public IntegerSummary copy() {
+ return new IntegerSummary(value_, mode_);
}
/**
- * @return current value of the DoubleSummary
+ * @return current value of the IntegerSummary
*/
- public double getValue() {
+ public int getValue() {
return value_;
}
- private static final int SERIALIZED_SIZE_BYTES = 9;
+ private static final int SERIALIZED_SIZE_BYTES = 5;
private static final int VALUE_INDEX = 0;
- private static final int MODE_BYTE_INDEX = 8;
+ private static final int MODE_BYTE_INDEX = 4;
@Override
public byte[] toByteArray() {
final byte[] bytes = new byte[SERIALIZED_SIZE_BYTES];
- ByteArrayUtil.putDoubleLE(bytes, VALUE_INDEX, value_);
+ ByteArrayUtil.putIntLE(bytes, VALUE_INDEX, value_);
bytes[MODE_BYTE_INDEX] = (byte) mode_.ordinal();
return bytes;
}
/**
- * Creates an instance of the DoubleSummary given a serialized representation
- * @param mem Memory object with serialized DoubleSummary
- * @return DeserializedResult object, which contains a DoubleSummary object and number of bytes
+ * Creates an instance of the IntegerSummary given a serialized representation
+ * @param mem Memory object with serialized IntegerSummary
+ * @return DeserializeResult object, which contains an IntegerSummary object and number of bytes
* read from the Memory
*/
- public static DeserializeResult<DoubleSummary> fromMemory(final Memory mem) {
- return new DeserializeResult<>(new DoubleSummary(mem.getDouble(VALUE_INDEX),
+ public static DeserializeResult<IntegerSummary> fromMemory(final Memory mem) {
+ return new DeserializeResult<>(new IntegerSummary(mem.getInt(VALUE_INDEX),
Mode.values()[mem.getByte(MODE_BYTE_INDEX)]), SERIALIZED_SIZE_BYTES);
}
diff --git a/src/main/java/org/apache/datasketches/tuple/UpdatableSummary.java b/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummaryDeserializer.java
similarity index 64%
copy from src/main/java/org/apache/datasketches/tuple/UpdatableSummary.java
copy to src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummaryDeserializer.java
index c66b425..0a4529e 100644
--- a/src/main/java/org/apache/datasketches/tuple/UpdatableSummary.java
+++ b/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummaryDeserializer.java
@@ -17,18 +17,20 @@
* under the License.
*/
-package org.apache.datasketches.tuple;
+package org.apache.datasketches.tuple.aninteger;
+
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.tuple.DeserializeResult;
+import org.apache.datasketches.tuple.SummaryDeserializer;
/**
- * Interface for updating user-defined Summary
- * @param <U> type of update value
+ * @author Lee Rhodes
*/
-public interface UpdatableSummary<U> extends Summary {
+public class IntegerSummaryDeserializer implements SummaryDeserializer<IntegerSummary> {
- /**
- * This is to provide a method of updating summaries
- * @param value update value
- */
- public void update(U value);
+ @Override
+ public DeserializeResult<IntegerSummary> heapifySummary(final Memory mem) {
+ return IntegerSummary.fromMemory(mem);
+ }
}
diff --git a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummaryFactory.java b/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummaryFactory.java
similarity index 62%
copy from src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummaryFactory.java
copy to src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummaryFactory.java
index 1856d15..0ebb729 100644
--- a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummaryFactory.java
+++ b/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummaryFactory.java
@@ -17,35 +17,30 @@
* under the License.
*/
-package org.apache.datasketches.tuple.adouble;
+package org.apache.datasketches.tuple.aninteger;
import org.apache.datasketches.tuple.SummaryFactory;
/**
- * Factory for DoubleSummary.
+ * Factory for IntegerSummary.
+ *
+ * @author Lee Rhodes
*/
-public final class DoubleSummaryFactory implements SummaryFactory<DoubleSummary> {
-
- private final DoubleSummary.Mode summaryMode_;
+public class IntegerSummaryFactory implements SummaryFactory<IntegerSummary> {
- /**
- * Creates an instance of DoubleSummaryFactory with default mode
- */
- public DoubleSummaryFactory() {
- summaryMode_ = DoubleSummary.Mode.Sum;
- }
+ private final IntegerSummary.Mode summaryMode_;
/**
- * Creates an instance of DoubleSummaryFactory with a given mode
+ * Creates an instance of IntegerSummaryFactory with a given mode
* @param summaryMode summary mode
*/
- public DoubleSummaryFactory(final DoubleSummary.Mode summaryMode) {
+ public IntegerSummaryFactory(final IntegerSummary.Mode summaryMode) {
summaryMode_ = summaryMode;
}
@Override
- public DoubleSummary newSummary() {
- return new DoubleSummary(summaryMode_);
+ public IntegerSummary newSummary() {
+ return new IntegerSummary(summaryMode_);
}
}
diff --git a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummarySetOperations.java b/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummarySetOperations.java
similarity index 50%
copy from src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummarySetOperations.java
copy to src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummarySetOperations.java
index 93e3c35..2cfa118 100644
--- a/src/main/java/org/apache/datasketches/tuple/adouble/DoubleSummarySetOperations.java
+++ b/src/main/java/org/apache/datasketches/tuple/aninteger/IntegerSummarySetOperations.java
@@ -17,50 +17,52 @@
* under the License.
*/
-package org.apache.datasketches.tuple.adouble;
+package org.apache.datasketches.tuple.aninteger;
+
+import static org.apache.datasketches.tuple.aninteger.IntegerSummary.Mode;
import org.apache.datasketches.tuple.SummarySetOperations;
-import org.apache.datasketches.tuple.adouble.DoubleSummary.Mode;
/**
- * Methods for defining how unions and intersections of two objects of type DoubleSummary
+ * Methods for defining how unions and intersections of two objects of type IntegerSummary
* are performed. These methods are not called directly by a user.
+ *
+ * @author Lee Rhodes
*/
-public final class DoubleSummarySetOperations implements SummarySetOperations<DoubleSummary> {
+public class IntegerSummarySetOperations implements SummarySetOperations<IntegerSummary> {
- private final Mode summaryMode_;
+ private final Mode unionSummaryMode_;
/**
- * Creates an instance with default mode.
+ * Intersection is not well defined or even meaningful between numeric values.
+ * Nevertheless, this can be defined to be a different type of aggregation for intersecting keys.
*/
- public DoubleSummarySetOperations() {
- summaryMode_ = DoubleSummary.Mode.Sum;
- }
+ private final Mode intersectionSummaryMode_;
/**
- * Creates an instance given a DoubleSummary update mode.
- * @param summaryMode DoubleSummary update mode.
+ * Creates a new instance with two modes
+ * @param unionSummaryMode for unions
+ * @param intersectionSummaryMode for intersections
*/
- public DoubleSummarySetOperations(final Mode summaryMode) {
- summaryMode_ = summaryMode;
+ public IntegerSummarySetOperations(final Mode unionSummaryMode, final Mode intersectionSummaryMode) {
+ unionSummaryMode_ = unionSummaryMode;
+ intersectionSummaryMode_ = intersectionSummaryMode;
}
@Override
- public DoubleSummary union(final DoubleSummary a, final DoubleSummary b) {
- final DoubleSummary result = new DoubleSummary(summaryMode_);
+ public IntegerSummary union(final IntegerSummary a, final IntegerSummary b) {
+ final IntegerSummary result = new IntegerSummary(unionSummaryMode_);
result.update(a.getValue());
result.update(b.getValue());
return result;
}
@Override
- /**
- * Intersection is not well defined or even meaningful between numeric values.
- * Nevertheless, this can be defined to be just a different type of aggregation.
- * In this case it is defined to be the same as union. It can be overridden to
- * be a more meaningful operation.
- */
- public DoubleSummary intersection(final DoubleSummary a, final DoubleSummary b) {
- return union(a, b);
+ public IntegerSummary intersection(final IntegerSummary a, final IntegerSummary b) {
+ final IntegerSummary result = new IntegerSummary(intersectionSummaryMode_);
+ result.update(a.getValue());
+ result.update(b.getValue());
+ return result;
}
+
}
diff --git a/src/main/java/org/apache/datasketches/tuple/UpdatableSummary.java b/src/main/java/org/apache/datasketches/tuple/aninteger/package-info.java
similarity index 72%
copy from src/main/java/org/apache/datasketches/tuple/UpdatableSummary.java
copy to src/main/java/org/apache/datasketches/tuple/aninteger/package-info.java
index c66b425..9ca51c6 100644
--- a/src/main/java/org/apache/datasketches/tuple/UpdatableSummary.java
+++ b/src/main/java/org/apache/datasketches/tuple/aninteger/package-info.java
@@ -17,18 +17,8 @@
* under the License.
*/
-package org.apache.datasketches.tuple;
-
/**
- * Interface for updating user-defined Summary
- * @param <U> type of update value
+ * @author Lee Rhodes
*/
-public interface UpdatableSummary<U> extends Summary {
-
- /**
- * This is to provide a method of updating summaries
- * @param value update value
- */
- public void update(U value);
-}
+package org.apache.datasketches.tuple.aninteger;
diff --git a/src/test/java/org/apache/datasketches/theta/UnionImplTest.java b/src/test/java/org/apache/datasketches/theta/UnionImplTest.java
index 48395d0..7c8667b 100644
--- a/src/test/java/org/apache/datasketches/theta/UnionImplTest.java
+++ b/src/test/java/org/apache/datasketches/theta/UnionImplTest.java
@@ -26,13 +26,12 @@ import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertTrue;
-import org.testng.annotations.Test;
-
+import org.apache.datasketches.SketchesArgumentException;
+import org.apache.datasketches.Util;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableDirectHandle;
import org.apache.datasketches.memory.WritableMemory;
-import org.apache.datasketches.SketchesArgumentException;
-import org.apache.datasketches.Util;
+import org.testng.annotations.Test;
@SuppressWarnings("javadoc")
public class UnionImplTest {
@@ -225,15 +224,51 @@ public class UnionImplTest {
}
@Test
+ public void checkDirectUnionSingleItem() {
+ int num = 2;
+ UpdateSketch[] skArr = new UpdateSketch[num];
+ for (int i = 0; i < num; i++) {
+ skArr[i] = new UpdateSketchBuilder().build();
+ }
+ for (int i = 0; i < (num/2); i++) {
+ skArr[i].update(i);
+ skArr[i + (num/2)].update(i);
+ skArr[i].update(i + num);
+ }
+
+ Union union = new SetOperationBuilder().buildUnion();
+ for (int i = 0; i < num; i++) {
+ union.update(skArr[i]);
+ }
+
+ CompactSketch csk = union.getResult();
+ assertEquals(csk.getEstimate(), 2.0);
+ //println(csk.toString(true, true, 1, true));
+
+ Memory[] memArr = new Memory[num];
+ for (int i = 0; i < num; i++) {
+ memArr[i] = Memory.wrap(skArr[i].compact().toByteArray());
+ }
+ union = new SetOperationBuilder().buildUnion();
+ for (int i = 0; i < num; i++) {
+ union.update(memArr[i]);
+ }
+
+ csk = union.getResult();
+ assertEquals(csk.getEstimate(), 2.0);
+ //println(csk.toString(true, true, 1, true));
+ }
+
+ @Test
public void printlnTest() {
println("PRINTING: "+this.getClass().getName());
}
/**
- * @param s value to print
+ * @param o value to print
*/
- static void println(String s) {
- //System.out.println(s); //disable here
+ static void println(Object o) {
+ //System.out.println(o.toString()); //disable here
}
}
diff --git a/src/test/java/org/apache/datasketches/tuple/CompactSketchWithDoubleSummaryTest.java b/src/test/java/org/apache/datasketches/tuple/CompactSketchWithDoubleSummaryTest.java
index 547986d..ec2222b 100644
--- a/src/test/java/org/apache/datasketches/tuple/CompactSketchWithDoubleSummaryTest.java
+++ b/src/test/java/org/apache/datasketches/tuple/CompactSketchWithDoubleSummaryTest.java
@@ -24,6 +24,7 @@ import static org.apache.datasketches.Util.getResourceBytes;
import org.apache.datasketches.SketchesArgumentException;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.tuple.adouble.DoubleSummary;
+import org.apache.datasketches.tuple.adouble.DoubleSummary.Mode;
import org.apache.datasketches.tuple.adouble.DoubleSummaryDeserializer;
import org.apache.datasketches.tuple.adouble.DoubleSummaryFactory;
import org.testng.Assert;
@@ -31,9 +32,12 @@ import org.testng.annotations.Test;
@SuppressWarnings("javadoc")
public class CompactSketchWithDoubleSummaryTest {
+ private final DoubleSummary.Mode mode = Mode.Sum;
+
@Test
public void emptyFromNonPublicConstructorNullArray() {
- CompactSketch<DoubleSummary> sketch = new CompactSketch<>(null, null, Long.MAX_VALUE, true);
+ CompactSketch<DoubleSummary> sketch =
+ new CompactSketch<>(null, null, Long.MAX_VALUE, true);
Assert.assertTrue(sketch.isEmpty());
Assert.assertFalse(sketch.isEstimationMode());
Assert.assertEquals(sketch.getEstimate(), 0.0);
@@ -50,8 +54,10 @@ public class CompactSketchWithDoubleSummaryTest {
@Test
public void emptyFromNonPublicConstructor() {
long[] keys = new long[0];
- DoubleSummary[] summaries = (DoubleSummary[]) java.lang.reflect.Array.newInstance(DoubleSummary.class, 0);
- CompactSketch<DoubleSummary> sketch = new CompactSketch<>(keys, summaries, Long.MAX_VALUE, true);
+ DoubleSummary[] summaries =
+ (DoubleSummary[]) java.lang.reflect.Array.newInstance(DoubleSummary.class, 0);
+ CompactSketch<DoubleSummary> sketch =
+ new CompactSketch<>(keys, summaries, Long.MAX_VALUE, true);
Assert.assertTrue(sketch.isEmpty());
Assert.assertFalse(sketch.isEstimationMode());
Assert.assertEquals(sketch.getEstimate(), 0.0);
@@ -67,7 +73,8 @@ public class CompactSketchWithDoubleSummaryTest {
@Test
public void emptyFromQuickSelectSketch() {
- UpdatableSketch<Double, DoubleSummary> us = new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build();
+ UpdatableSketch<Double, DoubleSummary> us =
+ new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
CompactSketch<DoubleSummary> sketch = us.compact();
Assert.assertTrue(sketch.isEmpty());
Assert.assertFalse(sketch.isEstimationMode());
@@ -84,7 +91,8 @@ public class CompactSketchWithDoubleSummaryTest {
@Test
public void exactModeFromQuickSelectSketch() {
- UpdatableSketch<Double, DoubleSummary> us = new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build();
+ UpdatableSketch<Double, DoubleSummary> us =
+ new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
us.update(1, 1.0);
us.update(2, 1.0);
us.update(3, 1.0);
@@ -111,13 +119,15 @@ public class CompactSketchWithDoubleSummaryTest {
@Test
public void serializeDeserializeSmallExact() {
- UpdatableSketch<Double, DoubleSummary> us = new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build();
+ UpdatableSketch<Double, DoubleSummary> us =
+ new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
us.update("a", 1.0);
us.update("b", 1.0);
us.update("c", 1.0);
CompactSketch<DoubleSummary> sketch1 = us.compact();
Sketch<DoubleSummary> sketch2 =
- Sketches.heapifySketch(Memory.wrap(sketch1.toByteArray()), new DoubleSummaryDeserializer());
+ Sketches.heapifySketch(Memory.wrap(sketch1.toByteArray()),
+ new DoubleSummaryDeserializer());
Assert.assertFalse(sketch2.isEmpty());
Assert.assertFalse(sketch2.isEstimationMode());
Assert.assertEquals(sketch2.getEstimate(), 3.0);
@@ -137,7 +147,8 @@ public class CompactSketchWithDoubleSummaryTest {
@Test
public void serializeDeserializeEstimation() throws Exception {
- UpdatableSketch<Double, DoubleSummary> us = new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build();
+ UpdatableSketch<Double, DoubleSummary> us =
+ new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
for (int i = 0; i < 8192; i++) {
us.update(i, 1.0);
}
@@ -165,19 +176,22 @@ public class CompactSketchWithDoubleSummaryTest {
@Test(expectedExceptions = SketchesArgumentException.class)
public void deserializeWrongType() {
- UpdatableSketch<Double, DoubleSummary> us = new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build();
+ UpdatableSketch<Double, DoubleSummary> us =
+ new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
for (int i = 0; i < 8192; i++) {
us.update(i, 1.0);
}
CompactSketch<DoubleSummary> sketch1 = us.compact();
- Sketches.heapifyUpdatableSketch(Memory.wrap(sketch1.toByteArray()), new DoubleSummaryDeserializer(),
- new DoubleSummaryFactory());
+ Sketches.heapifyUpdatableSketch(Memory.wrap(sketch1.toByteArray()),
+ new DoubleSummaryDeserializer(),
+ new DoubleSummaryFactory(mode));
}
@Test
public void serialVersion1Compatibility() throws Exception {
byte[] bytes = getResourceBytes("CompactSketchWithDoubleSummary4K_serialVersion1.bin");
- Sketch<DoubleSummary> sketch = Sketches.heapifySketch(Memory.wrap(bytes), new DoubleSummaryDeserializer());
+ Sketch<DoubleSummary> sketch = Sketches.heapifySketch(Memory.wrap(bytes),
+ new DoubleSummaryDeserializer());
Assert.assertTrue(sketch.isEstimationMode());
Assert.assertEquals(sketch.getEstimate(), 8192, 8192 * 0.99);
Assert.assertEquals(sketch.getRetainedEntries(), 4096);
diff --git a/src/test/java/org/apache/datasketches/tuple/FilterTest.java b/src/test/java/org/apache/datasketches/tuple/adouble/FilterTest.java
similarity index 91%
rename from src/test/java/org/apache/datasketches/tuple/FilterTest.java
rename to src/test/java/org/apache/datasketches/tuple/adouble/FilterTest.java
index 1fbf091..4615768 100644
--- a/src/test/java/org/apache/datasketches/tuple/FilterTest.java
+++ b/src/test/java/org/apache/datasketches/tuple/adouble/FilterTest.java
@@ -17,21 +17,26 @@
* under the License.
*/
-package org.apache.datasketches.tuple;
+package org.apache.datasketches.tuple.adouble;
import java.util.Random;
-import org.testng.Assert;
-import org.testng.annotations.Test;
-
+import org.apache.datasketches.tuple.Filter;
+import org.apache.datasketches.tuple.Sketch;
+import org.apache.datasketches.tuple.Sketches;
+import org.apache.datasketches.tuple.UpdatableSketch;
+import org.apache.datasketches.tuple.UpdatableSketchBuilder;
import org.apache.datasketches.tuple.adouble.DoubleSummary;
+import org.apache.datasketches.tuple.adouble.DoubleSummary.Mode;
import org.apache.datasketches.tuple.adouble.DoubleSummaryFactory;
+import org.testng.Assert;
+import org.testng.annotations.Test;
@SuppressWarnings("javadoc")
public class FilterTest {
-
private static final int numberOfElements = 100;
private static final Random random = new Random(1);//deterministic for this class
+ private final DoubleSummary.Mode mode = Mode.Sum;
@Test
public void emptySketch() {
@@ -64,7 +69,7 @@ public class FilterTest {
@Test
public void filledSketchShouldBehaveTheSame() {
UpdatableSketch<Double, DoubleSummary> sketch =
- new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build();
+ new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
fillSketch(sketch, numberOfElements, 0.0);
@@ -82,7 +87,7 @@ public class FilterTest {
@Test
public void filledSketchShouldFilterOutElements() {
UpdatableSketch<Double, DoubleSummary> sketch =
- new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build();
+ new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
fillSketch(sketch, numberOfElements, 0.0);
fillSketch(sketch, 2 * numberOfElements, 1.0);
@@ -101,7 +106,7 @@ public class FilterTest {
@Test
public void filteringInEstimationMode() {
UpdatableSketch<Double, DoubleSummary> sketch =
- new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build();
+ new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
int n = 10000;
fillSketch(sketch, n, 0.0);
@@ -122,7 +127,7 @@ public class FilterTest {
public void nonEmptySketchWithNoEntries() {
UpdatableSketch<Double, DoubleSummary> sketch =
new UpdatableSketchBuilder<>(
- new DoubleSummaryFactory()).setSamplingProbability(0.0001f).build();
+ new DoubleSummaryFactory(mode)).setSamplingProbability(0.0001f).build();
sketch.update(0, 0.0);
Assert.assertFalse(sketch.isEmpty());
diff --git a/src/test/java/org/apache/datasketches/tuple/adouble/UpdatableSketchWithDoubleSummaryTest.java b/src/test/java/org/apache/datasketches/tuple/adouble/UpdatableSketchWithDoubleSummaryTest.java
index ea18d4f..7f40d31 100644
--- a/src/test/java/org/apache/datasketches/tuple/adouble/UpdatableSketchWithDoubleSummaryTest.java
+++ b/src/test/java/org/apache/datasketches/tuple/adouble/UpdatableSketchWithDoubleSummaryTest.java
@@ -19,6 +19,8 @@
package org.apache.datasketches.tuple.adouble;
+import static org.testng.Assert.assertEquals;
+
import org.apache.datasketches.ResizeFactor;
import org.apache.datasketches.SketchesArgumentException;
import org.apache.datasketches.memory.Memory;
@@ -37,11 +39,14 @@ import org.testng.annotations.Test;
@SuppressWarnings("javadoc")
public class UpdatableSketchWithDoubleSummaryTest {
+ private final DoubleSummary.Mode mode = Mode.Sum;
@Test
public void isEmpty() {
- UpdatableSketch<Double, DoubleSummary> sketch =
- new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build();
+ int lgK = 12;
+ DoubleSketch sketch = new DoubleSketch(lgK, mode);
+// UpdatableSketch<Double, DoubleSummary> sketch =
+// new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
Assert.assertTrue(sketch.isEmpty());
Assert.assertFalse(sketch.isEstimationMode());
Assert.assertEquals(sketch.getEstimate(), 0.0);
@@ -56,10 +61,41 @@ public class UpdatableSketchWithDoubleSummaryTest {
}
@Test
+ public void serDeTest() {
+ int lgK = 12;
+ int K = 1 << lgK;
+ DoubleSummary.Mode a1Mode = DoubleSummary.Mode.AlwaysOne;
+ DoubleSketch a1Sk = new DoubleSketch(lgK, a1Mode);
+ int m = 2 * K;
+ for (int key = 0; key < m; key++) {
+ a1Sk.update(key, 1.0);
+ }
+ double est1 = a1Sk.getEstimate();
+ Memory mem = Memory.wrap(a1Sk.toByteArray());
+ DoubleSketch a1Sk2 = new DoubleSketch(mem, a1Mode);
+ double est2 = a1Sk2.getEstimate();
+ assertEquals(est1, est2);
+ }
+
+ @Test
+ public void checkStringKey() {
+ int lgK = 12;
+ int K = 1 << lgK;
+ DoubleSummary.Mode a1Mode = DoubleSummary.Mode.AlwaysOne;
+ DoubleSketch a1Sk1 = new DoubleSketch(lgK, a1Mode);
+ int m = K / 2;
+ for (int key = 0; key < m; key++) {
+ a1Sk1.update(Integer.toHexString(key), 1.0);
+ }
+ assertEquals(a1Sk1.getEstimate(), K / 2.0);
+ }
+
+
+ @Test
public void isEmptyWithSampling() {
float samplingProbability = 0.1f;
UpdatableSketch<Double, DoubleSummary> sketch =
- new UpdatableSketchBuilder<>(new DoubleSummaryFactory())
+ new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode))
.setSamplingProbability(samplingProbability).build();
Assert.assertTrue(sketch.isEmpty());
Assert.assertFalse(sketch.isEstimationMode());
@@ -75,7 +111,7 @@ public class UpdatableSketchWithDoubleSummaryTest {
float samplingProbability = 0.001f;
UpdatableSketch<Double, DoubleSummary> sketch =
new UpdatableSketchBuilder<>(
- new DoubleSummaryFactory()).setSamplingProbability(samplingProbability).build();
+ new DoubleSummaryFactory(mode)).setSamplingProbability(samplingProbability).build();
sketch.update("a", 1.0);
Assert.assertFalse(sketch.isEmpty());
Assert.assertTrue(sketch.isEstimationMode());
@@ -90,7 +126,7 @@ public class UpdatableSketchWithDoubleSummaryTest {
public void exactMode() {
UpdatableSketch<Double, DoubleSummary> sketch =
new UpdatableSketchBuilder<>(
- new DoubleSummaryFactory()).build();
+ new DoubleSummaryFactory(mode)).build();
Assert.assertTrue(sketch.isEmpty());
Assert.assertEquals(sketch.getEstimate(), 0.0);
for (int i = 1; i <= 4096; i++) {
@@ -129,7 +165,7 @@ public class UpdatableSketchWithDoubleSummaryTest {
public void estimationMode() {
UpdatableSketch<Double, DoubleSummary> sketch =
new UpdatableSketchBuilder<>(
- new DoubleSummaryFactory()).build();
+ new DoubleSummaryFactory(mode)).build();
Assert.assertEquals(sketch.getEstimate(), 0.0);
for (int i = 1; i <= 8192; i++) {
sketch.update(i, 1.0);
@@ -161,7 +197,7 @@ public class UpdatableSketchWithDoubleSummaryTest {
public void estimationModeWithSamplingNoResizing() {
UpdatableSketch<Double, DoubleSummary> sketch =
new UpdatableSketchBuilder<>(
- new DoubleSummaryFactory())
+ new DoubleSummaryFactory(mode))
.setSamplingProbability(0.5f)
.setResizeFactor(ResizeFactor.X1).build();
for (int i = 0; i < 16384; i++) {
@@ -176,7 +212,7 @@ public class UpdatableSketchWithDoubleSummaryTest {
@Test
public void updatesOfAllKeyTypes() {
UpdatableSketch<Double, DoubleSummary> sketch =
- new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build();
+ new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
sketch.update(1L, 1.0);
sketch.update(2.0, 1.0);
byte[] bytes = { 3 };
@@ -189,18 +225,18 @@ public class UpdatableSketchWithDoubleSummaryTest {
Assert.assertEquals(sketch.getEstimate(), 6.0);
}
- @Test
- public void updateDoubleSummary() {
- DoubleSummary ds = new DoubleSummary();
- ds.update(1.0);
- Assert.assertEquals(ds.getValue(), 1.0);
- }
+// @Test
+// public void updateDoubleSummary() {
+// DoubleSummary ds = new DoubleSummary();
+// ds.update(1.0);
+// Assert.assertEquals(ds.getValue(), 1.0);
+// }
@Test
public void doubleSummaryDefaultSumMode() {
UpdatableSketch<Double, DoubleSummary> sketch =
new UpdatableSketchBuilder<>(
- new DoubleSummaryFactory()).build();
+ new DoubleSummaryFactory(mode)).build();
{
sketch.update(1, 1.0);
Assert.assertEquals(sketch.getRetainedEntries(), 1);
@@ -292,12 +328,12 @@ public class UpdatableSketchWithDoubleSummaryTest {
@Test
public void serializeDeserializeExact() throws Exception {
UpdatableSketch<Double, DoubleSummary> sketch1 =
- new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build();
+ new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
sketch1.update(1, 1.0);
UpdatableSketch<Double, DoubleSummary> sketch2 = Sketches.heapifyUpdatableSketch(
Memory.wrap(sketch1.toByteArray()),
- new DoubleSummaryDeserializer(), new DoubleSummaryFactory());
+ new DoubleSummaryDeserializer(), new DoubleSummaryFactory(mode));
Assert.assertEquals(sketch2.getEstimate(), 1.0);
SketchIterator<DoubleSummary> it = sketch2.iterator();
@@ -317,7 +353,7 @@ public class UpdatableSketchWithDoubleSummaryTest {
public void serializeDeserializeEstimationNoResizing() throws Exception {
UpdatableSketch<Double, DoubleSummary> sketch1 =
new UpdatableSketchBuilder<>(
- new DoubleSummaryFactory()).setResizeFactor(ResizeFactor.X1).build();
+ new DoubleSummaryFactory(mode)).setResizeFactor(ResizeFactor.X1).build();
for (int j = 0; j < 10; j++) {
for (int i = 0; i < 8192; i++) {
sketch1.update(i, 1.0);
@@ -348,7 +384,7 @@ public class UpdatableSketchWithDoubleSummaryTest {
int sketchSize = 16384;
int numberOfUniques = sketchSize;
UpdatableSketch<Double, DoubleSummary> sketch1 =
- new UpdatableSketchBuilder<>(new DoubleSummaryFactory())
+ new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode))
.setNominalEntries(sketchSize).setSamplingProbability(0.5f).build();
for (int i = 0; i < numberOfUniques; i++) {
sketch1.update(i, 1.0);
@@ -364,21 +400,21 @@ public class UpdatableSketchWithDoubleSummaryTest {
@Test
public void unionExactMode() {
UpdatableSketch<Double, DoubleSummary> sketch1 =
- new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build();
+ new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
sketch1.update(1, 1.0);
sketch1.update(1, 1.0);
sketch1.update(1, 1.0);
sketch1.update(2, 1.0);
UpdatableSketch<Double, DoubleSummary> sketch2 =
- new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build();
+ new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
sketch2.update(2, 1.0);
sketch2.update(2, 1.0);
sketch2.update(3, 1.0);
sketch2.update(3, 1.0);
sketch2.update(3, 1.0);
- Union<DoubleSummary> union = new Union<>(new DoubleSummarySetOperations());
+ Union<DoubleSummary> union = new Union<>(new DoubleSummarySetOperations(mode));
union.update(sketch1);
union.update(sketch2);
CompactSketch<DoubleSummary> result = union.getResult();
@@ -408,19 +444,19 @@ public class UpdatableSketchWithDoubleSummaryTest {
public void unionEstimationMode() {
int key = 0;
UpdatableSketch<Double, DoubleSummary> sketch1 =
- new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build();
+ new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
for (int i = 0; i < 8192; i++) {
sketch1.update(key++, 1.0);
}
key -= 4096; // overlap half of the entries
UpdatableSketch<Double, DoubleSummary> sketch2 =
- new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build();
+ new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
for (int i = 0; i < 8192; i++) {
sketch2.update(key++, 1.0);
}
- Union<DoubleSummary> union = new Union<>(4096, new DoubleSummarySetOperations());
+ Union<DoubleSummary> union = new Union<>(4096, new DoubleSummarySetOperations(mode));
union.update(sketch1);
union.update(sketch2);
CompactSketch<DoubleSummary> result = union.getResult();
@@ -433,7 +469,7 @@ public class UpdatableSketchWithDoubleSummaryTest {
public void unionMixedMode() {
int key = 0;
UpdatableSketch<Double, DoubleSummary> sketch1 =
- new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build();
+ new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
for (int i = 0; i < 1000; i++) {
sketch1.update(key++, 1.0);
//System.out.println("theta1=" + sketch1.getTheta() + " " + sketch1.getThetaLong());
@@ -442,13 +478,13 @@ public class UpdatableSketchWithDoubleSummaryTest {
key -= 500; // overlap half of the entries
UpdatableSketch<Double, DoubleSummary> sketch2 =
new UpdatableSketchBuilder<>
- (new DoubleSummaryFactory()).setSamplingProbability(0.2f).build();
+ (new DoubleSummaryFactory(mode)).setSamplingProbability(0.2f).build();
for (int i = 0; i < 20000; i++) {
sketch2.update(key++, 1.0);
//System.out.println("theta2=" + sketch2.getTheta() + " " + sketch2.getThetaLong());
}
- Union<DoubleSummary> union = new Union<>(4096, new DoubleSummarySetOperations());
+ Union<DoubleSummary> union = new Union<>(4096, new DoubleSummarySetOperations(mode));
union.update(sketch1);
union.update(sketch2);
CompactSketch<DoubleSummary> result = union.getResult();
@@ -460,9 +496,9 @@ public class UpdatableSketchWithDoubleSummaryTest {
@Test
public void intersectionEmpty() {
UpdatableSketch<Double, DoubleSummary> sketch =
- new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build();
+ new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
Intersection<DoubleSummary> intersection =
- new Intersection<>(new DoubleSummarySetOperations());
+ new Intersection<>(new DoubleSummarySetOperations(mode));
intersection.update(sketch);
CompactSketch<DoubleSummary> result = intersection.getResult();
Assert.assertEquals(result.getRetainedEntries(), 0);
@@ -476,10 +512,10 @@ public class UpdatableSketchWithDoubleSummaryTest {
public void intersectionNotEmptyNoEntries() {
UpdatableSketch<Double, DoubleSummary> sketch1 =
new UpdatableSketchBuilder<>
- (new DoubleSummaryFactory()).setSamplingProbability(0.01f).build();
+ (new DoubleSummaryFactory(mode)).setSamplingProbability(0.01f).build();
sketch1.update("a", 1.0); // this happens to get rejected because of sampling with low probability
Intersection<DoubleSummary> intersection =
- new Intersection<>(new DoubleSummarySetOperations());
+ new Intersection<>(new DoubleSummarySetOperations(mode));
intersection.update(sketch1);
CompactSketch<DoubleSummary> result = intersection.getResult();
Assert.assertEquals(result.getRetainedEntries(), 0);
@@ -492,13 +528,13 @@ public class UpdatableSketchWithDoubleSummaryTest {
@Test
public void intersectionExactWithNull() {
UpdatableSketch<Double, DoubleSummary> sketch1 =
- new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build();
+ new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
sketch1.update(1, 1.0);
sketch1.update(2, 1.0);
sketch1.update(3, 1.0);
Intersection<DoubleSummary> intersection =
- new Intersection<>(new DoubleSummarySetOperations());
+ new Intersection<>(new DoubleSummarySetOperations(mode));
intersection.update(sketch1);
intersection.update(null);
CompactSketch<DoubleSummary> result = intersection.getResult();
@@ -512,7 +548,7 @@ public class UpdatableSketchWithDoubleSummaryTest {
@Test
public void intersectionExactWithEmpty() {
UpdatableSketch<Double, DoubleSummary> sketch1 =
- new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build();
+ new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
sketch1.update(1, 1.0);
sketch1.update(2, 1.0);
sketch1.update(3, 1.0);
@@ -520,7 +556,7 @@ public class UpdatableSketchWithDoubleSummaryTest {
Sketch<DoubleSummary> sketch2 = Sketches.createEmptySketch();
Intersection<DoubleSummary> intersection =
- new Intersection<>(new DoubleSummarySetOperations(Mode.Sum));
+ new Intersection<>(new DoubleSummarySetOperations(mode));
intersection.update(sketch1);
intersection.update(sketch2);
CompactSketch<DoubleSummary> result = intersection.getResult();
@@ -534,21 +570,21 @@ public class UpdatableSketchWithDoubleSummaryTest {
@Test
public void intersectionExactMode() {
UpdatableSketch<Double, DoubleSummary> sketch1 =
- new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build();
+ new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
sketch1.update(1, 1.0);
sketch1.update(1, 1.0);
sketch1.update(2, 1.0);
sketch1.update(2, 1.0);
UpdatableSketch<Double, DoubleSummary> sketch2 =
- new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build();
+ new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
sketch2.update(2, 1.0);
sketch2.update(2, 1.0);
sketch2.update(3, 1.0);
sketch2.update(3, 1.0);
Intersection<DoubleSummary> intersection =
- new Intersection<>(new DoubleSummarySetOperations());
+ new Intersection<>(new DoubleSummarySetOperations(mode));
intersection.update(sketch1);
intersection.update(sketch2);
CompactSketch<DoubleSummary> result = intersection.getResult();
@@ -577,19 +613,19 @@ public class UpdatableSketchWithDoubleSummaryTest {
public void intersectionDisjointEstimationMode() {
int key = 0;
UpdatableSketch<Double, DoubleSummary> sketch1 =
- new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build();
+ new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
for (int i = 0; i < 8192; i++) {
sketch1.update(key++, 1.0);
}
UpdatableSketch<Double, DoubleSummary> sketch2 =
- new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build();
+ new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
for (int i = 0; i < 8192; i++) {
sketch2.update(key++, 1.0);
}
Intersection<DoubleSummary> intersection =
- new Intersection<>(new DoubleSummarySetOperations());
+ new Intersection<>(new DoubleSummarySetOperations(mode));
intersection.update(sketch1);
intersection.update(sketch2);
CompactSketch<DoubleSummary> result = intersection.getResult();
@@ -613,20 +649,20 @@ public class UpdatableSketchWithDoubleSummaryTest {
public void intersectionEstimationMode() {
int key = 0;
UpdatableSketch<Double, DoubleSummary> sketch1 =
- new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build();
+ new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
for (int i = 0; i < 8192; i++) {
sketch1.update(key++, 1.0);
}
key -= 4096; // overlap half of the entries
UpdatableSketch<Double, DoubleSummary> sketch2 =
- new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build();
+ new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
for (int i = 0; i < 8192; i++) {
sketch2.update(key++, 1.0);
}
Intersection<DoubleSummary> intersection =
- new Intersection<>(new DoubleSummarySetOperations());
+ new Intersection<>(new DoubleSummarySetOperations(mode));
intersection.update(sketch1);
intersection.update(sketch2);
CompactSketch<DoubleSummary> result = intersection.getResult();
@@ -661,7 +697,7 @@ public class UpdatableSketchWithDoubleSummaryTest {
Assert.assertEquals(result.getUpperBound(1), 0.0);
UpdatableSketch<Double, DoubleSummary> sketch =
- new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build();
+ new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
aNotB.update(sketch, sketch);
result = aNotB.getResult();
Assert.assertEquals(result.getRetainedEntries(), 0);
@@ -674,10 +710,10 @@ public class UpdatableSketchWithDoubleSummaryTest {
@Test
public void aNotBEmptyA() {
UpdatableSketch<Double, DoubleSummary> sketchA =
- new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build();
+ new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
UpdatableSketch<Double, DoubleSummary> sketchB =
- new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build();
+ new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
sketchB.update(1, 1.0);
sketchB.update(2, 1.0);
@@ -694,12 +730,12 @@ public class UpdatableSketchWithDoubleSummaryTest {
@Test
public void aNotBEmptyB() {
UpdatableSketch<Double, DoubleSummary> sketchA =
- new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build();
+ new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
sketchA.update(1, 1.0);
sketchA.update(2, 1.0);
UpdatableSketch<Double, DoubleSummary> sketchB =
- new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build();
+ new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
AnotB<DoubleSummary> aNotB = new AnotB<>();
aNotB.update(sketchA, sketchB);
@@ -723,14 +759,14 @@ public class UpdatableSketchWithDoubleSummaryTest {
@Test
public void aNotBExactMode() {
UpdatableSketch<Double, DoubleSummary> sketchA =
- new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build();
+ new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
sketchA.update(1, 1.0);
sketchA.update(1, 1.0);
sketchA.update(2, 1.0);
sketchA.update(2, 1.0);
UpdatableSketch<Double, DoubleSummary> sketchB =
- new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build();
+ new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
sketchB.update(2, 1.0);
sketchB.update(2, 1.0);
sketchB.update(3, 1.0);
@@ -754,14 +790,14 @@ public class UpdatableSketchWithDoubleSummaryTest {
public void aNotBEstimationMode() {
int key = 0;
UpdatableSketch<Double, DoubleSummary> sketchA =
- new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build();
+ new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
for (int i = 0; i < 8192; i++) {
sketchA.update(key++, 1.0);
}
key -= 4096; // overlap half of the entries
UpdatableSketch<Double, DoubleSummary> sketchB =
- new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build();
+ new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
for (int i = 0; i < 8192; i++) {
sketchB.update(key++, 1.0);
}
@@ -797,14 +833,14 @@ public class UpdatableSketchWithDoubleSummaryTest {
public void aNotBEstimationModeLargeB() {
int key = 0;
UpdatableSketch<Double, DoubleSummary> sketchA =
- new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build();
+ new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
for (int i = 0; i < 10000; i++) {
sketchA.update(key++, 1.0);
}
key -= 2000; // overlap
UpdatableSketch<Double, DoubleSummary> sketchB =
- new UpdatableSketchBuilder<>(new DoubleSummaryFactory()).build();
+ new UpdatableSketchBuilder<>(new DoubleSummaryFactory(mode)).build();
for (int i = 0; i < 100000; i++) {
sketchB.update(key++, 1.0);
}
@@ -832,7 +868,7 @@ public class UpdatableSketchWithDoubleSummaryTest {
@Test(expectedExceptions = SketchesArgumentException.class)
public void invalidSamplingProbability() {
new UpdatableSketchBuilder<>
- (new DoubleSummaryFactory()).setSamplingProbability(2f).build();
+ (new DoubleSummaryFactory(mode)).setSamplingProbability(2f).build();
}
}
diff --git a/src/test/java/org/apache/datasketches/tuple/aninteger/EngagementTest.java b/src/test/java/org/apache/datasketches/tuple/aninteger/EngagementTest.java
new file mode 100644
index 0000000..d11b1c3
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/tuple/aninteger/EngagementTest.java
@@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple.aninteger;
+
+import static java.lang.Math.exp;
+import static java.lang.Math.log;
+import static java.lang.Math.round;
+import static org.testng.Assert.assertEquals;
+
+import org.apache.datasketches.tuple.CompactSketch;
+import org.apache.datasketches.tuple.SketchIterator;
+import org.apache.datasketches.tuple.Union;
+import org.testng.annotations.Test;
+
+/**
+ * @author Lee Rhodes
+ */
+@SuppressWarnings("javadoc")
+public class EngagementTest {
+
+ @Test
+ public void computeEngagementHistogram() {
+ int lgK = 12;
+ int K = 1 << lgK; // = 4096
+ int days = 30;
+ IntegerSummary.Mode sumMode = IntegerSummary.Mode.Sum;
+ int v = 0;
+ int daysPerMonth = 30;
+ IntegerSketch[] skArr = new IntegerSketch[30];
+ for (int i = 0; i < 30; i++) {
+ skArr[i] = new IntegerSketch(lgK, sumMode);
+ }
+ for (int i = 0; i <= days; i++) { //i runs 0..days inclusive: 31 generating indices
+ int numIds = numIDs(daysPerMonth, i);
+ int numDays = numDays(daysPerMonth, i);
+ int myV = v++;
+ for (int d = 0; d < numDays; d++) {
+ for (int id = 0; id < numIds; id++) {
+ skArr[d].update(myV + id, 1);
+ }
+ }
+ v += numIds;
+ }
+
+ int numVisits = unionOps(K, sumMode, skArr);
+ assertEquals(numVisits, 897);
+ }
+
+ @Test
+ public void simpleCheckAlwaysOneIntegerSketch() {
+ int lgK = 12;
+ int K = 1 << lgK; // = 4096
+ IntegerSummary.Mode a1Mode = IntegerSummary.Mode.AlwaysOne;
+
+ IntegerSketch a1Sk1 = new IntegerSketch(lgK, a1Mode);
+ IntegerSketch a1Sk2 = new IntegerSketch(lgK, a1Mode);
+
+ int m = 2 * K;
+ for (int key = 0; key < m; key++) {
+ a1Sk1.update(key, 1);
+ a1Sk2.update(key + (m/2), 1); //ranges overlap by 1/2, so the union holds 1.5m = 12288 unique keys.
+ }
+ int numVisits = unionOps(K, a1Mode, a1Sk1, a1Sk2);
+ assertEquals(numVisits, K);
+ }
+
+ private static int unionOps(int K, IntegerSummary.Mode mode, IntegerSketch ... sketches) {
+ IntegerSummarySetOperations setOps = new IntegerSummarySetOperations(mode, mode);
+ Union<IntegerSummary> union = new Union<>(K, setOps);
+ int len = sketches.length;
+
+ for (IntegerSketch isk : sketches) {
+ union.update(isk);
+ }
+ CompactSketch<IntegerSummary> result = union.getResult();
+ SketchIterator<IntegerSummary> itr = result.iterator();
+
+ int[] freqArr = new int[len +1];
+
+ while (itr.next()) {
+ int value = itr.getSummary().getValue();
+ freqArr[value]++;
+ }
+ println("Engagement Histogram:");
+ printf("%12s,%12s\n","Days Visited", "Visitors");
+ int sumVisitors = 0;
+ int sumVisits = 0;
+ for (int i = 0; i < freqArr.length; i++) {
+ int visits = freqArr[i];
+ if (visits == 0) { continue; }
+ sumVisitors += visits;
+ sumVisits += (visits * i);
+ printf("%12d,%12d\n", i, visits);
+ }
+ println("Total Visitors: " + sumVisitors);
+ println("Total Visits : " + sumVisits);
+ return sumVisits;
+ }
+
+ @Test
+ public void checkPwrLaw() {
+ int dpm = 30;
+ for (int i = 0; i <= dpm; i++) {
+ int numIds = numIDs(dpm, i);
+ int numDays = numDays(dpm, i);
+ printf("%6d%6d%6d\n", i, numIds, numDays);
+ }
+ }
+
+ private static int numIDs(int daysPerMonth, int index) {
+ double d = daysPerMonth;
+ double i = index;
+ return (int)(round(exp((i * log(d)) / d)));
+ }
+
+ private static int numDays(int daysPerMonth, int index) {
+ double d = daysPerMonth;
+ double i = index;
+ return (int)(round(exp(((d - i) * log(d)) / d)));
+
+ }
+
+ /**
+ * @param o object to print
+ */
+ static void println(Object o) {
+ //System.out.println(o.toString()); //Disable
+ }
+
+ /**
+ * @param fmt format
+ * @param args arguments
+ */
+ static void printf(String fmt, Object ... args) {
+ //System.out.printf(fmt, args); //Disable
+ }
+}
diff --git a/src/test/java/org/apache/datasketches/tuple/aninteger/IntegerSketchTest.java b/src/test/java/org/apache/datasketches/tuple/aninteger/IntegerSketchTest.java
new file mode 100644
index 0000000..34b586f
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/tuple/aninteger/IntegerSketchTest.java
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.tuple.aninteger;
+
+import static org.testng.Assert.assertEquals;
+
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.tuple.CompactSketch;
+import org.apache.datasketches.tuple.Intersection;
+import org.testng.annotations.Test;
+
+/**
+ * Tests for IntegerSketch: serialization round trip, intersection, Min/Max modes,
+ * and String keys.
+ *
+ * @author Lee Rhodes
+ */
+@SuppressWarnings("javadoc")
+public class IntegerSketchTest {
+
+  /**
+   * Fills an AlwaysOne sketch with 2K distinct int keys, rebuilds it from its serialized
+   * bytes, and checks that the rebuilt sketch reports the same estimate.
+   */
+  @Test
+  public void serDeTest() {
+    final int lgK = 12;
+    final int numKeys = 2 * (1 << lgK);
+    final IntegerSummary.Mode mode = IntegerSummary.Mode.AlwaysOne;
+    final IntegerSketch source = new IntegerSketch(lgK, mode);
+    for (int key = 0; key < numKeys; key++) {
+      source.update(key, 1);
+    }
+    final double before = source.getEstimate();
+    final byte[] bytes = source.toByteArray();
+    final IntegerSketch restored = new IntegerSketch(Memory.wrap(bytes), mode);
+    final double after = restored.getEstimate();
+    assertEquals(before, after);
+  }
+
+  /**
+   * Builds two sketches of 2K keys each whose key ranges overlap on K keys, intersects
+   * them, and checks that the estimate is within 3% of K.
+   */
+  @Test
+  public void intersectTest() {
+    final int lgK = 12;
+    final int k = 1 << lgK;
+    final int numKeys = 2 * k;
+    final int shift = numKeys / 2;
+    final IntegerSummary.Mode mode = IntegerSummary.Mode.AlwaysOne;
+    final IntegerSketch lower = new IntegerSketch(lgK, mode);
+    final IntegerSketch upper = new IntegerSketch(lgK, mode);
+    for (int key = 0; key < numKeys; key++) {
+      lower.update(key, 1);
+      upper.update(key + shift, 1);
+    }
+    final IntegerSummarySetOperations setOps = new IntegerSummarySetOperations(mode, mode);
+    final Intersection<IntegerSummary> intersection = new Intersection<>(setOps);
+    intersection.update(lower);
+    intersection.update(upper);
+    final CompactSketch<IntegerSummary> result = intersection.getResult();
+    assertEquals(result.getEstimate(), k * 1.0, k * .03);
+  }
+
+  /**
+   * Feeds the values 1, 0, 2 for each of K/2 keys into a Min-mode sketch and, with the
+   * keys shifted, into a Max-mode sketch, then checks that both report the same estimate.
+   * Only the estimates are compared; the summary values are not inspected.
+   */
+  @Test
+  public void checkMinMaxMode() {
+    final int lgK = 12;
+    final int numKeys = (1 << lgK) / 2;
+    final int shift = numKeys / 2;
+    final int[] values = {1, 0, 2};
+    final IntegerSketch minSketch = new IntegerSketch(lgK, IntegerSummary.Mode.Min);
+    final IntegerSketch maxSketch = new IntegerSketch(lgK, IntegerSummary.Mode.Max);
+    for (int key = 0; key < numKeys; key++) {
+      for (int value : values) {
+        minSketch.update(key, value);
+      }
+      for (int value : values) {
+        maxSketch.update(key + shift, value);
+      }
+    }
+    final double minEstimate = minSketch.getEstimate();
+    final double maxEstimate = maxSketch.getEstimate();
+    assertEquals(minEstimate, maxEstimate);
+  }
+
+  /**
+   * Updates an AlwaysOne sketch with the hex-string form of K/2 distinct ints and checks
+   * that the estimate equals K/2.
+   */
+  @Test
+  public void checkStringKey() {
+    final int lgK = 12;
+    final int k = 1 << lgK;
+    final int numKeys = k / 2;
+    final IntegerSketch sketch = new IntegerSketch(lgK, IntegerSummary.Mode.AlwaysOne);
+    for (int key = 0; key < numKeys; key++) {
+      final String hexKey = Integer.toHexString(key);
+      sketch.update(hexKey, 1);
+    }
+    assertEquals(sketch.getEstimate(), k / 2.0);
+  }
+
+  /**
+   * Print hook; output is disabled because the only statement is commented out.
+   *
+   * @param o object to print
+   */
+  static void println(Object o) {
+    //System.out.println(o.toString()); //Disable
+  }
+
+  /**
+   * Formatted-print hook; output is disabled because the only statement is commented out.
+   *
+   * @param fmt format
+   * @param args arguments
+   */
+  static void printf(String fmt, Object ... args) {
+    //System.out.printf(fmt, args); //Disable
+  }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org