You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by le...@apache.org on 2023/06/13 22:27:18 UTC
[datasketches-java] 01/01: Update theta compact() behavior.
This is an automated email from the ASF dual-hosted git repository.
leerho pushed a commit to branch special_branch_for_theta_compact_update
in repository https://gitbox.apache.org/repos/asf/datasketches-java.git
commit 9455569cfda9508a00dd9ed0ec1f901f009a1e63
Author: Lee Rhodes <le...@users.noreply.github.com>
AuthorDate: Tue Jun 13 14:44:29 2023 -0700
Update theta compact() behavior.
Update associated javadocs.
---
.../datasketches/theta/CompactOperations.java | 2 +-
.../datasketches/theta/HeapCompactSketch.java | 1 +
.../java/org/apache/datasketches/theta/Sketch.java | 41 +--
.../datasketches/theta/CompactSketchTest.java | 279 ++++++++++++++++-----
4 files changed, 238 insertions(+), 85 deletions(-)
diff --git a/src/main/java/org/apache/datasketches/theta/CompactOperations.java b/src/main/java/org/apache/datasketches/theta/CompactOperations.java
index 42fe9fdf..a8066314 100644
--- a/src/main/java/org/apache/datasketches/theta/CompactOperations.java
+++ b/src/main/java/org/apache/datasketches/theta/CompactOperations.java
@@ -110,7 +110,7 @@ final class CompactOperations {
* This assumes hashSeed is OK; serVer = 3.
* @param srcMem the given input source Memory image
* @param dstOrdered the desired ordering of the resulting CompactSketch
- * @param dstMem Used for the target CompactSketch if it is Direct.
+ * @param dstMem Used for the target CompactSketch if it is Memory-based.
* @return a CompactSketch of the correct form.
*/
@SuppressWarnings("unused")
diff --git a/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java b/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java
index f1606884..479aa3ee 100644
--- a/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java
+++ b/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java
@@ -77,6 +77,7 @@ class HeapCompactSketch extends CompactSketch {
@Override
public CompactSketch compact(final boolean dstOrdered, final WritableMemory dstMem) {
+ if (dstMem == null && (dstOrdered == false || this.ordered_ == dstOrdered)) { return this; }
return componentsToCompact(getThetaLong(), getRetainedEntries(true), getSeedHash(), isEmpty(),
true, ordered_, dstOrdered, dstMem, getCache().clone());
}
diff --git a/src/main/java/org/apache/datasketches/theta/Sketch.java b/src/main/java/org/apache/datasketches/theta/Sketch.java
index dc7df5a7..99642e12 100644
--- a/src/main/java/org/apache/datasketches/theta/Sketch.java
+++ b/src/main/java/org/apache/datasketches/theta/Sketch.java
@@ -39,7 +39,7 @@ import org.apache.datasketches.thetacommon.BinomialBoundsN;
import org.apache.datasketches.thetacommon.ThetaUtil;
/**
- * The top-level class for all sketches. This class is never constructed directly.
+ * The top-level class for all theta sketches. This class is never constructed directly.
* Use the UpdateSketch.builder() methods to create UpdateSketches.
*
* @author Lee Rhodes
@@ -193,43 +193,50 @@ public abstract class Sketch {
//Sketch interface
/**
- * Converts this sketch to a ordered CompactSketch on the Java heap.
+ * Converts this sketch to a ordered CompactSketch.
*
- * <p>If this sketch is already in the proper form, this method returns <i>this</i>,
- * otherwise, this method returns a new CompactSketch of the proper form.
+ * <p>If <i>this.isCompact() == true</i> this method returns <i>this</i>,
+ * otherwise, this method is equivalent to
+ * {@link #compact(boolean, WritableMemory) compact(true, null)}.
*
* <p>A CompactSketch is always immutable.</p>
*
- * @return this sketch as an ordered CompactSketch on the Java heap.
+ * @return this sketch as an ordered CompactSketch.
*/
public CompactSketch compact() {
- return compact(true, null);
+ return (this.isCompact()) ? (CompactSketch)this : compact(true, null);
}
/**
- * Convert this sketch to a new CompactSketch of the chosen order and direct or on the heap.
+ * Convert this sketch to a <i>CompactSketch</i>.
*
- * <p>If this sketch is already in the proper form, this operation returns <i>this</i>,
- * otherwise, this method returns a new CompactSketch of the proper form.
- *
- * <p>If this sketch is a type of UpdateSketch, the compacting process converts the hash table
- * of the UpdateSketch to a simple list of the valid hash values.
+ * <p>If this sketch is a type of <i>UpdateSketch</i>, the compacting process converts the hash table
+ * of the <i>UpdateSketch</i> to a simple list of the valid hash values.
* Any hash values of zero or equal-to or greater than theta will be discarded.
- * The number of valid values remaining in the CompactSketch depends on a number of factors,
+ * The number of valid values remaining in the <i>CompactSketch</i> depends on a number of factors,
* but may be larger or smaller than <i>Nominal Entries</i> (or <i>k</i>).
* It will never exceed 2<i>k</i>.
* If it is critical to always limit the size to no more than <i>k</i>,
- * then <i>rebuild()</i> should be called on the UpdateSketch prior to calling this method.</p>
+ * then <i>rebuild()</i> should be called on the <i>UpdateSketch</i> prior to calling this method.</p>
*
- * <p>A CompactSketch is always immutable.</p>
+ * <p>A <i>CompactSketch</i> is always immutable.</p>
+ *
+ * <p>A new <i>CompactSketch</i> object is created:</p>
+ * <ul><li>if <i>dstMem != null</i></li>
+ * <li>if <i>dstMem == null</i> and <i>this.hasMemory() == true</i></li>
+ * <li>if <i>dstMem == null</i> and <i>this</i> has more than 1 item</li> and <i>this.isOrdered() == false</i>
+ * and <i>dstOrdered == true</i>.</li>
+ *</ul>
+ *
+ * <p>Otherwise, this operation returns <i>this</i>.</p>
*
- * @param dstOrdered
+ * @param dstOrdered assumed true if this sketch is empty or has only one value
* <a href="{@docRoot}/resources/dictionary.html#dstOrdered">See Destination Ordered</a>
*
* @param dstMem
* <a href="{@docRoot}/resources/dictionary.html#dstMem">See Destination Memory</a>.
*
- * @return this sketch as a CompactSketch in the chosen form
+ * @return this sketch as a <i>CompactSketch</i>.
*/
public abstract CompactSketch compact(final boolean dstOrdered, final WritableMemory dstMem);
diff --git a/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java b/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java
index 020e306b..d5aae288 100644
--- a/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java
+++ b/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java
@@ -21,6 +21,7 @@ package org.apache.datasketches.theta;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertNotEquals;
import static org.testng.Assert.assertNotNull;
import static org.testng.Assert.assertNull;
import static org.testng.Assert.assertTrue;
@@ -220,81 +221,226 @@ public class CompactSketchTest {
@Test
public void checkCompactCachePart() {
- //phony values except for curCount = 0.
+ //phoney values except for curCount = 0.
long[] result = Intersection.compactCachePart(null, 4, 0, 0L, false);
assertEquals(result.length, 0);
}
+ //See class State
+ //Class Name
+ //Count
+ //Bytes
+ private static final boolean COMPACT = true;
+ private static final boolean EMPTY = true;
+ private static final boolean DIRECT = true;
+ private static final boolean MEMORY = true;
+ private static final boolean ORDERED = true;
+ private static final boolean ESTIMATION = true;
+
@Test
- public void checkDirectCompactSingleItemSketch() {
- State state;
+ /**
+ * Empty, memory-based Compact sketches are always ordered
+ */
+ public void checkEmptyMemoryCompactSketch() {
+ UpdateSketch sk = Sketches.updateSketchBuilder().build();
+
+ WritableMemory wmem1 = WritableMemory.allocate(16);
+ CompactSketch csk1 = sk.compact(false, wmem1); //the first parameter is ignored when empty
+ State state1 = new State("DirectCompactSketch", 0, 8, COMPACT, EMPTY, !DIRECT, MEMORY, ORDERED, !ESTIMATION);
+ state1.check(csk1);
+
+ WritableMemory wmem2 = WritableMemory.allocate(16);
+ CompactSketch csk2 = sk.compact(false, wmem2);
+ state1.check(csk2);
+
+ assertNotEquals(csk1, csk2); //different object because memory is valid
+ assertFalse(csk1 == csk2);
+
+ WritableMemory wmem3 = WritableMemory.allocate(16);
+ CompactSketch csk3 = csk1.compact(false, wmem3);
+ state1.check(csk3);
+
+ assertNotEquals(csk1, csk3); //different object because memory is valid
+ assertFalse(csk1 == csk3);
+
+ CompactSketch csk4 = csk1.compact(false, null);
+ State state4 = new State("EmptyCompactSketch", 0, 8, COMPACT, EMPTY, !DIRECT, !MEMORY, ORDERED, !ESTIMATION);
+ state4.check(csk4);
+
+ assertNotEquals(csk1, csk4); //different object because on heap
+ assertFalse(csk1 == csk4);
+
+ CompactSketch cskc = csk1.compact();
+ state1.check(cskc);
+
+ assertEquals(csk1, cskc); //the same object
+ assertTrue(csk1 == cskc);
+ }
+
+ @Test
+ /**
+ * Single-Item, memory-based Compact sketches are always ordered:
+ */
+ public void checkSingleItemMemoryCompactSketch() {
UpdateSketch sk = Sketches.updateSketchBuilder().build();
+ sk.update(1);
+
+ WritableMemory wmem1 = WritableMemory.allocate(16);
+ CompactSketch csk1 = sk.compact(false, wmem1); //the first parameter is ignored when single item
+ State state1 = new State("DirectCompactSketch", 1, 16, COMPACT, !EMPTY, !DIRECT, MEMORY, ORDERED, !ESTIMATION);
+ state1.check(csk1);
+
+ WritableMemory wmem2 = WritableMemory.allocate(16);
+ CompactSketch csk2 = sk.compact(false, wmem2); //the first parameter is ignored when single item
+ state1.check(csk2);
+
+ assertNotEquals(csk1, csk2); //different object because memory is valid
+ assertFalse(csk1 == csk2);
+
+ WritableMemory wmem3 = WritableMemory.allocate(16);
+ CompactSketch csk3 = csk1.compact(false, wmem3);
+ state1.check(csk3);
+
+ assertNotEquals(csk1, csk3); //different object because memory is valid
+ assertFalse(csk1 == csk3);
+
+ CompactSketch cskc = csk1.compact();
+ state1.check(cskc);
+
+ assertEquals(csk1, cskc); //the same object
+ assertTrue(csk1 == cskc);
+ }
- CompactSketch csko; //ordered
- CompactSketch csku; //unordered
+ @Test
+ public void checkMultipleItemMemoryCompactSketch() {
+ UpdateSketch sk = Sketches.updateSketchBuilder().build();
+ //This sequence is naturally out-of-order by the hash values.
+ sk.update(1);
+ sk.update(2);
+ sk.update(3);
+
+ WritableMemory wmem1 = WritableMemory.allocate(50);
+ CompactSketch csk1 = sk.compact(true, wmem1);
+ State state1 = new State("DirectCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, MEMORY, ORDERED, !ESTIMATION);
+ state1.check(csk1);
+
+ WritableMemory wmem2 = WritableMemory.allocate(50);
+ CompactSketch csk2 = sk.compact(false, wmem2);
+ State state2 = new State("DirectCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, MEMORY, !ORDERED, !ESTIMATION);
+ state2.check(csk2);
+
+ assertNotEquals(csk1, csk2); //different object because memory is valid
+ assertFalse(csk1 == csk2);
+
+ WritableMemory wmem3 = WritableMemory.allocate(50);
+ CompactSketch csk3 = csk1.compact(false, wmem3);
+ state2.check(csk3);
+
+ assertNotEquals(csk1, csk3); //different object because memory is valid
+ assertFalse(csk1 == csk3);
+
+ CompactSketch cskc = csk1.compact();
+ state1.check(cskc);
+
+ assertEquals(csk1, cskc); //the same object
+ assertTrue(csk1 == cskc);
+ }
+
+ @Test
+ /**
+ * Empty, heap-based Compact sketches are always ordered.
+ * All empty, heap-based, compact sketches point to the same static, final constant of 8 bytes.
+ */
+ public void checkEmptyHeapCompactSketch() {
+ UpdateSketch sk = Sketches.updateSketchBuilder().build();
- WritableMemory wmem = WritableMemory.allocate(16);
- csko = sk.compact(true, wmem); //empty, direct, ordered
- //ClassType, Count, Bytes, Compact, Empty, Direct, Memory, Ordered, Estimation
- state = new State("DirectCompactSketch", 0, 8, true, true, false, true, true, false);
- state.check(csko);
+ CompactSketch csk1 = sk.compact(false, null); //the first parameter is ignored when empty
+ State state1 = new State("EmptyCompactSketch", 0, 8, COMPACT, EMPTY, !DIRECT, !MEMORY, ORDERED, !ESTIMATION);
+ state1.check(csk1);
+
+ CompactSketch csk2 = sk.compact(false, null); //the first parameter is ignored when empty
+ state1.check(csk1);
+
+ assertEquals(csk1, csk2);
+ assertTrue(csk1 == csk2);
+
+ CompactSketch csk3 = csk1.compact(false, null);
+ state1.check(csk3);
+
+ assertEquals(csk1, csk3); //The same object
+ assertTrue(csk1 == csk3);
+
+ CompactSketch cskc = csk1.compact();
+ state1.check(cskc);
+
+ assertEquals(csk1, cskc); //The same object
+ assertTrue(csk1 == cskc);
+ }
- wmem = WritableMemory.allocate(16);
- csku = sk.compact(false, wmem); //empty, direct, unordered
- state = new State("DirectCompactSketch", 0, 8, true, true, false, true, true, false);
- state.check(csku);
+ @Test
+ /**
+ * Single-Item, heap-based Compact sketches are always ordered.
+ */
+ public void checkSingleItemHeapCompactSketch() {
+ UpdateSketch sk = Sketches.updateSketchBuilder().build();
+ sk.update(1);
+
+ CompactSketch csk1 = sk.compact(false, null); //the first parameter is ignored when single item
+ State state1 = new State("SingleItemSketch", 1, 16, COMPACT, !EMPTY, !DIRECT, !MEMORY, ORDERED, !ESTIMATION);
+ state1.check(csk1);
+
+ CompactSketch csk2 = sk.compact(false, null); //the first parameter is ignored when single item
+ state1.check(csk2);
+
+ assertNotEquals(csk1, csk2); //calling the compact(boolean, null) method creates a new object
+ assertFalse(csk1 == csk2);
+
+ CompactSketch csk3 = csk1.compact(false, null);
+ state1.check(csk3);
+
+ assertEquals(csk1, csk3); //The same object
+ assertTrue(csk1 == csk3);
+
+ CompactSketch cskc = csk1.compact(); //this, however just returns the same object.
+ state1.check(csk1);
+
+ assertEquals(csk1, cskc); //the same object
+ assertTrue(csk1 == cskc);
+ }
+ @Test
+ public void checkMultipleItemHeapCompactSketch() {
+ UpdateSketch sk = Sketches.updateSketchBuilder().build();
+ //This sequence is naturally out-of-order by the hash values.
sk.update(1);
- wmem = WritableMemory.allocate(16);
- csko = sk.compact(true, wmem); //Single, direct, ordered
- state = new State("DirectCompactSketch", 1, 16, true, false, false, true, true, false);
- state.check(csko);
-
- wmem = WritableMemory.allocate(16);
- csku = sk.compact(false, wmem); //Single, direct, unordered
- state = new State("DirectCompactSketch", 1, 16, true, false, false, true, true, false);
- state.check(csku);
-
- CompactSketch csk2o; //ordered
- CompactSketch csk2u; //unordered
-
- csk2o = csku.compact(); //single, heap, ordered
- state = new State("SingleItemSketch", 1, 16, true, false, false, false, true, false);
- state.check(csk2o);
-
- csk2o = csku.compact(true, null); //single, heap, ordered
- state.check(csk2o);
-
- csk2o = csku.compact(false, null); //single, heap, unordered
- state.check(csk2o);
-
- csk2o = csko.compact(true, null); //single, heap, ordered
- state.check(csk2o);
-
- csk2o = csko.compact(false, null); //single, heap, unordered
- state.check(csk2o);
-
- wmem = WritableMemory.allocate(16);
- csk2o = csku.compact(true, wmem);
- state.classType = "DirectCompactSketch";
- state.memory = true;
- state.check(csk2o);
-
- wmem = WritableMemory.allocate(16);
- csk2u = csku.compact(false, wmem);
- state.classType = "DirectCompactSketch";
- state.check(csk2u);
-
- wmem = WritableMemory.allocate(16);
- csk2o = csko.compact(true, wmem);
- state.classType = "DirectCompactSketch";
- state.memory = true;
- state.check(csk2o);
-
- wmem = WritableMemory.allocate(16);
- csk2u = csko.compact(false, wmem);
- state.classType = "DirectCompactSketch";
- state.check(csk2u);
+ sk.update(2);
+ sk.update(3);
+
+ CompactSketch csk1 = sk.compact(true, null); //creates a new object
+ State state1 = new State("HeapCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, !MEMORY, ORDERED, !ESTIMATION);
+ state1.check(csk1);
+
+ CompactSketch csk2 = sk.compact(false, null); //creates a new object, unordered
+ State state2 = new State("HeapCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, !MEMORY, !ORDERED, !ESTIMATION);
+ state2.check(csk2);
+
+ assertNotEquals(csk1, csk2); //order is different and different objects
+ assertFalse(csk1 == csk2);
+
+ CompactSketch csk3 = csk1.compact(true, null);
+ state1.check(csk3);
+
+ assertEquals(csk1, csk3); //the same object because wmem = null and csk1.ordered = dstOrdered
+ assertTrue(csk1 == csk3);
+
+ assertNotEquals(csk2, csk3); //different object because wmem = null and csk2.ordered = false && dstOrdered = true
+ assertFalse(csk2 == csk3);
+
+ CompactSketch cskc = csk1.compact();
+ state1.check(cskc);
+
+ assertEquals(csk1, cskc); //the same object
+ assertTrue(csk1 == cskc);
}
@Test
@@ -471,7 +617,7 @@ public class CompactSketchTest {
e.printStackTrace();
}
}
-
+
private static class State {
String classType = null;
int count = 0;
@@ -482,8 +628,7 @@ public class CompactSketchTest {
boolean memory = false;
boolean ordered = false;
boolean estimation = false;
-
-
+
State(String classType, int count, int bytes, boolean compact, boolean empty, boolean direct,
boolean memory, boolean ordered, boolean estimation) {
this.classType = classType;
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org