You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by le...@apache.org on 2023/06/13 22:27:18 UTC

[datasketches-java] 01/01: Update theta compact() behavior.

This is an automated email from the ASF dual-hosted git repository.

leerho pushed a commit to branch special_branch_for_theta_compact_update
in repository https://gitbox.apache.org/repos/asf/datasketches-java.git

commit 9455569cfda9508a00dd9ed0ec1f901f009a1e63
Author: Lee Rhodes <le...@users.noreply.github.com>
AuthorDate: Tue Jun 13 14:44:29 2023 -0700

    Update theta compact() behavior.
    
    Update associated javadocs.
---
 .../datasketches/theta/CompactOperations.java      |   2 +-
 .../datasketches/theta/HeapCompactSketch.java      |   1 +
 .../java/org/apache/datasketches/theta/Sketch.java |  41 +--
 .../datasketches/theta/CompactSketchTest.java      | 279 ++++++++++++++++-----
 4 files changed, 238 insertions(+), 85 deletions(-)

diff --git a/src/main/java/org/apache/datasketches/theta/CompactOperations.java b/src/main/java/org/apache/datasketches/theta/CompactOperations.java
index 42fe9fdf..a8066314 100644
--- a/src/main/java/org/apache/datasketches/theta/CompactOperations.java
+++ b/src/main/java/org/apache/datasketches/theta/CompactOperations.java
@@ -110,7 +110,7 @@ final class CompactOperations {
    * This assumes hashSeed is OK; serVer = 3.
    * @param srcMem the given input source Memory image
    * @param dstOrdered the desired ordering of the resulting CompactSketch
-   * @param dstMem Used for the target CompactSketch if it is Direct.
+   * @param dstMem Used for the target CompactSketch if it is Memory-based.
    * @return a CompactSketch of the correct form.
    */
   @SuppressWarnings("unused")
diff --git a/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java b/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java
index f1606884..479aa3ee 100644
--- a/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java
+++ b/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java
@@ -77,6 +77,7 @@ class HeapCompactSketch extends CompactSketch {
 
   @Override
   public CompactSketch compact(final boolean dstOrdered, final WritableMemory dstMem) {
+    if (dstMem == null && (dstOrdered == false || this.ordered_ == dstOrdered)) { return this; }
     return componentsToCompact(getThetaLong(), getRetainedEntries(true), getSeedHash(), isEmpty(),
         true, ordered_, dstOrdered, dstMem, getCache().clone());
   }
diff --git a/src/main/java/org/apache/datasketches/theta/Sketch.java b/src/main/java/org/apache/datasketches/theta/Sketch.java
index dc7df5a7..99642e12 100644
--- a/src/main/java/org/apache/datasketches/theta/Sketch.java
+++ b/src/main/java/org/apache/datasketches/theta/Sketch.java
@@ -39,7 +39,7 @@ import org.apache.datasketches.thetacommon.BinomialBoundsN;
 import org.apache.datasketches.thetacommon.ThetaUtil;
 
 /**
- * The top-level class for all sketches. This class is never constructed directly.
+ * The top-level class for all theta sketches. This class is never constructed directly.
  * Use the UpdateSketch.builder() methods to create UpdateSketches.
  *
  * @author Lee Rhodes
@@ -193,43 +193,50 @@ public abstract class Sketch {
   //Sketch interface
 
   /**
-   * Converts this sketch to a ordered CompactSketch on the Java heap.
+   * Converts this sketch to a ordered CompactSketch.
    *
-   * <p>If this sketch is already in the proper form, this method returns <i>this</i>,
-   * otherwise, this method returns a new CompactSketch of the proper form.
+   * <p>If <i>this.isCompact() == true</i> this method returns <i>this</i>,
+   * otherwise, this method is equivalent to 
+   * {@link #compact(boolean, WritableMemory) compact(true, null)}.
    *
    * <p>A CompactSketch is always immutable.</p>
    *
-   * @return this sketch as an ordered CompactSketch on the Java heap.
+   * @return this sketch as an ordered CompactSketch.
    */
   public CompactSketch compact() {
-    return compact(true, null);
+    return (this.isCompact()) ? (CompactSketch)this : compact(true, null);
   }
 
   /**
-   * Convert this sketch to a new CompactSketch of the chosen order and direct or on the heap.
+   * Convert this sketch to a <i>CompactSketch</i>.
    *
-   * <p>If this sketch is already in the proper form, this operation returns <i>this</i>,
-   * otherwise, this method returns a new CompactSketch of the proper form.
-   *
-   * <p>If this sketch is a type of UpdateSketch, the compacting process converts the hash table
-   * of the UpdateSketch to a simple list of the valid hash values.
+   * <p>If this sketch is a type of <i>UpdateSketch</i>, the compacting process converts the hash table
+   * of the <i>UpdateSketch</i> to a simple list of the valid hash values.
    * Any hash values of zero or equal-to or greater than theta will be discarded.
-   * The number of valid values remaining in the CompactSketch depends on a number of factors,
+   * The number of valid values remaining in the <i>CompactSketch</i> depends on a number of factors,
    * but may be larger or smaller than <i>Nominal Entries</i> (or <i>k</i>).
    * It will never exceed 2<i>k</i>.
    * If it is critical to always limit the size to no more than <i>k</i>,
-   * then <i>rebuild()</i> should be called on the UpdateSketch prior to calling this method.</p>
+   * then <i>rebuild()</i> should be called on the <i>UpdateSketch</i> prior to calling this method.</p>
    *
-   * <p>A CompactSketch is always immutable.</p>
+   * <p>A <i>CompactSketch</i> is always immutable.</p>
+   * 
+   * <p>A new <i>CompactSketch</i> object is created:</p>
+   * <ul><li>if <i>dstMem != null</i></li>
+   * <li>if <i>dstMem == null</i> and <i>this.hasMemory() == true</i></li>
+   * <li>if <i>dstMem == null</i> and <i>this</i> has more than 1 item</li> and <i>this.isOrdered() == false</i>
+   * and <i>dstOrdered == true</i>.</li>
+   *</ul>
+   * 
+   * <p>Otherwise, this operation returns <i>this</i>.</p>
    *
-   * @param dstOrdered
+   * @param dstOrdered assumed true if this sketch is empty or has only one value
    * <a href="{@docRoot}/resources/dictionary.html#dstOrdered">See Destination Ordered</a>
    *
    * @param dstMem
    * <a href="{@docRoot}/resources/dictionary.html#dstMem">See Destination Memory</a>.
    *
-   * @return this sketch as a CompactSketch in the chosen form
+   * @return this sketch as a <i>CompactSketch</i>.
    */
   public abstract CompactSketch compact(final boolean dstOrdered, final WritableMemory dstMem);
 
diff --git a/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java b/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java
index 020e306b..d5aae288 100644
--- a/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java
+++ b/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java
@@ -21,6 +21,7 @@ package org.apache.datasketches.theta;
 
 import static org.testng.Assert.assertEquals;
 import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertNotEquals;
 import static org.testng.Assert.assertNotNull;
 import static org.testng.Assert.assertNull;
 import static org.testng.Assert.assertTrue;
@@ -220,81 +221,226 @@ public class CompactSketchTest {
 
   @Test
   public void checkCompactCachePart() {
-    //phony values except for curCount = 0.
+    //phoney values except for curCount = 0.
     long[] result = Intersection.compactCachePart(null, 4, 0, 0L, false);
     assertEquals(result.length, 0);
   }
 
+  //See class State
+  //Class Name
+  //Count
+  //Bytes
+  private static final boolean COMPACT = true;
+  private static final boolean EMPTY = true;
+  private static final boolean DIRECT = true;
+  private static final boolean MEMORY = true;
+  private static final boolean ORDERED = true;
+  private static final boolean ESTIMATION = true;
+  
   @Test
-  public void checkDirectCompactSingleItemSketch() {
-    State state;
+  /**
+   * Empty, memory-based Compact sketches are always ordered
+   */
+  public void checkEmptyMemoryCompactSketch() {
+    UpdateSketch sk = Sketches.updateSketchBuilder().build();
+
+    WritableMemory wmem1 = WritableMemory.allocate(16);
+    CompactSketch csk1 = sk.compact(false, wmem1); //the first parameter is ignored when empty
+    State state1 = new State("DirectCompactSketch", 0, 8, COMPACT, EMPTY, !DIRECT, MEMORY, ORDERED, !ESTIMATION);
+    state1.check(csk1);
+
+    WritableMemory wmem2 = WritableMemory.allocate(16);
+    CompactSketch csk2 = sk.compact(false, wmem2);
+    state1.check(csk2);
+    
+    assertNotEquals(csk1, csk2); //different object because memory is valid
+    assertFalse(csk1 == csk2);
+    
+    WritableMemory wmem3 = WritableMemory.allocate(16);
+    CompactSketch csk3 = csk1.compact(false, wmem3);
+    state1.check(csk3);
+    
+    assertNotEquals(csk1, csk3); //different object because memory is valid
+    assertFalse(csk1 == csk3);
+    
+    CompactSketch csk4 = csk1.compact(false, null);
+    State state4 = new State("EmptyCompactSketch", 0, 8, COMPACT, EMPTY, !DIRECT, !MEMORY, ORDERED, !ESTIMATION);
+    state4.check(csk4);
+    
+    assertNotEquals(csk1, csk4); //different object because on heap
+    assertFalse(csk1 == csk4);
+    
+    CompactSketch cskc = csk1.compact();
+    state1.check(cskc);
+    
+    assertEquals(csk1, cskc); //the same object
+    assertTrue(csk1 == cskc);
+  }
+  
+  @Test
+  /**
+   * Single-Item, memory-based Compact sketches are always ordered: 
+   */
+  public void checkSingleItemMemoryCompactSketch() {
     UpdateSketch sk = Sketches.updateSketchBuilder().build();
+    sk.update(1);
+    
+    WritableMemory wmem1 = WritableMemory.allocate(16);
+    CompactSketch csk1 = sk.compact(false, wmem1); //the first parameter is ignored when single item
+    State state1 = new State("DirectCompactSketch", 1, 16, COMPACT, !EMPTY, !DIRECT, MEMORY, ORDERED, !ESTIMATION);
+    state1.check(csk1);
+
+    WritableMemory wmem2 = WritableMemory.allocate(16);
+    CompactSketch csk2 = sk.compact(false, wmem2); //the first parameter is ignored when single item
+    state1.check(csk2);
+    
+    assertNotEquals(csk1, csk2); //different object because memory is valid
+    assertFalse(csk1 == csk2);
+    
+    WritableMemory wmem3 = WritableMemory.allocate(16);
+    CompactSketch csk3 = csk1.compact(false, wmem3);
+    state1.check(csk3);
+    
+    assertNotEquals(csk1, csk3); //different object because memory is valid
+    assertFalse(csk1 == csk3);
+    
+    CompactSketch cskc = csk1.compact();
+    state1.check(cskc);
+    
+    assertEquals(csk1, cskc); //the same object
+    assertTrue(csk1 == cskc);
+  }
 
-    CompactSketch csko; //ordered
-    CompactSketch csku; //unordered
+  @Test
+  public void checkMultipleItemMemoryCompactSketch() {
+    UpdateSketch sk = Sketches.updateSketchBuilder().build();
+    //This sequence is naturally out-of-order by the hash values.
+    sk.update(1);
+    sk.update(2);
+    sk.update(3);
+    
+    WritableMemory wmem1 = WritableMemory.allocate(50);
+    CompactSketch csk1 = sk.compact(true, wmem1);
+    State state1 = new State("DirectCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, MEMORY, ORDERED, !ESTIMATION);
+    state1.check(csk1);
+
+    WritableMemory wmem2 = WritableMemory.allocate(50);
+    CompactSketch csk2 = sk.compact(false, wmem2);
+    State state2 = new State("DirectCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, MEMORY, !ORDERED, !ESTIMATION);
+    state2.check(csk2);
+    
+    assertNotEquals(csk1, csk2); //different object because memory is valid
+    assertFalse(csk1 == csk2);
+    
+    WritableMemory wmem3 = WritableMemory.allocate(50);
+    CompactSketch csk3 = csk1.compact(false, wmem3);
+    state2.check(csk3);
+    
+    assertNotEquals(csk1, csk3); //different object because memory is valid
+    assertFalse(csk1 == csk3);
+    
+    CompactSketch cskc = csk1.compact();
+    state1.check(cskc);
+    
+    assertEquals(csk1, cskc); //the same object
+    assertTrue(csk1 == cskc);
+  }
+  
+  @Test
+  /**
+   * Empty, heap-based Compact sketches are always ordered. 
+   * All empty, heap-based, compact sketches point to the same static, final constant of 8 bytes. 
+   */
+  public void checkEmptyHeapCompactSketch() {
+    UpdateSketch sk = Sketches.updateSketchBuilder().build();
 
-    WritableMemory wmem = WritableMemory.allocate(16);
-    csko = sk.compact(true, wmem); //empty, direct, ordered
-    //ClassType, Count, Bytes, Compact, Empty, Direct, Memory, Ordered, Estimation
-    state = new State("DirectCompactSketch", 0, 8, true, true, false, true, true, false);
-    state.check(csko);
+    CompactSketch csk1 = sk.compact(false, null); //the first parameter is ignored when empty
+    State state1 = new State("EmptyCompactSketch", 0, 8, COMPACT, EMPTY, !DIRECT, !MEMORY, ORDERED, !ESTIMATION);
+    state1.check(csk1);
+  
+    CompactSketch csk2 = sk.compact(false, null); //the first parameter is ignored when empty
+    state1.check(csk1);
+  
+    assertEquals(csk1, csk2);
+    assertTrue(csk1 == csk2);
+    
+    CompactSketch csk3 = csk1.compact(false, null);
+    state1.check(csk3);
+    
+    assertEquals(csk1, csk3); //The same object
+    assertTrue(csk1 == csk3);
+    
+    CompactSketch cskc = csk1.compact();
+    state1.check(cskc);
+    
+    assertEquals(csk1, cskc); //The same object
+    assertTrue(csk1 == cskc);
+  }
 
-    wmem = WritableMemory.allocate(16);
-    csku = sk.compact(false, wmem); //empty, direct, unordered
-    state = new State("DirectCompactSketch", 0, 8, true, true, false, true, true, false);
-    state.check(csku);
+  @Test
+  /**
+   * Single-Item, heap-based Compact sketches are always ordered.
+   */
+  public void checkSingleItemHeapCompactSketch() {
+    UpdateSketch sk = Sketches.updateSketchBuilder().build();
+    sk.update(1);
+    
+    CompactSketch csk1 = sk.compact(false, null); //the first parameter is ignored when single item
+    State state1 = new State("SingleItemSketch", 1, 16, COMPACT, !EMPTY, !DIRECT, !MEMORY, ORDERED, !ESTIMATION);
+    state1.check(csk1);
+
+    CompactSketch csk2 = sk.compact(false, null); //the first parameter is ignored when single item
+    state1.check(csk2);
+    
+    assertNotEquals(csk1, csk2); //calling the compact(boolean, null) method creates a new object
+    assertFalse(csk1 == csk2);
+    
+    CompactSketch csk3 = csk1.compact(false, null);
+    state1.check(csk3);
+    
+    assertEquals(csk1, csk3); //The same object
+    assertTrue(csk1 == csk3);
+    
+    CompactSketch cskc = csk1.compact(); //this, however just returns the same object.
+    state1.check(csk1);
+    
+    assertEquals(csk1, cskc); //the same object
+    assertTrue(csk1 == cskc);
+  }
 
+  @Test
+  public void checkMultipleItemHeapCompactSketch() {
+    UpdateSketch sk = Sketches.updateSketchBuilder().build();
+    //This sequence is naturally out-of-order by the hash values.
     sk.update(1);
-    wmem = WritableMemory.allocate(16);
-    csko = sk.compact(true, wmem); //Single, direct, ordered
-    state = new State("DirectCompactSketch", 1, 16, true, false, false, true, true, false);
-    state.check(csko);
-
-    wmem = WritableMemory.allocate(16);
-    csku = sk.compact(false, wmem); //Single, direct, unordered
-    state = new State("DirectCompactSketch", 1, 16, true, false, false, true, true, false);
-    state.check(csku);
-
-    CompactSketch csk2o; //ordered
-    CompactSketch csk2u; //unordered
-
-    csk2o = csku.compact(); //single, heap, ordered
-    state = new State("SingleItemSketch", 1, 16, true, false, false, false, true, false);
-    state.check(csk2o);
-
-    csk2o = csku.compact(true, null); //single, heap, ordered
-    state.check(csk2o);
-
-    csk2o = csku.compact(false, null); //single, heap, unordered
-    state.check(csk2o);
-
-    csk2o = csko.compact(true, null); //single, heap, ordered
-    state.check(csk2o);
-
-    csk2o = csko.compact(false, null); //single, heap, unordered
-    state.check(csk2o);
-
-    wmem = WritableMemory.allocate(16);
-    csk2o = csku.compact(true, wmem);
-    state.classType = "DirectCompactSketch";
-    state.memory = true;
-    state.check(csk2o);
-
-    wmem = WritableMemory.allocate(16);
-    csk2u = csku.compact(false, wmem);
-    state.classType = "DirectCompactSketch";
-    state.check(csk2u);
-
-    wmem = WritableMemory.allocate(16);
-    csk2o = csko.compact(true, wmem);
-    state.classType = "DirectCompactSketch";
-    state.memory = true;
-    state.check(csk2o);
-
-    wmem = WritableMemory.allocate(16);
-    csk2u = csko.compact(false, wmem);
-    state.classType = "DirectCompactSketch";
-    state.check(csk2u);
+    sk.update(2);
+    sk.update(3);
+    
+    CompactSketch csk1 = sk.compact(true, null); //creates a new object
+    State state1 = new State("HeapCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, !MEMORY, ORDERED, !ESTIMATION);
+    state1.check(csk1);
+
+    CompactSketch csk2 = sk.compact(false, null); //creates a new object, unordered
+    State state2 = new State("HeapCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, !MEMORY, !ORDERED, !ESTIMATION);
+    state2.check(csk2);
+    
+    assertNotEquals(csk1, csk2); //order is different and different objects
+    assertFalse(csk1 == csk2);
+    
+    CompactSketch csk3 = csk1.compact(true, null);
+    state1.check(csk3);
+    
+    assertEquals(csk1, csk3); //the same object because wmem = null and csk1.ordered = dstOrdered
+    assertTrue(csk1 == csk3);
+    
+    assertNotEquals(csk2, csk3); //different object because wmem = null and csk2.ordered = false && dstOrdered = true
+    assertFalse(csk2 == csk3);
+    
+    CompactSketch cskc = csk1.compact();
+    state1.check(cskc);
+    
+    assertEquals(csk1, cskc); //the same object
+    assertTrue(csk1 == cskc);
   }
 
   @Test
@@ -471,7 +617,7 @@ public class CompactSketchTest {
       e.printStackTrace();
     }
   }
-
+  
   private static class State {
     String classType = null;
     int count = 0;
@@ -482,8 +628,7 @@ public class CompactSketchTest {
     boolean memory = false;
     boolean ordered = false;
     boolean estimation = false;
-
-
+    
     State(String classType, int count, int bytes, boolean compact, boolean empty, boolean direct,
         boolean memory, boolean ordered, boolean estimation) {
       this.classType = classType;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org