You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by le...@apache.org on 2023/06/13 22:27:17 UTC

[datasketches-java] branch special_branch_for_theta_compact_update created (now 9455569c)

This is an automated email from the ASF dual-hosted git repository.

leerho pushed a change to branch special_branch_for_theta_compact_update
in repository https://gitbox.apache.org/repos/asf/datasketches-java.git


      at 9455569c Update theta compact() behavior.

This branch includes the following new commits:

     new 9455569c Update theta compact() behavior.

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org


[datasketches-java] 01/01: Update theta compact() behavior.

Posted by le...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

leerho pushed a commit to branch special_branch_for_theta_compact_update
in repository https://gitbox.apache.org/repos/asf/datasketches-java.git

commit 9455569cfda9508a00dd9ed0ec1f901f009a1e63
Author: Lee Rhodes <le...@users.noreply.github.com>
AuthorDate: Tue Jun 13 14:44:29 2023 -0700

    Update theta compact() behavior.
    
    Update associated javadocs.
---
 .../datasketches/theta/CompactOperations.java      |   2 +-
 .../datasketches/theta/HeapCompactSketch.java      |   1 +
 .../java/org/apache/datasketches/theta/Sketch.java |  41 +--
 .../datasketches/theta/CompactSketchTest.java      | 279 ++++++++++++++++-----
 4 files changed, 238 insertions(+), 85 deletions(-)

diff --git a/src/main/java/org/apache/datasketches/theta/CompactOperations.java b/src/main/java/org/apache/datasketches/theta/CompactOperations.java
index 42fe9fdf..a8066314 100644
--- a/src/main/java/org/apache/datasketches/theta/CompactOperations.java
+++ b/src/main/java/org/apache/datasketches/theta/CompactOperations.java
@@ -110,7 +110,7 @@ final class CompactOperations {
    * This assumes hashSeed is OK; serVer = 3.
    * @param srcMem the given input source Memory image
    * @param dstOrdered the desired ordering of the resulting CompactSketch
-   * @param dstMem Used for the target CompactSketch if it is Direct.
+   * @param dstMem Used for the target CompactSketch if it is Memory-based.
    * @return a CompactSketch of the correct form.
    */
   @SuppressWarnings("unused")
diff --git a/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java b/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java
index f1606884..479aa3ee 100644
--- a/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java
+++ b/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java
@@ -77,6 +77,7 @@ class HeapCompactSketch extends CompactSketch {
 
   @Override
   public CompactSketch compact(final boolean dstOrdered, final WritableMemory dstMem) {
+    if (dstMem == null && (dstOrdered == false || this.ordered_ == dstOrdered)) { return this; }
     return componentsToCompact(getThetaLong(), getRetainedEntries(true), getSeedHash(), isEmpty(),
         true, ordered_, dstOrdered, dstMem, getCache().clone());
   }
diff --git a/src/main/java/org/apache/datasketches/theta/Sketch.java b/src/main/java/org/apache/datasketches/theta/Sketch.java
index dc7df5a7..99642e12 100644
--- a/src/main/java/org/apache/datasketches/theta/Sketch.java
+++ b/src/main/java/org/apache/datasketches/theta/Sketch.java
@@ -39,7 +39,7 @@ import org.apache.datasketches.thetacommon.BinomialBoundsN;
 import org.apache.datasketches.thetacommon.ThetaUtil;
 
 /**
- * The top-level class for all sketches. This class is never constructed directly.
+ * The top-level class for all theta sketches. This class is never constructed directly.
  * Use the UpdateSketch.builder() methods to create UpdateSketches.
  *
  * @author Lee Rhodes
@@ -193,43 +193,50 @@ public abstract class Sketch {
   //Sketch interface
 
   /**
-   * Converts this sketch to a ordered CompactSketch on the Java heap.
+   * Converts this sketch to a ordered CompactSketch.
    *
-   * <p>If this sketch is already in the proper form, this method returns <i>this</i>,
-   * otherwise, this method returns a new CompactSketch of the proper form.
+   * <p>If <i>this.isCompact() == true</i> this method returns <i>this</i>,
+   * otherwise, this method is equivalent to 
+   * {@link #compact(boolean, WritableMemory) compact(true, null)}.
    *
    * <p>A CompactSketch is always immutable.</p>
    *
-   * @return this sketch as an ordered CompactSketch on the Java heap.
+   * @return this sketch as an ordered CompactSketch.
    */
   public CompactSketch compact() {
-    return compact(true, null);
+    return (this.isCompact()) ? (CompactSketch)this : compact(true, null);
   }
 
   /**
-   * Convert this sketch to a new CompactSketch of the chosen order and direct or on the heap.
+   * Convert this sketch to a <i>CompactSketch</i>.
    *
-   * <p>If this sketch is already in the proper form, this operation returns <i>this</i>,
-   * otherwise, this method returns a new CompactSketch of the proper form.
-   *
-   * <p>If this sketch is a type of UpdateSketch, the compacting process converts the hash table
-   * of the UpdateSketch to a simple list of the valid hash values.
+   * <p>If this sketch is a type of <i>UpdateSketch</i>, the compacting process converts the hash table
+   * of the <i>UpdateSketch</i> to a simple list of the valid hash values.
    * Any hash values of zero or equal-to or greater than theta will be discarded.
-   * The number of valid values remaining in the CompactSketch depends on a number of factors,
+   * The number of valid values remaining in the <i>CompactSketch</i> depends on a number of factors,
    * but may be larger or smaller than <i>Nominal Entries</i> (or <i>k</i>).
    * It will never exceed 2<i>k</i>.
    * If it is critical to always limit the size to no more than <i>k</i>,
-   * then <i>rebuild()</i> should be called on the UpdateSketch prior to calling this method.</p>
+   * then <i>rebuild()</i> should be called on the <i>UpdateSketch</i> prior to calling this method.</p>
    *
-   * <p>A CompactSketch is always immutable.</p>
+   * <p>A <i>CompactSketch</i> is always immutable.</p>
+   * 
+   * <p>A new <i>CompactSketch</i> object is created:</p>
+   * <ul><li>if <i>dstMem != null</i></li>
+   * <li>if <i>dstMem == null</i> and <i>this.hasMemory() == true</i></li>
+   * <li>if <i>dstMem == null</i> and <i>this</i> has more than 1 item</li> and <i>this.isOrdered() == false</i>
+   * and <i>dstOrdered == true</i>.</li>
+   *</ul>
+   * 
+   * <p>Otherwise, this operation returns <i>this</i>.</p>
    *
-   * @param dstOrdered
+   * @param dstOrdered assumed true if this sketch is empty or has only one value
    * <a href="{@docRoot}/resources/dictionary.html#dstOrdered">See Destination Ordered</a>
    *
    * @param dstMem
    * <a href="{@docRoot}/resources/dictionary.html#dstMem">See Destination Memory</a>.
    *
-   * @return this sketch as a CompactSketch in the chosen form
+   * @return this sketch as a <i>CompactSketch</i>.
    */
   public abstract CompactSketch compact(final boolean dstOrdered, final WritableMemory dstMem);
 
diff --git a/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java b/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java
index 020e306b..d5aae288 100644
--- a/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java
+++ b/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java
@@ -21,6 +21,7 @@ package org.apache.datasketches.theta;
 
 import static org.testng.Assert.assertEquals;
 import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertNotEquals;
 import static org.testng.Assert.assertNotNull;
 import static org.testng.Assert.assertNull;
 import static org.testng.Assert.assertTrue;
@@ -220,81 +221,226 @@ public class CompactSketchTest {
 
   @Test
   public void checkCompactCachePart() {
-    //phony values except for curCount = 0.
+    //phoney values except for curCount = 0.
     long[] result = Intersection.compactCachePart(null, 4, 0, 0L, false);
     assertEquals(result.length, 0);
   }
 
+  //See class State
+  //Class Name
+  //Count
+  //Bytes
+  private static final boolean COMPACT = true;
+  private static final boolean EMPTY = true;
+  private static final boolean DIRECT = true;
+  private static final boolean MEMORY = true;
+  private static final boolean ORDERED = true;
+  private static final boolean ESTIMATION = true;
+  
   @Test
-  public void checkDirectCompactSingleItemSketch() {
-    State state;
+  /**
+   * Empty, memory-based Compact sketches are always ordered
+   */
+  public void checkEmptyMemoryCompactSketch() {
+    UpdateSketch sk = Sketches.updateSketchBuilder().build();
+
+    WritableMemory wmem1 = WritableMemory.allocate(16);
+    CompactSketch csk1 = sk.compact(false, wmem1); //the first parameter is ignored when empty
+    State state1 = new State("DirectCompactSketch", 0, 8, COMPACT, EMPTY, !DIRECT, MEMORY, ORDERED, !ESTIMATION);
+    state1.check(csk1);
+
+    WritableMemory wmem2 = WritableMemory.allocate(16);
+    CompactSketch csk2 = sk.compact(false, wmem2);
+    state1.check(csk2);
+    
+    assertNotEquals(csk1, csk2); //different object because memory is valid
+    assertFalse(csk1 == csk2);
+    
+    WritableMemory wmem3 = WritableMemory.allocate(16);
+    CompactSketch csk3 = csk1.compact(false, wmem3);
+    state1.check(csk3);
+    
+    assertNotEquals(csk1, csk3); //different object because memory is valid
+    assertFalse(csk1 == csk3);
+    
+    CompactSketch csk4 = csk1.compact(false, null);
+    State state4 = new State("EmptyCompactSketch", 0, 8, COMPACT, EMPTY, !DIRECT, !MEMORY, ORDERED, !ESTIMATION);
+    state4.check(csk4);
+    
+    assertNotEquals(csk1, csk4); //different object because on heap
+    assertFalse(csk1 == csk4);
+    
+    CompactSketch cskc = csk1.compact();
+    state1.check(cskc);
+    
+    assertEquals(csk1, cskc); //the same object
+    assertTrue(csk1 == cskc);
+  }
+  
+  @Test
+  /**
+   * Single-Item, memory-based Compact sketches are always ordered: 
+   */
+  public void checkSingleItemMemoryCompactSketch() {
     UpdateSketch sk = Sketches.updateSketchBuilder().build();
+    sk.update(1);
+    
+    WritableMemory wmem1 = WritableMemory.allocate(16);
+    CompactSketch csk1 = sk.compact(false, wmem1); //the first parameter is ignored when single item
+    State state1 = new State("DirectCompactSketch", 1, 16, COMPACT, !EMPTY, !DIRECT, MEMORY, ORDERED, !ESTIMATION);
+    state1.check(csk1);
+
+    WritableMemory wmem2 = WritableMemory.allocate(16);
+    CompactSketch csk2 = sk.compact(false, wmem2); //the first parameter is ignored when single item
+    state1.check(csk2);
+    
+    assertNotEquals(csk1, csk2); //different object because memory is valid
+    assertFalse(csk1 == csk2);
+    
+    WritableMemory wmem3 = WritableMemory.allocate(16);
+    CompactSketch csk3 = csk1.compact(false, wmem3);
+    state1.check(csk3);
+    
+    assertNotEquals(csk1, csk3); //different object because memory is valid
+    assertFalse(csk1 == csk3);
+    
+    CompactSketch cskc = csk1.compact();
+    state1.check(cskc);
+    
+    assertEquals(csk1, cskc); //the same object
+    assertTrue(csk1 == cskc);
+  }
 
-    CompactSketch csko; //ordered
-    CompactSketch csku; //unordered
+  @Test
+  public void checkMultipleItemMemoryCompactSketch() {
+    UpdateSketch sk = Sketches.updateSketchBuilder().build();
+    //This sequence is naturally out-of-order by the hash values.
+    sk.update(1);
+    sk.update(2);
+    sk.update(3);
+    
+    WritableMemory wmem1 = WritableMemory.allocate(50);
+    CompactSketch csk1 = sk.compact(true, wmem1);
+    State state1 = new State("DirectCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, MEMORY, ORDERED, !ESTIMATION);
+    state1.check(csk1);
+
+    WritableMemory wmem2 = WritableMemory.allocate(50);
+    CompactSketch csk2 = sk.compact(false, wmem2);
+    State state2 = new State("DirectCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, MEMORY, !ORDERED, !ESTIMATION);
+    state2.check(csk2);
+    
+    assertNotEquals(csk1, csk2); //different object because memory is valid
+    assertFalse(csk1 == csk2);
+    
+    WritableMemory wmem3 = WritableMemory.allocate(50);
+    CompactSketch csk3 = csk1.compact(false, wmem3);
+    state2.check(csk3);
+    
+    assertNotEquals(csk1, csk3); //different object because memory is valid
+    assertFalse(csk1 == csk3);
+    
+    CompactSketch cskc = csk1.compact();
+    state1.check(cskc);
+    
+    assertEquals(csk1, cskc); //the same object
+    assertTrue(csk1 == cskc);
+  }
+  
+  @Test
+  /**
+   * Empty, heap-based Compact sketches are always ordered. 
+   * All empty, heap-based, compact sketches point to the same static, final constant of 8 bytes. 
+   */
+  public void checkEmptyHeapCompactSketch() {
+    UpdateSketch sk = Sketches.updateSketchBuilder().build();
 
-    WritableMemory wmem = WritableMemory.allocate(16);
-    csko = sk.compact(true, wmem); //empty, direct, ordered
-    //ClassType, Count, Bytes, Compact, Empty, Direct, Memory, Ordered, Estimation
-    state = new State("DirectCompactSketch", 0, 8, true, true, false, true, true, false);
-    state.check(csko);
+    CompactSketch csk1 = sk.compact(false, null); //the first parameter is ignored when empty
+    State state1 = new State("EmptyCompactSketch", 0, 8, COMPACT, EMPTY, !DIRECT, !MEMORY, ORDERED, !ESTIMATION);
+    state1.check(csk1);
+  
+    CompactSketch csk2 = sk.compact(false, null); //the first parameter is ignored when empty
+    state1.check(csk1);
+  
+    assertEquals(csk1, csk2);
+    assertTrue(csk1 == csk2);
+    
+    CompactSketch csk3 = csk1.compact(false, null);
+    state1.check(csk3);
+    
+    assertEquals(csk1, csk3); //The same object
+    assertTrue(csk1 == csk3);
+    
+    CompactSketch cskc = csk1.compact();
+    state1.check(cskc);
+    
+    assertEquals(csk1, cskc); //The same object
+    assertTrue(csk1 == cskc);
+  }
 
-    wmem = WritableMemory.allocate(16);
-    csku = sk.compact(false, wmem); //empty, direct, unordered
-    state = new State("DirectCompactSketch", 0, 8, true, true, false, true, true, false);
-    state.check(csku);
+  @Test
+  /**
+   * Single-Item, heap-based Compact sketches are always ordered.
+   */
+  public void checkSingleItemHeapCompactSketch() {
+    UpdateSketch sk = Sketches.updateSketchBuilder().build();
+    sk.update(1);
+    
+    CompactSketch csk1 = sk.compact(false, null); //the first parameter is ignored when single item
+    State state1 = new State("SingleItemSketch", 1, 16, COMPACT, !EMPTY, !DIRECT, !MEMORY, ORDERED, !ESTIMATION);
+    state1.check(csk1);
+
+    CompactSketch csk2 = sk.compact(false, null); //the first parameter is ignored when single item
+    state1.check(csk2);
+    
+    assertNotEquals(csk1, csk2); //calling the compact(boolean, null) method creates a new object
+    assertFalse(csk1 == csk2);
+    
+    CompactSketch csk3 = csk1.compact(false, null);
+    state1.check(csk3);
+    
+    assertEquals(csk1, csk3); //The same object
+    assertTrue(csk1 == csk3);
+    
+    CompactSketch cskc = csk1.compact(); //this, however just returns the same object.
+    state1.check(csk1);
+    
+    assertEquals(csk1, cskc); //the same object
+    assertTrue(csk1 == cskc);
+  }
 
+  @Test
+  public void checkMultipleItemHeapCompactSketch() {
+    UpdateSketch sk = Sketches.updateSketchBuilder().build();
+    //This sequence is naturally out-of-order by the hash values.
     sk.update(1);
-    wmem = WritableMemory.allocate(16);
-    csko = sk.compact(true, wmem); //Single, direct, ordered
-    state = new State("DirectCompactSketch", 1, 16, true, false, false, true, true, false);
-    state.check(csko);
-
-    wmem = WritableMemory.allocate(16);
-    csku = sk.compact(false, wmem); //Single, direct, unordered
-    state = new State("DirectCompactSketch", 1, 16, true, false, false, true, true, false);
-    state.check(csku);
-
-    CompactSketch csk2o; //ordered
-    CompactSketch csk2u; //unordered
-
-    csk2o = csku.compact(); //single, heap, ordered
-    state = new State("SingleItemSketch", 1, 16, true, false, false, false, true, false);
-    state.check(csk2o);
-
-    csk2o = csku.compact(true, null); //single, heap, ordered
-    state.check(csk2o);
-
-    csk2o = csku.compact(false, null); //single, heap, unordered
-    state.check(csk2o);
-
-    csk2o = csko.compact(true, null); //single, heap, ordered
-    state.check(csk2o);
-
-    csk2o = csko.compact(false, null); //single, heap, unordered
-    state.check(csk2o);
-
-    wmem = WritableMemory.allocate(16);
-    csk2o = csku.compact(true, wmem);
-    state.classType = "DirectCompactSketch";
-    state.memory = true;
-    state.check(csk2o);
-
-    wmem = WritableMemory.allocate(16);
-    csk2u = csku.compact(false, wmem);
-    state.classType = "DirectCompactSketch";
-    state.check(csk2u);
-
-    wmem = WritableMemory.allocate(16);
-    csk2o = csko.compact(true, wmem);
-    state.classType = "DirectCompactSketch";
-    state.memory = true;
-    state.check(csk2o);
-
-    wmem = WritableMemory.allocate(16);
-    csk2u = csko.compact(false, wmem);
-    state.classType = "DirectCompactSketch";
-    state.check(csk2u);
+    sk.update(2);
+    sk.update(3);
+    
+    CompactSketch csk1 = sk.compact(true, null); //creates a new object
+    State state1 = new State("HeapCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, !MEMORY, ORDERED, !ESTIMATION);
+    state1.check(csk1);
+
+    CompactSketch csk2 = sk.compact(false, null); //creates a new object, unordered
+    State state2 = new State("HeapCompactSketch", 3, 40, COMPACT, !EMPTY, !DIRECT, !MEMORY, !ORDERED, !ESTIMATION);
+    state2.check(csk2);
+    
+    assertNotEquals(csk1, csk2); //order is different and different objects
+    assertFalse(csk1 == csk2);
+    
+    CompactSketch csk3 = csk1.compact(true, null);
+    state1.check(csk3);
+    
+    assertEquals(csk1, csk3); //the same object because wmem = null and csk1.ordered = dstOrdered
+    assertTrue(csk1 == csk3);
+    
+    assertNotEquals(csk2, csk3); //different object because wmem = null and csk2.ordered = false && dstOrdered = true
+    assertFalse(csk2 == csk3);
+    
+    CompactSketch cskc = csk1.compact();
+    state1.check(cskc);
+    
+    assertEquals(csk1, cskc); //the same object
+    assertTrue(csk1 == cskc);
   }
 
   @Test
@@ -471,7 +617,7 @@ public class CompactSketchTest {
       e.printStackTrace();
     }
   }
-
+  
   private static class State {
     String classType = null;
     int count = 0;
@@ -482,8 +628,7 @@ public class CompactSketchTest {
     boolean memory = false;
     boolean ordered = false;
     boolean estimation = false;
-
-
+    
     State(String classType, int count, int bytes, boolean compact, boolean empty, boolean direct,
         boolean memory, boolean ordered, boolean estimation) {
       this.classType = classType;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org