You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by le...@apache.org on 2020/07/25 01:22:10 UTC

[incubator-datasketches-java] 01/02: Interim

This is an automated email from the ASF dual-hosted git repository.

leerho pushed a commit to branch Refactor_Intersection
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-java.git

commit 7e35246400eb07f2920f1eac5d10169167929e25
Author: Lee Rhodes <le...@users.noreply.github.com>
AuthorDate: Tue Jul 21 12:16:48 2020 -0700

    Interim
---
 .../org/apache/datasketches/HashOperations.java    |  27 +++--
 .../org/apache/datasketches/theta/AnotBimpl.java   |   3 +-
 .../datasketches/theta/IntersectionImpl.java       | 130 +++++++++++++--------
 .../datasketches/theta/IntersectionImplR.java      |  75 +++++++-----
 .../apache/datasketches/theta/SetOperation.java    | 105 ++++++++++-------
 .../org/apache/datasketches/theta/UnionImpl.java   |   5 +-
 .../apache/datasketches/theta/AnotBimplTest.java   |   2 +-
 .../datasketches/theta/DirectIntersectionTest.java |   6 +-
 .../datasketches/theta/HeapIntersectionTest.java   |   8 +-
 .../datasketches/theta/SetOperationTest.java       |  18 +--
 .../apache/datasketches/theta/UnionImplTest.java   |   4 +-
 11 files changed, 228 insertions(+), 155 deletions(-)

diff --git a/src/main/java/org/apache/datasketches/HashOperations.java b/src/main/java/org/apache/datasketches/HashOperations.java
index 9067b79..6541397 100644
--- a/src/main/java/org/apache/datasketches/HashOperations.java
+++ b/src/main/java/org/apache/datasketches/HashOperations.java
@@ -19,7 +19,8 @@
 
 package org.apache.datasketches;
 
-import static org.apache.datasketches.Util.MIN_LG_NOM_LONGS;
+import static java.lang.Math.max;
+import static org.apache.datasketches.Util.MIN_LG_ARR_LONGS;
 import static org.apache.datasketches.Util.ceilingPowerOf2;
 
 import org.apache.datasketches.memory.Memory;
@@ -334,17 +335,27 @@ public final class HashOperations {
       final int count,
       final long thetaLong,
       final double rebuildThreshold) {
-    final int size = Math.max(
-      ceilingPowerOf2((int) Math.ceil(count / rebuildThreshold)),
-      1 << MIN_LG_NOM_LONGS
-    );
-    final long[] hashTable = new long[size];
-    hashArrayInsert(
-        hashArr, hashTable, Integer.numberOfTrailingZeros(size), thetaLong);
+    final int lgArrLongs = minLgHashTableSize(count, rebuildThreshold);
+    final int arrLongs = 1 << lgArrLongs;
+    final long[] hashTable = new long[arrLongs];
+    hashArrayInsert(hashArr, hashTable, lgArrLongs, thetaLong);
     return hashTable;
   }
 
   /**
+   * Returns the smallest log hash table size given the count of items and the rebuild threshold.
+   * @param count the given count of items
+   * @param rebuild_threshold the rebuild threshold as a fraction between zero and one.
+   * @return the smallest log hash table size
+   */
+  public static int minLgHashTableSize(final int count, final double rebuild_threshold) {
+    final int upperCount = (int) Math.ceil(count / rebuild_threshold);
+    final int arrLongs = max(ceilingPowerOf2(upperCount), 1 << MIN_LG_ARR_LONGS);
+    final int newLgArrLongs = Integer.numberOfTrailingZeros(arrLongs);
+    return newLgArrLongs;
+  }
+
+  /**
    * Counts the cardinality of the first Log2 values of the given source array.
    * @param srcArr the given source array
    * @param lgArrLongs <a href="{@docRoot}/resources/dictionary.html#lgArrLongs">See lgArrLongs</a>
diff --git a/src/main/java/org/apache/datasketches/theta/AnotBimpl.java b/src/main/java/org/apache/datasketches/theta/AnotBimpl.java
index 4686cb8..9eecee3 100644
--- a/src/main/java/org/apache/datasketches/theta/AnotBimpl.java
+++ b/src/main/java/org/apache/datasketches/theta/AnotBimpl.java
@@ -23,6 +23,7 @@ import static org.apache.datasketches.HashOperations.convertToHashTable;
 import static org.apache.datasketches.HashOperations.hashSearch;
 import static org.apache.datasketches.Util.REBUILD_THRESHOLD;
 import static org.apache.datasketches.Util.checkSeedHashes;
+import static org.apache.datasketches.Util.computeSeedHash;
 import static org.apache.datasketches.Util.simpleIntLog2;
 
 import java.util.Arrays;
@@ -141,7 +142,7 @@ final class AnotBimpl extends AnotB {
   }
 
   @Override
-  int getRetainedEntries(final boolean valid) {
+  int getRetainedEntries() {
     return curCount_;
   }
 
diff --git a/src/main/java/org/apache/datasketches/theta/IntersectionImpl.java b/src/main/java/org/apache/datasketches/theta/IntersectionImpl.java
index 8166fb3..aa49451 100644
--- a/src/main/java/org/apache/datasketches/theta/IntersectionImpl.java
+++ b/src/main/java/org/apache/datasketches/theta/IntersectionImpl.java
@@ -20,7 +20,13 @@
 package org.apache.datasketches.theta;
 
 import static java.lang.Math.min;
+import static org.apache.datasketches.HashOperations.continueCondition;
+import static org.apache.datasketches.HashOperations.hashInsertOnly;
+import static org.apache.datasketches.HashOperations.hashInsertOnlyMemory;
+import static org.apache.datasketches.HashOperations.hashSearch;
+import static org.apache.datasketches.HashOperations.minLgHashTableSize;
 import static org.apache.datasketches.Util.MIN_LG_ARR_LONGS;
+import static org.apache.datasketches.Util.REBUILD_THRESHOLD;
 import static org.apache.datasketches.theta.PreambleUtil.EMPTY_FLAG_MASK;
 import static org.apache.datasketches.theta.PreambleUtil.FAMILY_BYTE;
 import static org.apache.datasketches.theta.PreambleUtil.FLAGS_BYTE;
@@ -35,15 +41,16 @@ import static org.apache.datasketches.theta.PreambleUtil.insertCurCount;
 import static org.apache.datasketches.theta.PreambleUtil.insertFamilyID;
 import static org.apache.datasketches.theta.PreambleUtil.insertFlags;
 import static org.apache.datasketches.theta.PreambleUtil.insertLgArrLongs;
+import static org.apache.datasketches.theta.PreambleUtil.insertLgNomLongs;
 import static org.apache.datasketches.theta.PreambleUtil.insertP;
 import static org.apache.datasketches.theta.PreambleUtil.insertPreLongs;
 import static org.apache.datasketches.theta.PreambleUtil.insertSerVer;
 import static org.apache.datasketches.theta.PreambleUtil.insertThetaLong;
+import static org.apache.datasketches.theta.PreambleUtil.setEmpty;
 
 import java.util.Arrays;
 
 import org.apache.datasketches.Family;
-import org.apache.datasketches.HashOperations;
 import org.apache.datasketches.SketchesArgumentException;
 import org.apache.datasketches.Util;
 import org.apache.datasketches.memory.Memory;
@@ -62,12 +69,13 @@ final class IntersectionImpl extends IntersectionImplR {
    * @return a new IntersectionImpl on the Java heap
    */
   static IntersectionImpl initNewHeapInstance(final long seed) {
+    //compute & store seedHash,
     final IntersectionImpl impl = new IntersectionImpl(null, seed, false);
     impl.lgArrLongs_ = 0;
     impl.curCount_ = -1;  //Universal Set is true
     impl.thetaLong_ = Long.MAX_VALUE;
     impl.empty_ = false;  //A virgin intersection represents the Universal Set so empty is FALSE!
-    impl.hashTable_ = null;
+    impl.hashTable_ = null; //retained entries of the intersection as a hash table, on-heap only.
     return impl;
   }
 
@@ -81,27 +89,30 @@ final class IntersectionImpl extends IntersectionImplR {
    * @return a new IntersectionImpl that may be off-heap
    */
   static IntersectionImpl initNewDirectInstance(final long seed, final WritableMemory dstMem) {
-    final IntersectionImpl impl = new IntersectionImpl(dstMem, seed, true);
+    //DstMem: compute & store seedHash, true means no seedhash checking
+    final IntersectionImpl impl = new IntersectionImpl(dstMem, seed, true); //compute & store seedHash
 
     //Load Preamble
+
     insertPreLongs(dstMem, CONST_PREAMBLE_LONGS); //RF not used = 0
     insertSerVer(dstMem, SER_VER);
     insertFamilyID(dstMem, Family.INTERSECTION.getID());
-    //Note: Intersection does not use lgNomLongs or k, per se.
+    insertLgNomLongs(dstMem, 0); //Note: Intersection does not use lgNomLongs or k, per se.
     //set lgArrLongs initially to minimum.  Don't clear cache in mem
     insertLgArrLongs(dstMem, MIN_LG_ARR_LONGS);
     insertFlags(dstMem, 0); //bigEndian = readOnly = compact = ordered = empty = false;
     //seedHash loaded and checked in private constructor
     insertCurCount(dstMem, -1);
     insertP(dstMem, (float) 1.0);
+    //pre2
     insertThetaLong(dstMem, Long.MAX_VALUE);
 
-    //Initialize
+    //internal initialize
     impl.lgArrLongs_ = MIN_LG_ARR_LONGS;
     impl.curCount_ = -1; //set in mem below
     impl.thetaLong_ = Long.MAX_VALUE;
     impl.empty_ = false;
-    impl.maxLgArrLongs_ = checkMaxLgArrLongs(dstMem); //Only Off Heap
+    impl.maxLgArrLongs_ = getMaxLgArrLongs(dstMem); //Only Off Heap
 
     return impl;
   }
@@ -114,6 +125,7 @@ final class IntersectionImpl extends IntersectionImplR {
    * @return a IntersectionImplR instance on the Java heap
    */
   static IntersectionImplR heapifyInstance(final Memory srcMem, final long seed) {
+    //compute & store seedHash,
     final IntersectionImpl impl = new IntersectionImpl(null, seed, false);
 
     //Get Preamble
@@ -164,13 +176,14 @@ final class IntersectionImpl extends IntersectionImplR {
   }
 
   /**
-   * Wrap an Intersection target around the given source Memory containing intersection data.
-   * @param srcMem The source Memory image.
+   * Wrap an Intersection target around the given source WritableMemory containing intersection data.
+   * @param srcMem The source WritableMemory image.
    * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
    * @param seed <a href="{@docRoot}/resources/dictionary.html#seed">See seed</a>
-   * @return a IntersectionImpl that wraps a source Memory that contains an Intersection image
+   * @return a IntersectionImpl that wraps a source WritableMemory that contains an Intersection image
    */
   static IntersectionImpl wrapInstance(final WritableMemory srcMem, final long seed) {
+    //SrcMem:gets and stores the seedHash, checks mem_seedHash against the seed
     final IntersectionImpl impl = new IntersectionImpl(srcMem, seed, false);
     return (IntersectionImpl) internalWrapInstance(srcMem, impl);
   }
@@ -193,31 +206,31 @@ final class IntersectionImpl extends IntersectionImplR {
       curCount_ = 0;
       thetaLong_ = Long.MAX_VALUE;
       empty_ = true;
-      maxLgArrLongs_ = 0;
       hashTable_ = null;
-      if (mem_ != null) {
-        PreambleUtil.setEmpty(mem_); //true
-        insertThetaLong(mem_, thetaLong_);
-        insertCurCount(mem_, 0);
-        insertLgArrLongs(mem_, lgArrLongs_);
+      if (wmem_ != null) {
+        setEmpty(wmem_); //true
+        insertThetaLong(wmem_, thetaLong_);
+        insertCurCount(wmem_, 0);
+        insertLgArrLongs(wmem_, lgArrLongs_);
       }
       return;
     }
     Util.checkSeedHashes(seedHash_, sketchIn.getSeedHash());
     thetaLong_ = min(thetaLong_, sketchIn.getThetaLong()); //Theta rule
     empty_ = false;
-    if (mem_ != null) {
-      insertThetaLong(mem_, thetaLong_);
-      PreambleUtil.clearEmpty(mem_); //false
+    if (wmem_ != null) {
+      insertThetaLong(wmem_, thetaLong_);
+      clearEmpty(wmem_); //false
     }
 
     final int sketchInEntries = sketchIn.getRetainedEntries(true);
 
     // The truth table for the following state machine
     //   Case  curCount  sketchInEntries | Actions
-    //     1      <0            0        | First intersect, curCount = 0; HT = null; exit
+    //     1      <0            0        | First intersect, curCount < 0; HT = null; exit
     //     2       0            0        | CurCount = 0; HT = null; exit
     //     3      >0            0        | CurCount = 0; HT = null; exit
+    //     4                             | Not used
     //     5      <0           >0        | First intersect, clone SketchIn; exit
     //     6       0           >0        | CurCount = 0; HT = null; exit
     //     7      >0           >0        | Perform full intersect
@@ -230,26 +243,27 @@ final class IntersectionImpl extends IntersectionImplR {
       case 6: { //(curCount_ == 0) || (sketchInEntries == 0)
         //All future intersections result in zero data, but theta can still be reduced.
         curCount_ = 0;
-        if (mem_ != null) { insertCurCount(mem_, 0); }
+        if (wmem_ != null) { insertCurCount(wmem_, 0); }
         hashTable_ = null; //No need for a HT. Don't bother clearing mem if valid
         break;
       }
       case 5: { // curCount_ < 0; This is the 1st intersect, clone the incoming sketch
         curCount_ = sketchIn.getRetainedEntries(true);
-        final int requiredLgArrLongs = computeMinLgArrLongsFromCount(curCount_);
+        final int requiredLgArrLongs = minLgHashTableSize(curCount_, REBUILD_THRESHOLD);
         final int priorLgArrLongs = lgArrLongs_; //prior only used in error message
         lgArrLongs_ = requiredLgArrLongs;
 
-        if (mem_ != null) { //Off heap, check if current dstMem is large enough
-          insertCurCount(mem_, curCount_);
-          insertLgArrLongs(mem_, lgArrLongs_);
+        if (wmem_ != null) { //Off heap, check if current dstMem is large enough
+          insertCurCount(wmem_, curCount_);
+          insertLgArrLongs(wmem_, lgArrLongs_);
           if (requiredLgArrLongs <= maxLgArrLongs_) { //OK
-            mem_.clear(CONST_PREAMBLE_LONGS << 3, 8 << lgArrLongs_); //clear only what required
+            wmem_.clear(CONST_PREAMBLE_LONGS << 3, 8 << lgArrLongs_); //clear only what required
           }
           else { //not enough space in dstMem
+            final int requiredBytes = (8 << requiredLgArrLongs) + 24;
+            final int givenBytes = (8 << priorLgArrLongs) + 24;
             throw new SketchesArgumentException(
-                "Insufficient dstMem hash table space: "
-                    + (1 << requiredLgArrLongs) + " > " + (1 << priorLgArrLongs));
+                "Insufficient internal Memory space: " + requiredBytes + " > " + givenBytes);
           }
         }
         else { //On the heap, allocate a HT
@@ -283,11 +297,35 @@ final class IntersectionImpl extends IntersectionImplR {
     thetaLong_ = Long.MAX_VALUE;
     empty_ = false;
     hashTable_ = null;
-    if (mem_ != null) {
-      insertLgArrLongs(mem_, lgArrLongs_); //make sure
-      insertCurCount(mem_, -1);
-      insertThetaLong(mem_, Long.MAX_VALUE);
-      clearEmpty(mem_);
+    if (wmem_ != null) {
+      insertLgArrLongs(wmem_, 0);
+      insertCurCount(wmem_, -1);
+      insertThetaLong(wmem_, Long.MAX_VALUE);
+      clearEmpty(wmem_);
+    }
+  }
+
+  private void hardReset() { //Universal Set
+    resetToEmpty();
+    curCount_ = -1;
+    empty_ = false;
+    if (wmem_ != null) {
+      clearEmpty(wmem_);
+      insertCurCount(wmem_, -1);
+    }
+  }
+
+  private void resetToEmpty() { //empty state
+    lgArrLongs_ = 0;
+    curCount_ = 0;
+    thetaLong_ = Long.MAX_VALUE;
+    empty_ = true;
+    hashTable_ = null;
+    if (wmem_ != null) {
+      insertLgArrLongs(wmem_, 0);
+      insertCurCount(wmem_, 0);
+      insertThetaLong(wmem_, Long.MAX_VALUE);
+      setEmpty(wmem_); //true
     }
   }
 
@@ -299,10 +337,10 @@ final class IntersectionImpl extends IntersectionImplR {
     final long[] cacheIn = sketchIn.getCache();
     final int arrLongsIn = cacheIn.length;
     final long[] hashTable;
-    if (mem_ != null) {
+    if (wmem_ != null) {
       final int htLen = 1 << lgArrLongs_;
       hashTable = new long[htLen];
-      mem_.getLongArray(CONST_PREAMBLE_LONGS << 3, hashTable, 0, htLen);
+      wmem_.getLongArray(CONST_PREAMBLE_LONGS << 3, hashTable, 0, htLen);
     } else {
       hashTable = hashTable_;
     }
@@ -318,7 +356,7 @@ final class IntersectionImpl extends IntersectionImplR {
         if (hashIn >= thetaLong_) {
           break; //early stop assumes that hashes in input sketch are ordered!
         }
-        final int foundIdx = HashOperations.hashSearch(hashTable, lgArrLongs_, hashIn);
+        final int foundIdx = hashSearch(hashTable, lgArrLongs_, hashIn);
         if (foundIdx == -1) { continue; }
         matchSet[matchSetCount++] = hashIn;
       }
@@ -328,18 +366,18 @@ final class IntersectionImpl extends IntersectionImplR {
       for (int i = 0; i < arrLongsIn; i++ ) {
         final long hashIn = cacheIn[i];
         if ((hashIn <= 0L) || (hashIn >= thetaLong_)) { continue; }
-        final int foundIdx = HashOperations.hashSearch(hashTable, lgArrLongs_, hashIn);
+        final int foundIdx = hashSearch(hashTable, lgArrLongs_, hashIn);
         if (foundIdx == -1) { continue; }
         matchSet[matchSetCount++] = hashIn;
       }
     }
     //reduce effective array size to minimum
     curCount_ = matchSetCount;
-    lgArrLongs_ = computeMinLgArrLongsFromCount(matchSetCount);
-    if (mem_ != null) {
-      insertCurCount(mem_, matchSetCount);
-      insertLgArrLongs(mem_, lgArrLongs_);
-      mem_.clear(CONST_PREAMBLE_LONGS << 3, 8 << lgArrLongs_); //clear for rebuild
+    lgArrLongs_ = minLgHashTableSize(matchSetCount, REBUILD_THRESHOLD);
+    if (wmem_ != null) {
+      insertCurCount(wmem_, matchSetCount);
+      insertLgArrLongs(wmem_, lgArrLongs_);
+      wmem_.clear(CONST_PREAMBLE_LONGS << 3, 8 << lgArrLongs_); //clear for rebuild
     } else {
       Arrays.fill(hashTable_, 0, 1 << lgArrLongs_, 0L); //clear for rebuild
     }
@@ -356,21 +394,21 @@ final class IntersectionImpl extends IntersectionImplR {
   private void moveDataToTgt(final long[] arr, final int count) {
     final int arrLongsIn = arr.length;
     int tmpCnt = 0;
-    if (mem_ != null) { //Off Heap puts directly into mem
+    if (wmem_ != null) { //Off Heap puts directly into mem
       final int preBytes = CONST_PREAMBLE_LONGS << 3;
       final int lgArrLongs = lgArrLongs_;
       final long thetaLong = thetaLong_;
       for (int i = 0; i < arrLongsIn; i++ ) {
         final long hashIn = arr[i];
-        if (HashOperations.continueCondition(thetaLong, hashIn)) { continue; }
-        HashOperations.hashInsertOnlyMemory(mem_, lgArrLongs, hashIn, preBytes);
+        if (continueCondition(thetaLong, hashIn)) { continue; }
+        hashInsertOnlyMemory(wmem_, lgArrLongs, hashIn, preBytes);
         tmpCnt++;
       }
     } else { //On Heap. Assumes HT exists and is large enough
       for (int i = 0; i < arrLongsIn; i++ ) {
         final long hashIn = arr[i];
-        if (HashOperations.continueCondition(thetaLong_, hashIn)) { continue; }
-        HashOperations.hashInsertOnly(hashTable_, lgArrLongs_, hashIn);
+        if (continueCondition(thetaLong_, hashIn)) { continue; }
+        hashInsertOnly(hashTable_, lgArrLongs_, hashIn);
         tmpCnt++;
       }
     }
diff --git a/src/main/java/org/apache/datasketches/theta/IntersectionImplR.java b/src/main/java/org/apache/datasketches/theta/IntersectionImplR.java
index f752a7e..658d293 100644
--- a/src/main/java/org/apache/datasketches/theta/IntersectionImplR.java
+++ b/src/main/java/org/apache/datasketches/theta/IntersectionImplR.java
@@ -20,6 +20,7 @@
 package org.apache.datasketches.theta;
 
 import static org.apache.datasketches.Util.MIN_LG_ARR_LONGS;
+import static org.apache.datasketches.Util.computeSeedHash;
 import static org.apache.datasketches.Util.floorPowerOf2;
 import static org.apache.datasketches.theta.PreambleUtil.EMPTY_FLAG_MASK;
 import static org.apache.datasketches.theta.PreambleUtil.FAMILY_BYTE;
@@ -57,7 +58,8 @@ import org.apache.datasketches.memory.WritableMemory;
  */
 class IntersectionImplR extends Intersection {
   protected final short seedHash_;
-  protected final WritableMemory mem_;
+  protected final boolean readOnly_;
+  protected final WritableMemory wmem_;
 
   //Note: Intersection does not use lgNomLongs or k, per se.
   protected int lgArrLongs_; //current size of hash table
@@ -66,31 +68,42 @@ class IntersectionImplR extends Intersection {
   protected boolean empty_;
 
   protected long[] hashTable_ = null;  //HT => Data.  Only used On Heap
-  protected int maxLgArrLongs_ = 0; //max size of hash table. Only used Off Heap
+  protected int maxLgArrLongs_ = 0; //max size of wmem_ hash table. Only used Off Heap, never reset.
 
-  protected IntersectionImplR(final WritableMemory mem, final long seed, final boolean newMem) {
-    mem_ = mem;
-    if (mem != null) {
-      if (newMem) {
+  /**
+   * Constructor.
+   * @param wmem Can be either a Source(e.g. wrap) or Destination (new Direct) WritableMemory
+   * @param seed Used to validate incoming sketch arguments
+   * @param dstMemFlag The given memory is a Destination (new Direct) WritableMemory
+   */
+  protected IntersectionImplR(final WritableMemory wmem, final long seed, final boolean dstMemFlag) {
+    readOnly_ = !dstMemFlag;
+    if (wmem != null) {
+      wmem_ = wmem;
+      if (dstMemFlag) { //DstMem: compute & store seedHash, newMem means no seedhash checking
+        checkMinSizeMemory(wmem);
         seedHash_ = computeSeedHash(seed);
-        mem_.putShort(SEED_HASH_SHORT, seedHash_);
-      } else {
-        seedHash_ = mem_.getShort(SEED_HASH_SHORT);
+        wmem_.putShort(SEED_HASH_SHORT, seedHash_);
+      } else { //SrcMem:gets and stores the seedHash, checks mem_seedHash against the seed
+        seedHash_ = wmem_.getShort(SEED_HASH_SHORT);
         Util.checkSeedHashes(seedHash_, computeSeedHash(seed)); //check for seed hash conflict
       }
-    } else {
+    } else { //compute & store seedHash
+      wmem_ = null;
       seedHash_ = computeSeedHash(seed);
     }
   }
 
   /**
    * Wrap an Intersection target around the given source Memory containing intersection data.
+   * @See SetOperation#wrap(Memory, long)
    * @param srcMem The source Memory image.
    * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
    * @param seed <a href="{@docRoot}/resources/dictionary.html#seed">See seed</a>
-   * @return an IntersectionImplR that wraps a read-only Intersection image referenced by srcMem
+   * @return an IntersectionImplR that wraps a read-only Intersection image
    */
   static IntersectionImplR wrapInstance(final Memory srcMem, final long seed) {
+    //SrcMem:gets and stores the seedHash, checks mem_seedHash against the seed
     final IntersectionImplR impl = new IntersectionImplR((WritableMemory) srcMem, seed, false);
     return internalWrapInstance(srcMem, impl);
   }
@@ -134,7 +147,7 @@ class IntersectionImplR extends Intersection {
     impl.curCount_ = curCount;
     impl.thetaLong_ = thetaLong;
     impl.empty_ = empty;
-    impl.maxLgArrLongs_ = checkMaxLgArrLongs(srcMem); //Only Off Heap, check for min size
+    impl.maxLgArrLongs_ = getMaxLgArrLongs(srcMem); //Only Off Heap, check for min size
     return impl;
   }
 
@@ -154,10 +167,10 @@ class IntersectionImplR extends Intersection {
     }
     //else curCount > 0
     final long[] hashTable;
-    if (mem_ != null) {
+    if (wmem_ != null) {
       final int htLen = 1 << lgArrLongs_;
       hashTable = new long[htLen];
-      mem_.getLongArray(CONST_PREAMBLE_LONGS << 3, hashTable, 0, htLen);
+      wmem_.getLongArray(CONST_PREAMBLE_LONGS << 3, hashTable, 0, htLen);
     } else {
       hashTable = hashTable_;
     }
@@ -172,13 +185,13 @@ class IntersectionImplR extends Intersection {
    * as the infinite <i>Universal Set</i>.
    */
   @Override
-  int getRetainedEntries(final boolean valid) {
+  int getRetainedEntries() {
     return curCount_;
   }
 
   @Override
   public boolean hasResult() {
-    return (mem_ != null) ? mem_.getInt(RETAINED_ENTRIES_INT) >= 0 : curCount_ >= 0;
+    return (wmem_ != null) ? wmem_.getInt(RETAINED_ENTRIES_INT) >= 0 : curCount_ >= 0;
   }
 
   @Override
@@ -188,7 +201,7 @@ class IntersectionImplR extends Intersection {
 
   @Override
   public boolean isSameResource(final Memory that) {
-    return (mem_ != null) ? mem_.isSameResource(that) : false;
+    return (wmem_ != null) ? wmem_.isSameResource(that) : false;
   }
 
   @Override
@@ -201,8 +214,8 @@ class IntersectionImplR extends Intersection {
     final int preBytes = CONST_PREAMBLE_LONGS << 3;
     final int dataBytes = (curCount_ > 0) ? 8 << lgArrLongs_ : 0;
     final byte[] byteArrOut = new byte[preBytes + dataBytes];
-    if (mem_ != null) {
-      mem_.getByteArray(0, byteArrOut, 0, preBytes + dataBytes);
+    if (wmem_ != null) {
+      wmem_.getByteArray(0, byteArrOut, 0, preBytes + dataBytes);
     }
     else {
       final WritableMemory memOut = WritableMemory.wrap(byteArrOut);
@@ -253,13 +266,13 @@ class IntersectionImplR extends Intersection {
 
   @Override
   long[] getCache() {
-    if (mem_ == null) {
+    if (wmem_ == null) {
       return (hashTable_ != null) ? hashTable_ : new long[0];
     }
     //Direct
     final int arrLongs = 1 << lgArrLongs_;
     final long[] outArr = new long[arrLongs];
-    mem_.getLongArray(CONST_PREAMBLE_LONGS << 3, outArr, 0, arrLongs);
+    wmem_.getLongArray(CONST_PREAMBLE_LONGS << 3, outArr, 0, arrLongs);
     return outArr;
   }
 
@@ -274,21 +287,23 @@ class IntersectionImplR extends Intersection {
   }
 
   /**
-   * Returns the correct maximum lgArrLongs given the capacity of the Memory. Checks that the
-   * capacity is large enough for the minimum sized hash table.
+   * Returns the maximum lgArrLongs given the capacity of the Memory.
    * @param dstMem the given Memory
-   * @return the correct maximum lgArrLongs given the capacity of the Memory
+   * @return the maximum lgArrLongs given the capacity of the Memory
    */
-  static final int checkMaxLgArrLongs(final Memory dstMem) {
+  static final int getMaxLgArrLongs(final Memory dstMem) {
     final int preBytes = CONST_PREAMBLE_LONGS << 3;
     final long cap = dstMem.getCapacity();
-    final int maxLgArrLongs =
-        Integer.numberOfTrailingZeros(floorPowerOf2((int)(cap - preBytes)) >>> 3);
-    if (maxLgArrLongs < MIN_LG_ARR_LONGS) {
+    return Integer.numberOfTrailingZeros(floorPowerOf2((int)(cap - preBytes)) >>> 3);
+  }
+
+  static final void checkMinSizeMemory(final Memory mem) {
+    final int minBytes = (CONST_PREAMBLE_LONGS << 3) + (8 << MIN_LG_ARR_LONGS);//280
+    final long cap = mem.getCapacity();
+    if (cap < minBytes) {
       throw new SketchesArgumentException(
-        "dstMem not large enough for minimum sized hash table: " + cap);
+          "Memory must be at least " + minBytes + " bytes. Actual capacity: " + cap);
     }
-    return maxLgArrLongs;
   }
 
   /**
diff --git a/src/main/java/org/apache/datasketches/theta/SetOperation.java b/src/main/java/org/apache/datasketches/theta/SetOperation.java
index b43977c..837d1a9 100644
--- a/src/main/java/org/apache/datasketches/theta/SetOperation.java
+++ b/src/main/java/org/apache/datasketches/theta/SetOperation.java
@@ -19,18 +19,14 @@
 
 package org.apache.datasketches.theta;
 
-import static java.lang.Math.max;
 import static org.apache.datasketches.Family.idToFamily;
 import static org.apache.datasketches.Util.DEFAULT_UPDATE_SEED;
-import static org.apache.datasketches.Util.MIN_LG_ARR_LONGS;
-import static org.apache.datasketches.Util.REBUILD_THRESHOLD;
 import static org.apache.datasketches.Util.ceilingPowerOf2;
 import static org.apache.datasketches.theta.PreambleUtil.FAMILY_BYTE;
 import static org.apache.datasketches.theta.PreambleUtil.SER_VER_BYTE;
 
 import org.apache.datasketches.Family;
 import org.apache.datasketches.SketchesArgumentException;
-import org.apache.datasketches.Util;
 import org.apache.datasketches.memory.Memory;
 import org.apache.datasketches.memory.WritableMemory;
 
@@ -58,6 +54,10 @@ public abstract class SetOperation {
    * SetOperation using the
    * <a href="{@docRoot}/resources/dictionary.html#defaultUpdateSeed">Default Update Seed</a>.
    * The resulting SetOperation will not retain any link to the source Memory.
+   *
+   * <p>Note: Only certain set operators during stateful operations can be serialized and thus
+   * heapified.</p>
+   *
    * @param srcMem an image of a SetOperation where the image seed hash matches the default seed hash.
    * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
    * @return a Heap-based SetOperation from the given Memory
@@ -70,6 +70,10 @@ public abstract class SetOperation {
    * Heapify takes the SetOperation image in Memory and instantiates an on-heap
    * SetOperation using the given seed.
    * The resulting SetOperation will not retain any link to the source Memory.
+   *
+   * <p>Note: Only certain set operators during stateful operations can be serialized and thus
+   * heapified.</p>
+   *
    * @param srcMem an image of a SetOperation where the hash of the given seed matches the image seed hash.
    * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
    * @param seed <a href="{@docRoot}/resources/dictionary.html#seed">See Update Hash Seed</a>.
@@ -95,9 +99,12 @@ public abstract class SetOperation {
   /**
    * Wrap takes the SetOperation image in Memory and refers to it directly.
    * There is no data copying onto the java heap.
-   * Only "Direct" SetOperations that have been explicitly stored as direct can be wrapped.
    * This method assumes the
    * <a href="{@docRoot}/resources/dictionary.html#defaultUpdateSeed">Default Update Seed</a>.
+   *
+   * <p>Note: Only certain set operators during stateful operations can be serialized and thus
+   * wrapped.</p>
+   *
    * @param srcMem an image of a SetOperation where the image seed hash matches the default seed hash.
    * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
    * @return a SetOperation backed by the given Memory
@@ -109,7 +116,10 @@ public abstract class SetOperation {
   /**
    * Wrap takes the SetOperation image in Memory and refers to it directly.
    * There is no data copying onto the java heap.
-   * Only "Direct" SetOperations that have been explicitly stored as direct can be wrapped.
+   *
+   * <p>Note: Only certain set operators during stateful operations can be serialized and thus
+   * wrapped.</p>
+   *
    * @param srcMem an image of a SetOperation where the hash of the given seed matches the image seed hash.
    * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
    * @param seed <a href="{@docRoot}/resources/dictionary.html#seed">See Update Hash Seed</a>.
@@ -122,7 +132,7 @@ public abstract class SetOperation {
     if (serVer != 3) {
       throw new SketchesArgumentException("SerVer must be 3: " + serVer);
     }
-    switch (family) { //TODO Do we need to add the stateful AnotB ?
+    switch (family) {
       case UNION : {
         return UnionImpl.wrapInstance(srcMem, seed);
       }
@@ -137,9 +147,12 @@ public abstract class SetOperation {
   /**
    * Wrap takes the SetOperation image in Memory and refers to it directly.
    * There is no data copying onto the java heap.
-   * Only "Direct" SetOperations that have been explicitly stored as direct can be wrapped.
    * This method assumes the
    * <a href="{@docRoot}/resources/dictionary.html#defaultUpdateSeed">Default Update Seed</a>.
+   *
+   * <p>Note: Only certain set operators during stateful operations can be serialized and thus
+   * wrapped.</p>
+   *
    * @param srcMem an image of a SetOperation where the image seed hash matches the default seed hash.
    * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
    * @return a SetOperation backed by the given Memory
@@ -151,7 +164,10 @@ public abstract class SetOperation {
   /**
    * Wrap takes the SetOperation image in Memory and refers to it directly.
    * There is no data copying onto the java heap.
-   * Only "Direct" SetOperations that have been explicitly stored as direct can be wrapped.
+   *
+   * <p>Note: Only certain set operators during stateful operations can be serialized and thus
+   * wrapped.</p>
+   *
    * @param srcMem an image of a SetOperation where the hash of the given seed matches the image seed hash.
    * <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
    * @param seed <a href="{@docRoot}/resources/dictionary.html#seed">See Update Hash Seed</a>.
@@ -202,17 +218,16 @@ public abstract class SetOperation {
   }
 
   /**
-   * Returns the maximum number of bytes for the returned CompactSketch, given the maximum
+   * Returns the maximum number of bytes for the returned CompactSketch, given the
    * value of nomEntries of the first sketch A of AnotB.
-   * @param maxNomEntries the given value must be a power of 2.
+   * @param nomEntries this value must be a power of 2.
    * @return the maximum number of bytes.
    */
-  public static int getMaxAnotBResultBytes(final int maxNomEntries) {
-    final int ceil = ceilingPowerOf2(maxNomEntries);
+  public static int getMaxAnotBResultBytes(final int nomEntries) {
+    final int ceil = ceilingPowerOf2(nomEntries);
     return 24 + (15 * ceil);
   }
 
-
   /**
    * Gets the Family of this SetOperation
    * @return the Family of this SetOperation
@@ -223,6 +238,10 @@ public abstract class SetOperation {
    * Returns true if the backing resource of <i>this</i> is identical with the backing resource
    * of <i>that</i>. The capacities must be the same.  If <i>this</i> is a region,
    * the region offset must also be the same.
+   *
+   * <p>Note: Only certain set operators during stateful operations can be serialized.
+   * Only when they are stored into Memory will this be relevant.</p>
+   *
    * @param that A different non-null object
    * @return true if the backing resource of <i>this</i> is the same as the backing resource
    * of <i>that</i>.
@@ -231,45 +250,43 @@ public abstract class SetOperation {
 
   //restricted
 
+  /**
+   * Gets the hash array in compact form.
+   * This is only useful during stateful operations.
+   * This should never be made public.
+   * @return the hash array
+   */
   abstract long[] getCache();
 
-  //intentionally not made public because behavior will be confusing to end user.
-  abstract int getRetainedEntries(boolean valid);
+  /**
+   * Gets the current count of retained entries.
+   * This is only useful during stateful operations.
+   * Intentionally not made public because behavior will be confusing to end user.
+   *
+   * @return Gets the current count of retained entries.
+   */
+  abstract int getRetainedEntries();
 
+  /**
+   * Returns the seedHash established during class construction.
+   * @return the seedHash.
+   */
   abstract short getSeedHash();
 
-  abstract long getThetaLong();
-
-  static short computeSeedHash(final long seed) {
-    return Util.computeSeedHash(seed);
-  }
-
-  //intentionally not made public because behavior will be confusing to end user.
-  abstract boolean isEmpty();
-
   /**
-   * Computes minimum lgArrLongs from a current count.
-   * @param count the given current count
-   * @return the minimum lgArrLongs from a current count.
+   * Gets the current value of ThetaLong.
+   * Only useful during stateful operations.
+   * Intentionally not made public because behavior will be confusing to end user.
+   * @return the current value of ThetaLong.
    */
-  //Used by intersection and AnotB
-  static final int computeMinLgArrLongsFromCount(final int count) {
-    final int upperCount = (int) Math.ceil(count / REBUILD_THRESHOLD);
-    final int arrLongs = max(ceilingPowerOf2(upperCount), 1 << MIN_LG_ARR_LONGS);
-    final int newLgArrLongs = Integer.numberOfTrailingZeros(arrLongs);
-    return newLgArrLongs;
-  }
+  abstract long getThetaLong();
 
   /**
-   * Returns true if given Family id is one of the set operations
-   * @param id the given Family id
-   * @return true if given Family id is one of the set operations
+   * Returns true if this set operator is empty.
+   * Only useful during stateful operations.
+   * Intentionally not made public because behavior will be confusing to end user.
+   * @return true if this set operator is empty.
    */
-  static boolean isValidSetOpID(final int id) {
-    final Family family = Family.idToFamily(id);
-    final boolean ret = ((family == Family.UNION) || (family == Family.INTERSECTION)
-        || (family == Family.A_NOT_B));
-    return ret;
-  }
+  abstract boolean isEmpty();
 
 }
diff --git a/src/main/java/org/apache/datasketches/theta/UnionImpl.java b/src/main/java/org/apache/datasketches/theta/UnionImpl.java
index a1b20bc..72515f6 100644
--- a/src/main/java/org/apache/datasketches/theta/UnionImpl.java
+++ b/src/main/java/org/apache/datasketches/theta/UnionImpl.java
@@ -22,6 +22,7 @@ package org.apache.datasketches.theta;
 import static java.lang.Math.min;
 import static org.apache.datasketches.QuickSelect.selectExcludingZeros;
 import static org.apache.datasketches.Util.DEFAULT_UPDATE_SEED;
+import static org.apache.datasketches.Util.computeSeedHash;
 import static org.apache.datasketches.theta.PreambleUtil.COMPACT_FLAG_MASK;
 import static org.apache.datasketches.theta.PreambleUtil.ORDERED_FLAG_MASK;
 import static org.apache.datasketches.theta.PreambleUtil.PREAMBLE_LONGS_BYTE;
@@ -477,8 +478,8 @@ final class UnionImpl extends Union {
   }
 
   @Override
-  int getRetainedEntries(final boolean valid) {
-    return gadget_.getRetainedEntries(valid);
+  int getRetainedEntries() {
+    return gadget_.getRetainedEntries(true);
   }
 
   @Override
diff --git a/src/test/java/org/apache/datasketches/theta/AnotBimplTest.java b/src/test/java/org/apache/datasketches/theta/AnotBimplTest.java
index c7cc1ef..5c61c01 100644
--- a/src/test/java/org/apache/datasketches/theta/AnotBimplTest.java
+++ b/src/test/java/org/apache/datasketches/theta/AnotBimplTest.java
@@ -59,7 +59,7 @@ public class AnotBimplTest {
 
     aNb.setA(usk1);
     aNb.notB(usk2);
-    assertEquals(aNb.getRetainedEntries(true), k/2);
+    assertEquals(aNb.getRetainedEntries(), k/2);
 
     CompactSketch rsk1;
 
diff --git a/src/test/java/org/apache/datasketches/theta/DirectIntersectionTest.java b/src/test/java/org/apache/datasketches/theta/DirectIntersectionTest.java
index f9ede9e..062c190 100644
--- a/src/test/java/org/apache/datasketches/theta/DirectIntersectionTest.java
+++ b/src/test/java/org/apache/datasketches/theta/DirectIntersectionTest.java
@@ -69,7 +69,7 @@ public class DirectIntersectionTest {
     inter.intersect(usk1);
     inter.intersect(usk2);
 
-    long[] cache = inter.getCache();
+    long[] cache = inter.getCache(); //only applies to stateful
     assertEquals(cache.length, 32);
 
     CompactSketch rsk1;
@@ -454,7 +454,7 @@ public class DirectIntersectionTest {
   @Test
   public void checkWrapVirginEmpty() {
     int lgK = 5;
-    int k = 1<<lgK;
+    int k = 1 << lgK;
     Intersection inter1, inter2;
     UpdateSketch sk1;
 
@@ -484,7 +484,7 @@ public class DirectIntersectionTest {
 
     //test the path via toByteArray, wrap, now in a different state
     iMem = WritableMemory.wrap(inter1.toByteArray());
-    inter2 = Sketches.wrapIntersection(iMem);
+    inter2 = Sketches.wrapIntersection((Memory)iMem);
     assertTrue(inter2.hasResult()); //still true
 
     //test the compaction path
diff --git a/src/test/java/org/apache/datasketches/theta/HeapIntersectionTest.java b/src/test/java/org/apache/datasketches/theta/HeapIntersectionTest.java
index 179e847..2f6fa8f 100644
--- a/src/test/java/org/apache/datasketches/theta/HeapIntersectionTest.java
+++ b/src/test/java/org/apache/datasketches/theta/HeapIntersectionTest.java
@@ -472,10 +472,10 @@ public class HeapIntersectionTest {
     Intersection inter = Sketches.setOperationBuilder().buildIntersection();
     inter.intersect(usk);
     assertTrue(inter.isEmpty());
-    assertEquals(inter.getRetainedEntries(true), 0);
+    assertEquals(inter.getRetainedEntries(), 0);
     assertTrue(inter.getSeedHash() != 0);
     assertEquals(inter.getThetaLong(), Long.MAX_VALUE);
-    long[] longArr = inter.getCache();
+    long[] longArr = inter.getCache(); //only applies to stateful
     assertEquals(longArr.length, 0);
   }
 
@@ -486,10 +486,10 @@ public class HeapIntersectionTest {
     Intersection inter = Sketches.setOperationBuilder().buildIntersection();
     inter.intersect(usk);
     assertFalse(inter.isEmpty());
-    assertEquals(inter.getRetainedEntries(true), 1);
+    assertEquals(inter.getRetainedEntries(), 1);
     assertTrue(inter.getSeedHash() != 0);
     assertEquals(inter.getThetaLong(), Long.MAX_VALUE);
-    long[] longArr = inter.getCache();
+    long[] longArr = inter.getCache(); //only applies to stateful
     assertEquals(longArr.length, 32);
   }
 
diff --git a/src/test/java/org/apache/datasketches/theta/SetOperationTest.java b/src/test/java/org/apache/datasketches/theta/SetOperationTest.java
index ca32df4..a599f40 100644
--- a/src/test/java/org/apache/datasketches/theta/SetOperationTest.java
+++ b/src/test/java/org/apache/datasketches/theta/SetOperationTest.java
@@ -19,11 +19,9 @@
 
 package org.apache.datasketches.theta;
 
-import static org.apache.datasketches.Family.A_NOT_B;
-import static org.apache.datasketches.Family.INTERSECTION;
-import static org.apache.datasketches.Family.UNION;
+import static org.apache.datasketches.HashOperations.minLgHashTableSize;
 import static org.apache.datasketches.ResizeFactor.X4;
-import static org.apache.datasketches.theta.SetOperation.computeMinLgArrLongsFromCount;
+import static org.apache.datasketches.Util.REBUILD_THRESHOLD;
 import static org.apache.datasketches.theta.Sketch.getMaxUpdateSketchBytes;
 import static org.testng.Assert.assertEquals;
 //import static org.testng.Assert.assertTrue;
@@ -227,8 +225,8 @@ public class SetOperationTest {
 
   @Test
   public void checkComputeLgArrLongs() {
-    assertEquals(computeMinLgArrLongsFromCount(30), 5);
-    assertEquals(computeMinLgArrLongsFromCount(31), 6);
+    assertEquals(minLgHashTableSize(30, REBUILD_THRESHOLD), 5);
+    assertEquals(minLgHashTableSize(31, REBUILD_THRESHOLD), 6);
   }
 
   /**
@@ -280,14 +278,6 @@ public class SetOperationTest {
   }
 
   @Test
-  public void checkValidSetOpID() {
-    assertFalse(SetOperation.isValidSetOpID(1)); //Alpha
-    assertTrue(SetOperation.isValidSetOpID(UNION.getID()));
-    assertTrue(SetOperation.isValidSetOpID(INTERSECTION.getID()));
-    assertTrue(SetOperation.isValidSetOpID(A_NOT_B.getID()));
-  }
-
-  @Test
   public void setOpsExample() {
     println("Set Operations Example:");
     int k = 4096;
diff --git a/src/test/java/org/apache/datasketches/theta/UnionImplTest.java b/src/test/java/org/apache/datasketches/theta/UnionImplTest.java
index dbb359d..c8e8817 100644
--- a/src/test/java/org/apache/datasketches/theta/UnionImplTest.java
+++ b/src/test/java/org/apache/datasketches/theta/UnionImplTest.java
@@ -193,8 +193,8 @@ public class UnionImplTest {
     assertTrue(union.isEmpty());
     assertEquals(union.getThetaLong(), Long.MAX_VALUE);
     assertEquals(union.getSeedHash(), Util.computeSeedHash(DEFAULT_UPDATE_SEED));
-    assertEquals(union.getRetainedEntries(true), 0);
-    assertEquals(union.getCache().length, 128);
+    assertEquals(union.getRetainedEntries(), 0);
+    assertEquals(union.getCache().length, 128); //only applies to stateful
   }
 
   @Test


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org