You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by le...@apache.org on 2020/07/07 23:37:04 UTC

[incubator-datasketches-java] 01/10: Interim. This removed much of the code in the PairwiseSetOperations class and replaced it with references to the now equivalent classes that extend SetOperation.

This is an automated email from the ASF dual-hosted git repository.

leerho pushed a commit to branch Refactor_Theta_Tuple
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-java.git

commit 85d44429a1b032c5ff1758c2e595b61b44613d7a
Author: Lee Rhodes <le...@users.noreply.github.com>
AuthorDate: Fri Jun 26 16:43:41 2020 -0700

    Interim.  This removed much of the code in the PairwiseSetOperations
    class and replaced it with references to the now equivalent classes that
    extend SetOperation.
---
 .../java/org/apache/datasketches/theta/AnotB.java  |  31 +-
 .../org/apache/datasketches/theta/AnotBimpl.java   | 321 ++-------------------
 .../datasketches/theta/IntersectionImplR.java      |   1 -
 .../datasketches/theta/PairwiseSetOperations.java  | 156 +---------
 .../apache/datasketches/theta/SetOperation.java    |  13 -
 .../apache/datasketches/theta/AnotBimplTest.java   |   7 +-
 .../datasketches/theta/SetOpsCornerCasesTest.java  |  32 +-
 7 files changed, 67 insertions(+), 494 deletions(-)

diff --git a/src/main/java/org/apache/datasketches/theta/AnotB.java b/src/main/java/org/apache/datasketches/theta/AnotB.java
index 4a4527a..0c90581 100644
--- a/src/main/java/org/apache/datasketches/theta/AnotB.java
+++ b/src/main/java/org/apache/datasketches/theta/AnotB.java
@@ -24,18 +24,33 @@ import org.apache.datasketches.memory.WritableMemory;
 
 /**
  * The API for the set difference operation <i>A and not B</i> operations.
- * This is a stateless operation. However, to make the API
- * more consistent with the other set operations the intended use is:
+ * This class includes both stateful and stateless operations.
+ *
+ * <p>The stateful operation is as follows:</p>
+ * <pre><code>
+ * AnotB anotb = SetOperation.builder().buildANotB();
+ *
+ * anotb.setA(Sketch skA); //The first argument.
+ * anotb.notB(Sketch skB); //The second (subtraction) argument.
+ * anotb.notB(Sketch skC); // ...any number of additional subtractions...
+ * anotb.getResult(false); //Get an interim result.
+ * anotb.notB(Sketch skD); //Additional subtractions.
+ * anotb.getResult(true);  //Final result and resets the AnotB operator.
+ * </code></pre>
+ *
+ * <p>The stateless operation is as follows:</p>
  * <pre><code>
- * AnotB aNotB = SetOperationBuilder.buildAnotB();
- * aNotB.update(SketchA, SketchB); //Called only once.
- * CompactSketch result = aNotB.getResult();
+ * AnotB anotb = SetOperation.builder().buildANotB();
+ *
+ * CompactSketch csk = anotb.aNotB(Sketch skA, Sketch skB);
  * </code></pre>
  *
- * <p>Calling the update function a second time essentially clears the internal state and updates
- * with the new pair of sketches.
+ * <p>Calling the <i>setA</i> operation a second time essentially clears the internal state and loads
+ * the new sketch.</p>
  *
- * <p>As an alternative, one can use the aNotB method that returns the result immediately.
+ * <p>The stateless and stateful operations are independent of each other with the exception of
+ * sharing the same update hash seed loaded as the default seed or specified by the user as an
+ * argument to the builder.</p>
  *
  * @author Lee Rhodes
  */
diff --git a/src/main/java/org/apache/datasketches/theta/AnotBimpl.java b/src/main/java/org/apache/datasketches/theta/AnotBimpl.java
index 576af7b..752baac 100644
--- a/src/main/java/org/apache/datasketches/theta/AnotBimpl.java
+++ b/src/main/java/org/apache/datasketches/theta/AnotBimpl.java
@@ -19,16 +19,11 @@
 
 package org.apache.datasketches.theta;
 
-import static java.lang.Math.min;
 import static org.apache.datasketches.HashOperations.convertToHashTable;
-import static org.apache.datasketches.HashOperations.hashArrayInsert;
 import static org.apache.datasketches.HashOperations.hashSearch;
 import static org.apache.datasketches.Util.REBUILD_THRESHOLD;
 import static org.apache.datasketches.Util.checkSeedHashes;
 import static org.apache.datasketches.Util.simpleIntLog2;
-import static org.apache.datasketches.theta.PreambleUtil.COMPACT_FLAG_MASK;
-import static org.apache.datasketches.theta.PreambleUtil.ORDERED_FLAG_MASK;
-import static org.apache.datasketches.theta.PreambleUtil.READ_ONLY_FLAG_MASK;
 
 import java.util.Arrays;
 
@@ -36,7 +31,7 @@ import org.apache.datasketches.memory.Memory;
 import org.apache.datasketches.memory.WritableMemory;
 
 /**
- * Implements the A-and-not-B operation.
+ * Implements the A-and-not-B operations.
  * @author Lee Rhodes
  * @author Kevin Lang
  */
@@ -47,12 +42,6 @@ final class AnotBimpl extends AnotB {
   private long[] hashArr_; //compact array w curCount_ entries
   private int curCount_;
 
-  //Remove all 4 of these with deprecated
-  private Sketch skA_;
-  private Sketch skB_;
-  private int lgArrLongsHT_; //for Hash Table only. may not need to be member after refactoring
-  private long[] bHashTable_; //may not need to be member after refactoring.
-
   /**
    * Construct a new AnotB SetOperation on the java heap.  Called by SetOperation.Builder.
    *
@@ -148,55 +137,12 @@ final class AnotBimpl extends AnotB {
   public CompactSketch getResult(final boolean dstOrdered, final WritableMemory dstMem,
       final boolean reset) {
     final CompactSketch result =
-        getResult(hashArr_, empty_, seedHash_, curCount_, thetaLong_, dstOrdered, dstMem);
+        CompactOperations.componentsToCompact(
+            thetaLong_, curCount_, seedHash_, empty_, true, false, dstOrdered, dstMem, hashArr_);
     if (reset) { reset(); }
     return result;
   }
 
-  private static CompactSketch getResult(
-      final long[] hashArr,
-      final boolean empty,
-      final short seedHash,
-      final int curCount,
-      final long thetaLong,
-      final boolean dstOrdered,
-      final WritableMemory dstMem) {
-    final CompactSketch result;
-    if (dstMem == null) { //Heap
-      if (empty) { return EmptyCompactSketch.getInstance(); }
-      if (curCount == 1) { return new SingleItemSketch(hashArr[0], seedHash); }
-      //curCount > 1
-      if (dstOrdered) {
-        Arrays.sort(hashArr);
-        result = new HeapCompactOrderedSketch(hashArr, false, seedHash, curCount, thetaLong);
-      } else {
-        result = new HeapCompactUnorderedSketch(hashArr, false, seedHash, curCount, thetaLong);
-      }
-    }
-    else { //Direct
-      if (empty) {
-        dstMem.putByteArray(0, EmptyCompactSketch.EMPTY_COMPACT_SKETCH_ARR, 0, 8);
-        return EmptyCompactSketch.getInstance();
-      }
-      if (curCount == 1) {
-        final SingleItemSketch sis = new SingleItemSketch(hashArr[0], seedHash);
-        dstMem.putByteArray(0, sis.toByteArray(), 0, 16);
-      }
-      final int preLongs = CompactOperations.computeCompactPreLongs(thetaLong, false, curCount);
-      if (dstOrdered) {
-        final byte flags = (byte)(READ_ONLY_FLAG_MASK | COMPACT_FLAG_MASK | ORDERED_FLAG_MASK);
-        Arrays.sort(hashArr);
-        CompactOperations.loadCompactMemory(hashArr, seedHash, curCount, thetaLong, dstMem, flags, preLongs);
-        result = new DirectCompactOrderedSketch(dstMem);
-      } else {
-        final byte flags = (byte)(READ_ONLY_FLAG_MASK | COMPACT_FLAG_MASK);
-        CompactOperations.loadCompactMemory(hashArr, seedHash, curCount, thetaLong, dstMem, flags, preLongs);
-        result = new DirectCompactUnorderedSketch(dstMem);
-      }
-    }
-    return result;
-  }
-
   @Override
   public CompactSketch aNotB(final Sketch skA, final Sketch skB, final boolean dstOrdered,
       final WritableMemory dstMem) {
@@ -245,7 +191,8 @@ final class AnotBimpl extends AnotB {
     empty = ((nonMatches == 0) && (thetaLong == Long.MAX_VALUE));
     final long[] hashArrOut = Arrays.copyOfRange(tmpHashArrA, 0, nonMatches);
     final CompactSketch result =
-        AnotBimpl.getResult(hashArrOut, empty, seedHash_, nonMatches, thetaLong, dstOrdered, dstMem);
+        CompactOperations.componentsToCompact(
+            thetaLong, nonMatches, seedHash_, empty, true, false, dstOrdered, dstMem, hashArrOut);
     return result;
   }
 
@@ -264,249 +211,13 @@ final class AnotBimpl extends AnotB {
     return false;
   }
 
-  //Deprecated methods
-
-  @Deprecated
-  @Override
-  public void update(final Sketch a, final Sketch b) {
-    skA_ = a;
-    skB_ = b;
-    thetaLong_ = Long.MAX_VALUE; //*
-    empty_ = true; //*
-    hashArr_ = null; //*
-    curCount_ = 0; //*
-    lgArrLongsHT_ = 5;
-    bHashTable_ = null;
-    compute();
-  }
-
-  @Deprecated
-  @Override
-  public CompactSketch getResult() {
-    return getResult(true, null);
-  }
-
-  @Deprecated
-  @Override
-  public CompactSketch getResult(final boolean dstOrdered, final WritableMemory dstMem) {
-    final long[] compactCache = (curCount_ <= 0)
-        ? new long[0]
-        : Arrays.copyOfRange(hashArr_, 0, curCount_);
-    if (dstOrdered && (curCount_ > 1)) {
-      Arrays.sort(compactCache);
-    }
-    //Create the CompactSketch
-    final CompactSketch csk = CompactOperations.componentsToCompact(
-        thetaLong_, curCount_, seedHash_, empty_, true, dstOrdered, dstOrdered, dstMem, compactCache);
-    reset();
-    return csk;
-  }
-
   //restricted
 
-  void compute() {
-    final int swA = ((skA_ == null) || (skA_ instanceof EmptyCompactSketch))
-        ? 0
-        : (skA_.isEmpty())
-          ? 1
-          : (skA_ instanceof UpdateSketch)
-            ? 4
-            : (skA_.isOrdered())
-              ? 3
-              : 2;
-    final int swB = ((skB_ == null) || (skB_ instanceof EmptyCompactSketch))
-        ? 0
-        : (skB_.isEmpty())
-          ? 1
-          : (skB_ instanceof UpdateSketch)
-            ? 4
-            : (skB_.isOrdered())
-              ? 3
-              : 2;
-    final int sw = (swA * 8) | swB;
-
-    //  NOTES:
-    //    In the table below, A and B refer to the two input sketches in the order A-not-B.
-    //    The Theta rule: min(ThetaA, ThetaB)
-    //    The Empty rule: Whatever the empty state of A is: E(A)
-    //    The Return triple is defined as: (Theta, Count, EmptyFlag).
-    //    bHashTable temporarily stores the values of B.
-    //    A sketch in stored form can be in one of 5 states.
-    //    Null is not actually a state, but is included for completeness.
-    //    Null is interpreted as {Theta = 1.0, count = 0, empty = true}.
-    //    In some cases the empty state may have Theta < 1.0 but it is ignored; count must be zero.
-    //    State:
-    //      0 N Null or instance of EmptyCompactSketch
-    //      1 E Empty bit set
-    //      2 C Compact, not ordered
-    //      3 O Compact Ordered
-    //      4 H Hash-Table
-    //
-    //A    B    swA  swB  Case  Actions
-    //N    N    0    0    0     Return (1.0, 0, T)
-    //N    E    0    1    1     CheckB,  Return (1.0, 0, T)
-    //N    C    0    2    2     CheckB,  Return (1.0, 0, T)
-    //N    O    0    3    3     CheckB,  Return (1.0, 0, T)
-    //N    H    0    4    4     CheckB,  Return (1.0, 0, T)
-    //E    N    1    0    8     CheckA,  Return (1.0, 0, T)
-    //E    E    1    1    9     CheckAB, Return (1.0, 0, T)
-    //E    C    1    2    10    CheckAB, Return (1.0, 0, T)
-    //E    O    1    3    11    CheckAB, Return (1.0, 0, T)
-    //E    H    1    4    12    CheckAB, Return (1.0, 0, T)
-    //C    N    2    0    16    CheckA,  Return (ThA, |A|, F), copyA
-    //C    E    2    1    17    CheckAB, Return (ThA, |A|, F)), copyA
-    //C    C    2    2    18    CheckAB, B -> H; => C,H; scanAllAsearchB()
-    //C    O    2    3    19    CheckAB, B -> H; => C,H; scanAllAsearchB()
-    //C    H    2    4    20    CheckAB, scanAllAsearchB()
-    //O    N    3    0    24    CheckA,  Return (ThA, |A|, F), copyA
-    //O    E    3    1    25    CheckAB, Return (ThA, |A|, F), copyA
-    //O    C    3    2    26    CheckAB, B -> H; => O,H; scanEarlyStopAsearchB()
-    //O    O    3    3    27    CheckAB, B -> H; => O,H; scanEarlyStopAsearchB()
-    //O    H    3    4    28    CheckAB, scanEarlyStopAsearchB()
-    //H    N    4    0    32    CheckA,  Return (ThA, |A|, F), copyA
-    //H    E    4    1    33    CheckAB, Return (ThA, |A|, F), copyA
-    //H    C    4    2    34    CheckAB, B -> H; => H,H; scanAllAsearchB()
-    //H    O    4    3    35    CheckAB, B -> H; => H,H; scanAllAsearchB()
-    //H    H    4    4    36    CheckAB, scanAllAsearchB()
-
-    switch (sw) {
-      case 0 :  //A Null, B Null;    Return (1.0, 0, T)
-        thetaLong_ = Long.MAX_VALUE;
-        empty_ = true;
-        break;
-
-      case 10:   //A Empty, B Compact; CheckAB, Return (1.0, 0, T)
-      case 11:   //A Empty, B Ordered; CheckAB, Return (1.0, 0, T)
-      case 12:   //A Empty, B HashTbl; CheckAB, Return (1.0, 0, T)
-        checkSeedHashes(seedHash_, skA_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
-        //$FALL-THROUGH$
-      case 1:    //A Null, B Empty;   CheckB,  Return (1.0, 0, T)
-      case 2:    //A Null, B Compact; CheckB,  Return (1.0, 0, T)
-      case 3:    //A Null, B Ordered; CheckB,  Return (1.0, 0, T)
-      case 4:    //A Null, B HashTbl; CheckB,  Return (1.0, 0, T)
-        checkSeedHashes(seedHash_, skB_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
-        thetaLong_ = Long.MAX_VALUE;
-        empty_ = true;
-        break;
-
-      case 9:   //A Empty, B Empty;   CheckAB, Return (1.0, 0, T)
-        checkSeedHashes(seedHash_, skB_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
-        //$FALL-THROUGH$
-      case 8:   //A Empty, B Null;    CheckA,  Return (1.0, 0, T)
-        checkSeedHashes(seedHash_, skA_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
-        thetaLong_ = Long.MAX_VALUE;
-        empty_ = true;
-        break;
-
-      case 17:   //A Compact, B Empty; CheckAB, Return (ThA, |A|, F), copyA
-      case 25:   //A Ordered, B Empty; CheckAB, Return (ThA, |A|, F), copyA
-      case 33:  //A HashTbl, B Empty; CheckAB, Return (ThA, |A|, F), copyA
-        checkSeedHashes(seedHash_, skB_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
-        //$FALL-THROUGH$
-      case 16:   //A Compact, B Null;  CheckA,  Return (ThA, |A|, F), copyA
-      case 24:   //A Ordered, B Null;  CheckA,  Return (ThA, |A|, F), copyA
-      case 32:  //A HashTbl, B Null;  CheckA,  Return (ThA, |A|, F), copyA
-        checkSeedHashes(seedHash_, skA_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
-        thetaLong_ = skA_.getThetaLong();
-        empty_ = false;
-        curCount_ = skA_.getRetainedEntries(true);
-        hashArr_ = CompactOperations.compactCache(skA_.getCache(), curCount_, thetaLong_, false);
-        break;
-
-      case 18:   //A Compact, B Compact; CheckAB, B -> H; => C,H; scanAllAsearchB()
-      case 19:   //A Compact, B Ordered; CheckAB, B -> H; => C,H; scanAllAsearchB()
-      case 34:   //A HashTbl, B Compact; CheckAB, B -> H; => H,H; scanAllAsearchB()
-      case 35:  //A HashTbl, B Ordered; CheckAB, B -> H; => H,H; scanAllAsearchB()
-        checkSeedHashes(seedHash_, skA_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
-        checkSeedHashes(seedHash_, skB_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
-        thetaLong_ = min(skA_.getThetaLong(), skB_.getThetaLong());
-        empty_ = false;
-        convertBtoHT();
-        scanAllAsearchB();
-        break;
-
-      case 26:   //A Ordered, B Compact; CheckAB, B -> H; => O,H; scanEarlyStopAsearchB()
-      case 27:  //A Ordered, B Ordered; CheckAB, B -> H; => O,H; scanEarlyStopAsearchB()
-        checkSeedHashes(seedHash_, skA_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
-        checkSeedHashes(seedHash_, skB_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
-        thetaLong_ = min(skA_.getThetaLong(), skB_.getThetaLong());
-        empty_ = false;
-        convertBtoHT();
-        scanEarlyStopAsearchB();
-        break;
-
-      case 20:   //A Compact, B HashTbl; CheckAB, scanAllAsearchB()
-      case 36:  //A HashTbl, B HashTbl; CheckAB, scanAllAsearchB()
-        checkSeedHashes(seedHash_, skA_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
-        checkSeedHashes(seedHash_, skB_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
-        thetaLong_ = min(skA_.getThetaLong(), skB_.getThetaLong());
-        empty_ = false;
-        lgArrLongsHT_ = ((UpdateSketch)skB_).getLgArrLongs();
-        bHashTable_ = skB_.getCache();
-        scanAllAsearchB();
-        break;
-
-      case 28:  //A Ordered, B HashTbl; CheckAB, scanEarlyStopAsearchB()
-        checkSeedHashes(seedHash_, skA_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
-        checkSeedHashes(seedHash_, skB_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
-        thetaLong_ = min(skA_.getThetaLong(), skB_.getThetaLong());
-        empty_ = false;
-        lgArrLongsHT_ = ((UpdateSketch)skB_).getLgArrLongs();
-        bHashTable_ = skB_.getCache();
-        scanEarlyStopAsearchB();
-        break;
-
-      //default: //This cannot happen and cannot be tested
-    }
-  }
-
-  private void convertBtoHT() {
-    final int curCountB = skB_.getRetainedEntries(true);
-    lgArrLongsHT_ = computeMinLgArrLongsFromCount(curCountB);
-    bHashTable_ = new long[1 << lgArrLongsHT_];
-    hashArrayInsert(skB_.getCache(), bHashTable_, lgArrLongsHT_, thetaLong_);
-  }
-
-  //Sketch A is either unordered compact or hash table
-  private void scanAllAsearchB() {
-    final long[] scanAArr = skA_.getCache();
-    final int arrLongsIn = scanAArr.length;
-    hashArr_ = new long[arrLongsIn];
-    for (int i = 0; i < arrLongsIn; i++ ) {
-      final long hashIn = scanAArr[i];
-      if ((hashIn <= 0L) || (hashIn >= thetaLong_)) { continue; }
-      final int foundIdx = hashSearch(bHashTable_, lgArrLongsHT_, hashIn);
-      if (foundIdx > -1) { continue; }
-      hashArr_[curCount_++] = hashIn;
-    }
-  }
-
-  //Sketch A is ordered compact, which enables early stop
-  private void scanEarlyStopAsearchB() {
-    final long[] scanAArr = skA_.getCache();
-    final int arrLongsIn = scanAArr.length;
-    hashArr_ = new long[arrLongsIn]; //maybe 2x what is needed, but getRetainedEntries can be slow.
-    for (int i = 0; i < arrLongsIn; i++ ) {
-      final long hashIn = scanAArr[i];
-      if (hashIn <= 0L) { continue; }
-      if (hashIn >= thetaLong_) {
-        break; //early stop assumes that hashes in input sketch are ordered!
-      }
-      final int foundIdx = hashSearch(bHashTable_, lgArrLongsHT_, hashIn);
-      if (foundIdx > -1) { continue; }
-      hashArr_[curCount_++] = hashIn;
-    }
-  }
-
   private void reset() {
-    skA_ = null;
-    skB_ = null;
     thetaLong_ = Long.MAX_VALUE;
     empty_ = true;
     hashArr_ = null;
     curCount_ = 0;
-    lgArrLongsHT_ = 5;
-    bHashTable_ = null;
   }
 
   @Override
@@ -524,4 +235,26 @@ final class AnotBimpl extends AnotB {
     return thetaLong_;
   }
 
+  //Deprecated methods
+
+  @Deprecated
+  @Override
+  public void update(final Sketch skA, final Sketch skB) {
+    reset();
+    setA(skA);
+    notB(skB);
+  }
+
+  @Deprecated
+  @Override
+  public CompactSketch getResult() {
+    return getResult(true, null);
+  }
+
+  @Deprecated
+  @Override
+  public CompactSketch getResult(final boolean dstOrdered, final WritableMemory dstMem) {
+    return getResult(dstOrdered, dstMem, true);
+  }
+
 }
diff --git a/src/main/java/org/apache/datasketches/theta/IntersectionImplR.java b/src/main/java/org/apache/datasketches/theta/IntersectionImplR.java
index 6b79a24..65359d2 100644
--- a/src/main/java/org/apache/datasketches/theta/IntersectionImplR.java
+++ b/src/main/java/org/apache/datasketches/theta/IntersectionImplR.java
@@ -173,7 +173,6 @@ class IntersectionImplR extends Intersection {
     }
     compactCacheR = compactCachePart(hashTable, lgArrLongs_, curCount_, thetaLong_, dstOrdered);
 
-    //Create the CompactSketch
     return CompactOperations.componentsToCompact(
         thetaLong_, curCount_, seedHash_, empty_, true, dstOrdered, dstOrdered, dstMem, compactCacheR);
   }
diff --git a/src/main/java/org/apache/datasketches/theta/PairwiseSetOperations.java b/src/main/java/org/apache/datasketches/theta/PairwiseSetOperations.java
index 3ffa6ac..fc24f53 100644
--- a/src/main/java/org/apache/datasketches/theta/PairwiseSetOperations.java
+++ b/src/main/java/org/apache/datasketches/theta/PairwiseSetOperations.java
@@ -20,13 +20,6 @@
 package org.apache.datasketches.theta;
 
 import static org.apache.datasketches.Util.DEFAULT_NOMINAL_ENTRIES;
-import static org.apache.datasketches.Util.DEFAULT_UPDATE_SEED;
-import static org.apache.datasketches.Util.checkSeedHashes;
-import static org.apache.datasketches.Util.computeSeedHash;
-
-import java.util.Arrays;
-
-import org.apache.datasketches.SketchesArgumentException;
 
 /**
  * Set Operations where the arguments are presented in pairs as in <i>C = Op(A,B)</i>. These are
@@ -37,7 +30,7 @@ import org.apache.datasketches.SketchesArgumentException;
  *
  * @author Lee Rhodes
  * @deprecated  This class has been deprecated as equivalent functionality has been added to the
- * SetOperation classes: Union, Intersection and AnotB.
+ * SetOperation classes: {@link Union}, {@link Intersection} and {@link AnotB}.
  */
 @Deprecated
 public class PairwiseSetOperations {
@@ -55,13 +48,8 @@ public class PairwiseSetOperations {
    */
   @Deprecated
   public static CompactSketch intersect(final Sketch skA, final Sketch skB) {
-    if (((skA == null) || (skA instanceof EmptyCompactSketch))
-        || ((skB == null) || (skB instanceof EmptyCompactSketch))) {
-      return EmptyCompactSketch.getInstance();
-    }
-    final short seedHash = skA.getSeedHash();
-    final Intersection inter = new IntersectionImpl(seedHash);
-    return inter.intersect(skA, skB, true, null);
+    final Intersection inter = new SetOperationBuilder().buildIntersection();
+    return inter.intersect(skA, skB);
   }
 
   /**
@@ -77,14 +65,8 @@ public class PairwiseSetOperations {
    */
   @Deprecated
   public static CompactSketch aNotB(final Sketch skA, final Sketch skB) {
-    if (((skA == null) || (skA instanceof EmptyCompactSketch))
-        && ((skB == null) || (skB instanceof EmptyCompactSketch))) {
-      return EmptyCompactSketch.getInstance();
-    }
-    final short seedHash = ((skA == null) || (skA instanceof EmptyCompactSketch))
-        ? skB.getSeedHash() : skA.getSeedHash(); // lgtm [java/dereferenced-value-may-be-null]
-    final AnotBimpl anotb = new AnotBimpl(seedHash);
-    return anotb.aNotB(skA, skB, true, null);
+    final AnotB anotb = new SetOperationBuilder().buildANotB();
+    return anotb.aNotB(skA, skB);
   }
 
   /**
@@ -123,133 +105,9 @@ public class PairwiseSetOperations {
    * complete seed handling.
    */
   @Deprecated
-  @SuppressWarnings("null")
   public static CompactSketch union(final CompactSketch skA, final CompactSketch skB, final int k) {
-    //Handle all corner cases with null or empty arguments
-    //For backward compatibility, we must allow input empties with Theta < 1.0.
-    final int swA, swB;
-    swA = ((skA == null) || (skA instanceof EmptyCompactSketch)) ? 0 : 2;
-    swB = ((skB == null) || (skB instanceof EmptyCompactSketch)) ? 0 : 1;
-    final int sw = swA | swB;
-    switch (sw) {
-      case 0: { //skA == null/ECS;  skB == null/ECS; return EmptyCompactSketch.
-        return EmptyCompactSketch.getInstance();
-      }
-      case 1: { //skA == null/ECS;  skB == valid; return skB
-        checkOrdered(skB);
-        return maybeCutback(skB, k);
-      }
-      case 2: { //skA == valid; skB == null/ECS; return skA
-        checkOrdered(skA);
-        return maybeCutback(skA, k);
-      }
-      case 3: { //skA == valid; skB == valid; perform full union
-        checkOrdered(skA);
-        checkOrdered(skB);
-        seedHashesCheck(skA, skB);
-        break;
-      }
-      //default: cannot happen
-    }
-
-    //Both sketches are valid with matching seedhashes and ordered
-    //Full Union operation:
-    final long thetaLongA = skA.getThetaLong(); //lgtm [java/dereferenced-value-may-be-null]
-    final long thetaLongB = skB.getThetaLong(); //lgtm [java/dereferenced-value-may-be-null]
-    long thetaLong = Math.min(thetaLongA, thetaLongB); //Theta rule
-    final long[] cacheA = (skA.hasMemory()) ? skA.getCache() : skA.getCache().clone();
-    final long[] cacheB = (skB.hasMemory()) ? skB.getCache() : skB.getCache().clone();
-    final int aLen = cacheA.length;
-    final int bLen = cacheB.length;
-
-    final long[] outCache = new long[aLen + bLen];
-
-    int indexA = 0;
-    int indexB = 0;
-    int indexOut = 0;
-    long hashA = (aLen == 0) ? thetaLong : cacheA[indexA];
-    long hashB = (bLen == 0) ? thetaLong : cacheB[indexB];
-
-    while ((indexA < aLen) || (indexB < bLen)) {
-      if (hashA == hashB) {
-        if (hashA < thetaLong) {
-          if (indexOut >= k) {
-            thetaLong = hashA;
-            break;
-          }
-          outCache[indexOut++] = hashA;
-          hashA = (++indexA < aLen) ? cacheA[indexA] : thetaLong;
-          hashB = (++indexB < bLen) ? cacheB[indexB] : thetaLong;
-          continue;
-        }
-        break;
-      }
-      else if (hashA < hashB) {
-        if (hashA < thetaLong) {
-          if (indexOut >= k) {
-            thetaLong = hashA;
-            break;
-          }
-          outCache[indexOut++] = hashA;
-          hashA = (++indexA < aLen) ? cacheA[indexA] : thetaLong;
-          continue;
-        }
-        break;
-      }
-      else { //hashA > hashB
-        if (hashB < thetaLong) {
-          if (indexOut >= k) {
-            thetaLong = hashB;
-            break;
-          }
-          outCache[indexOut++] = hashB;
-          hashB = (++indexB < bLen) ? cacheB[indexB] : thetaLong;
-          continue;
-        }
-        break;
-      }
-    }
-
-    int curCount = indexOut;
-    final long[] outArr;
-    if (indexOut > k) { //unlikely
-      outArr = Arrays.copyOf(outCache, k); //cutback to k, just in case
-      curCount = k;
-    } else {
-      outArr = Arrays.copyOf(outCache, curCount); //copy only valid items
-    }
-    final short seedHash = computeSeedHash(DEFAULT_UPDATE_SEED);
-    final boolean srcEmpty = (curCount == 0) && (thetaLong == Long.MAX_VALUE);
-    return CompactOperations.componentsToCompact(
-        thetaLong, curCount, seedHash, srcEmpty, true, true, true, null, outArr);
-  }
-
-  private static CompactSketch maybeCutback(final CompactSketch csk, final int k) {
-    final boolean empty = csk.isEmpty();
-    int curCount = csk.getRetainedEntries(true);
-    long thetaLong = csk.getThetaLong();
-    if (curCount > k) { //cutback to k
-      final long[] cache = (csk.hasMemory()) ? csk.getCache() : csk.getCache().clone();
-      thetaLong = cache[k];
-      final long[] arr = Arrays.copyOf(cache, k);
-      curCount = k;
-      final short seedHash = computeSeedHash(DEFAULT_UPDATE_SEED);
-      return CompactOperations.componentsToCompact(
-          thetaLong, curCount, seedHash, empty, true, false, true, null, cache);
-    }
-    return csk;
-  }
-
-  private static void checkOrdered(final CompactSketch csk) {
-    if (!csk.isOrdered()) {
-      throw new SketchesArgumentException("Given sketch must be ordered.");
-    }
-  }
-
-  private static short seedHashesCheck(final Sketch skA, final Sketch skB) {
-    final short seedHashA = skA.getSeedHash(); //lgtm [java/dereferenced-value-may-be-null]
-    final short seedHashB = skB.getSeedHash(); //lgtm [java/dereferenced-value-may-be-null]
-    return checkSeedHashes(seedHashA, seedHashB);
+    final Union un = new SetOperationBuilder().setNominalEntries(k).buildUnion();
+    return un.union(skA, skB);
   }
 
 }
diff --git a/src/main/java/org/apache/datasketches/theta/SetOperation.java b/src/main/java/org/apache/datasketches/theta/SetOperation.java
index 89eed19..a38b719 100644
--- a/src/main/java/org/apache/datasketches/theta/SetOperation.java
+++ b/src/main/java/org/apache/datasketches/theta/SetOperation.java
@@ -271,17 +271,4 @@ public abstract class SetOperation {
     return ret;
   }
 
-  /**
-   * This corrects a temporary anomalous condition from an intersection of two exact, disjoint sets,
-   * or AnotB of two exact, identical sets. Because there is no probability error distribuion,
-   * the result is exactly empty. The empty flag is changed to true before returning a result.
-   * This is used in the compaction step of SetOperation.
-   * @param curCount the given curCount
-   * @param thetaLong the given theta.
-   * @return the empty state
-   */
-  private static final boolean correctEmptyOnSetResult(final int curCount, final long thetaLong) {
-    return (curCount == 0) && (thetaLong == Long.MAX_VALUE);
-  }
-
 }
diff --git a/src/test/java/org/apache/datasketches/theta/AnotBimplTest.java b/src/test/java/org/apache/datasketches/theta/AnotBimplTest.java
index 3f4ba4d..4341be0 100644
--- a/src/test/java/org/apache/datasketches/theta/AnotBimplTest.java
+++ b/src/test/java/org/apache/datasketches/theta/AnotBimplTest.java
@@ -51,11 +51,12 @@ public class AnotBimplTest {
     }
 
     AnotB aNb = SetOperation.builder().buildANotB();
-    assertTrue(aNb.isEmpty());
-    assertNull(aNb.getCache());
-    assertEquals(aNb.getThetaLong(), Long.MAX_VALUE);
+    assertTrue(aNb.isEmpty());  //only applies to stateful
+    assertNull(aNb.getCache()); //only applies to stateful
+    assertEquals(aNb.getThetaLong(), Long.MAX_VALUE); //only applies to stateful
     assertEquals(aNb.getSeedHash(), Util.computeSeedHash(DEFAULT_UPDATE_SEED));
 
+
     aNb.update(usk1, usk2);
     assertEquals(aNb.getRetainedEntries(true), 256);
 
diff --git a/src/test/java/org/apache/datasketches/theta/SetOpsCornerCasesTest.java b/src/test/java/org/apache/datasketches/theta/SetOpsCornerCasesTest.java
index 2626760..d29a84d 100644
--- a/src/test/java/org/apache/datasketches/theta/SetOpsCornerCasesTest.java
+++ b/src/test/java/org/apache/datasketches/theta/SetOpsCornerCasesTest.java
@@ -244,32 +244,12 @@ public class SetOpsCornerCasesTest {
     CompactSketch skHeap = generate(EST_HEAP, k);
     CompactSketch skHeapUO = generate(EST_MEMORY_UNORDERED, k);
 
-    try {
-      PairwiseSetOperations.union(skNull, skHeapUO, k);
-      Assert.fail();
-    } catch (Exception e) { } //pass
-    try {
-      PairwiseSetOperations.union(skEmpty, skHeapUO, k);
-      Assert.fail();
-    } catch (Exception e) { } //pass
-
-    try {
-      PairwiseSetOperations.union(skHeapUO, skNull, k);
-      Assert.fail();
-    } catch (Exception e) { } //pass
-    try {
-      PairwiseSetOperations.union(skHeapUO, skEmpty, k);
-      Assert.fail();
-    } catch (Exception e) { } //pass
-
-    try {
-      PairwiseSetOperations.union(skHeapUO, skHeap, k);
-      Assert.fail();
-    } catch (Exception e) { } //pass
-    try {
-      PairwiseSetOperations.union(skHeap, skHeapUO, k);
-      Assert.fail();
-    } catch (Exception e) { } //pass
+    PairwiseSetOperations.union(skNull, skHeapUO, k);
+    PairwiseSetOperations.union(skEmpty, skHeapUO, k);
+    PairwiseSetOperations.union(skHeapUO, skNull, k);
+    PairwiseSetOperations.union(skHeapUO, skEmpty, k);
+    PairwiseSetOperations.union(skHeapUO, skHeap, k);
+    PairwiseSetOperations.union(skHeap, skHeapUO, k);
   }
 
   @Test


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org