You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by le...@apache.org on 2020/07/07 23:37:12 UTC

[incubator-datasketches-java] 09/10: Close to final set of changes.

This is an automated email from the ASF dual-hosted git repository.

leerho pushed a commit to branch Refactor_Theta_Tuple
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-java.git

commit e3f4ad2dee4d75356f587a4143634ac862c3493e
Author: Lee Rhodes <le...@users.noreply.github.com>
AuthorDate: Tue Jul 7 14:38:28 2020 -0700

    Close to final set of changes.
---
 .../org/apache/datasketches/HashOperations.java    | 193 +++++++++++----------
 .../theta/DirectQuickSelectSketch.java             |   2 +-
 .../theta/DirectQuickSelectSketchR.java            |  18 +-
 .../datasketches/theta/HeapQuickSelectSketch.java  |  13 +-
 .../datasketches/theta/HeapUpdateSketch.java       |  12 +-
 .../datasketches/theta/IntersectionImpl.java       |   2 +-
 .../java/org/apache/datasketches/theta/Sketch.java |  19 +-
 .../apache/datasketches/theta/UpdateSketch.java    |  39 ++---
 .../datasketches/tuple/QuickSelectSketch.java      |  39 +++--
 .../DirectArrayOfDoublesQuickSelectSketch.java     |   6 +-
 .../apache/datasketches/HashOperationsTest.java    |  23 +--
 11 files changed, 194 insertions(+), 172 deletions(-)

diff --git a/src/main/java/org/apache/datasketches/HashOperations.java b/src/main/java/org/apache/datasketches/HashOperations.java
index 47bf16a..9067b79 100644
--- a/src/main/java/org/apache/datasketches/HashOperations.java
+++ b/src/main/java/org/apache/datasketches/HashOperations.java
@@ -33,7 +33,6 @@ import org.apache.datasketches.memory.WritableMemory;
  */
 public final class HashOperations {
   private static final int STRIDE_HASH_BITS = 7;
-
   private static final int EMPTY = 0;
 
   /**
@@ -43,47 +42,8 @@ public final class HashOperations {
 
   private HashOperations() {}
 
-  /**
-   * Counts the cardinality of the first Log2 values of the given source array.
-   * @param srcArr the given source array
-   * @param lgArrLongs <a href="{@docRoot}/resources/dictionary.html#lgArrLongs">See lgArrLongs</a>
-   * @param thetaLong <a href="{@docRoot}/resources/dictionary.html#thetaLong">See Theta Long</a>
-   * @return the cardinality
-   */
-  public static int countPart(final long[] srcArr, final int lgArrLongs, final long thetaLong) {
-    int cnt = 0;
-    final int len = 1 << lgArrLongs;
-    for (int i = len; i-- > 0;) {
-      final long hash = srcArr[i];
-      if (continueCondition(thetaLong, hash) ) {
-        continue;
-      }
-      cnt++ ;
-    }
-    return cnt;
-  }
-
-  /**
-   * Counts the cardinality of the given source array.
-   * @param srcArr the given source array
-   * @param thetaLong <a href="{@docRoot}/resources/dictionary.html#thetaLong">See Theta Long</a>
-   * @return the cardinality
-   */
-  public static int count(final long[] srcArr, final long thetaLong) {
-    int cnt = 0;
-    final int len = srcArr.length;
-    for (int i = len; i-- > 0;) {
-      final long hash = srcArr[i];
-      if (continueCondition(thetaLong, hash) ) {
-        continue;
-      }
-      cnt++ ;
-    }
-    return cnt;
-  }
-
-  // make odd and independent of index assuming lgArrLongs lowest bits of the hash were used for
-  //  index
+  //Make odd and independent of index assuming lgArrLongs lowest bits of the hash were used for
+  //  index. This results in a 8 bit value that is always odd.
   private static int getStride(final long hash, final int lgArrLongs) {
     return (2 * (int) ((hash >>> lgArrLongs) & STRIDE_MASK) ) + 1;
   }
@@ -91,18 +51,18 @@ public final class HashOperations {
   //ON-HEAP
 
   /**
-   * This is a classical Knuth-style Open Addressing, Double Hash search scheme for on-heap.
+   * This is a classical Knuth-style Open Addressing, Double Hash (OADH) search scheme for on-heap.
    * Returns the index if found, -1 if not found.
    *
-   * @param hashTable The hash table to search. Must be a power of 2 in size.
-   * @param lgArrLongs <a href="{@docRoot}/resources/dictionary.html#lgArrLongs">See lgArrLongs</a>.
-   * lgArrLongs &le; log2(hashTable.length).
-   * @param hash A hash value to search for. Must not be zero.
+   * @param hashTable The hash table to search. Its size must be a power of 2.
+   * @param lgArrLongs The log_base2(hashTable.length).
+   * <a href="{@docRoot}/resources/dictionary.html#lgArrLongs">See lgArrLongs</a>.
+   * @param hash The hash value to search for. It must not be zero.
    * @return Current probe index if found, -1 if not found.
    */
   public static int hashSearch(final long[] hashTable, final int lgArrLongs, final long hash) {
     if (hash == 0) {
-      throw new SketchesArgumentException("Given hash cannot be zero: " + hash);
+      throw new SketchesArgumentException("Given hash must not be zero: " + hash);
     }
     final int arrayMask = (1 << lgArrLongs) - 1; // current Size -1
     final int stride = getStride(hash, lgArrLongs);
@@ -123,16 +83,16 @@ public final class HashOperations {
   }
 
   /**
-   * This is a classical Knuth-style Open Addressing, Double Hash insert scheme for on-heap.
+   * This is a classical Knuth-style Open Addressing, Double Hash (OADH) insert scheme for on-heap.
    * This method assumes that the input hash is not a duplicate.
    * Useful for rebuilding tables to avoid unnecessary comparisons.
-   * Returns the index of insertion, which is always positive or zero. Throws an exception if the
-   * table is full with no empty slot.
+   * Returns the index of insertion, which is always positive or zero.
+   * Throws an exception if the table has no empty slot.
    *
-   * @param hashTable the hash table to insert into.
-   * @param lgArrLongs <a href="{@docRoot}/resources/dictionary.html#lgArrLongs">See lgArrLongs</a>.
-   * lgArrLongs &le; log2(hashTable.length).
-   * @param hash value that must not be zero and will be inserted into the array into an empty slot.
+   * @param hashTable the hash table to insert into. Its size must be a power of 2.
+   * @param lgArrLongs The log_base2(hashTable.length).
+   * <a href="{@docRoot}/resources/dictionary.html#lgArrLongs">See lgArrLongs</a>.
+   * @param hash The hash value to be potentially inserted into an empty slot. It must not be zero.
    * @return index of insertion.  Always positive or zero.
    */
   public static int hashInsertOnly(final long[] hashTable, final int lgArrLongs, final long hash) {
@@ -153,15 +113,15 @@ public final class HashOperations {
   }
 
   /**
-   * This is a classical Knuth-style Open Addressing, Double Hash insert scheme for on-heap.
+   * This is a classical Knuth-style Open Addressing, Double Hash (OADH) insert scheme for on-heap.
    * Returns index &ge; 0 if found (duplicate); &lt; 0 if inserted, inserted at -(index + 1).
    * Throws an exception if the value is not found and table has no empty slot.
    *
-   * @param hashTable the hash table to insert into.
-   * @param lgArrLongs <a href="{@docRoot}/resources/dictionary.html#lgArrLongs">See lgArrLongs</a>.
-   * lgArrLongs &le; log2(hashTable.length).
-   * @param hash hash value that must not be zero and if not a duplicate will be inserted into the
-   * array into an empty slot
+   * @param hashTable The hash table to insert into. Its size must be a power of 2.
+   * @param lgArrLongs The log_base2(hashTable.length).
+   * <a href="{@docRoot}/resources/dictionary.html#lgArrLongs">See lgArrLongs</a>.
+   * @param hash The hash value to be potentially inserted into an empty slot only if it is not
+   * a duplicate of any other hash value in the table. It must not be zero.
    * @return index &ge; 0 if found (duplicate); &lt; 0 if inserted, inserted at -(index + 1).
    */
   public static int hashSearchOrInsert(final long[] hashTable, final int lgArrLongs,
@@ -182,22 +142,24 @@ public final class HashOperations {
       }
       curProbe = (curProbe + stride) & arrayMask;
     } while (curProbe != loopIndex);
-    throw new SketchesArgumentException("Key not found and no empty slots!");
+    throw new SketchesArgumentException("Hash not found and no empty slots!");
   }
 
   /**
-   * Inserts the given long array into the given hash table array of the target size,
-   * removes any negative input values, ignores duplicates and counts the values inserted.
+   * Inserts the given long array into the given OADH hashTable of the target size,
+   * ignores duplicates and counts the values inserted.
+   * The hash values must not be negative, zero values and values &ge; thetaLong are ignored.
    * The given hash table may have values, but they must have been inserted by this method or one
-   * of the other OADH insert methods in this class and they may not be dirty.
+   * of the other OADH insert methods in this class.
    * This method performs additional checks against potentially invalid hash values or theta values.
    * Returns the count of values actually inserted.
    *
    * @param srcArr the source hash array to be potentially inserted
-   * @param hashTable The correctly sized target hash table that must be a power of two.
-   * @param lgArrLongs <a href="{@docRoot}/resources/dictionary.html#lgArrLongs">See lgArrLongs</a>.
-   * lgArrLongs &le; log2(hashTable.length).
-   * @param thetaLong must greater than zero
+   * @param hashTable The hash table to insert into. Its size must be a power of 2.
+   * @param lgArrLongs The log_base2(hashTable.length).
+   * <a href="{@docRoot}/resources/dictionary.html#lgArrLongs">See lgArrLongs</a>.
+   * @param thetaLong The theta value that all input hash values are compared against.
+   * It must greater than zero.
    * <a href="{@docRoot}/resources/dictionary.html#thetaLong">See Theta Long</a>
    * @return the count of values actually inserted
    */
@@ -219,21 +181,25 @@ public final class HashOperations {
     return count;
   }
 
-  //OFF-HEAP (these are kept for backward compatibility)
+  //With Memory or WritableMemory
 
   /**
-   * This is a classical Knuth-style Open Addressing, Double Hash search scheme for off-heap.
+   * This is a classical Knuth-style Open Addressing, Double Hash (OADH) search scheme for Memory.
    * Returns the index if found, -1 if not found.
    *
-   * @param mem The Memory hash table to search.
-   * @param lgArrLongs <a href="{@docRoot}/resources/dictionary.html#lgArrLongs">See lgArrLongs</a>.
-   * lgArrLongs &le; log2(hashTable.length).
-   * @param hash A hash value to search for. Must not be zero.
-   * @param memOffsetBytes offset in the memory where the hash array starts
-   * @return index if found, -1 if not found.
+   * @param mem The <i>Memory</i> containing the hash table to search.
+   * The hash table portion must be a power of 2 in size.
+   * @param lgArrLongs The log_base2(hashTable.length).
+   * <a href="{@docRoot}/resources/dictionary.html#lgArrLongs">See lgArrLongs</a>.
+   * @param hash The hash value to search for. Must not be zero.
+   * @param memOffsetBytes offset in the memory where the hashTable starts
+   * @return Current probe index if found, -1 if not found.
    */
-  public static int hashSearch(final Memory mem, final int lgArrLongs, final long hash,
+  public static int hashSearchMemory(final Memory mem, final int lgArrLongs, final long hash,
       final int memOffsetBytes) {
+    if (hash == 0) {
+      throw new SketchesArgumentException("Given hash must not be zero: " + hash);
+    }
     final int arrayMask = (1 << lgArrLongs) - 1;
     final int stride = getStride(hash, lgArrLongs);
     int curProbe = (int) (hash & arrayMask);
@@ -249,21 +215,21 @@ public final class HashOperations {
   }
 
   /**
-   * This is a classical Knuth-style Open Addressing, Double Hash insert scheme, but inserts
-   * values directly into a Memory.
+   * This is a classical Knuth-style Open Addressing, Double Hash (OADH) insert scheme for Memory.
    * This method assumes that the input hash is not a duplicate.
    * Useful for rebuilding tables to avoid unnecessary comparisons.
    * Returns the index of insertion, which is always positive or zero.
    * Throws an exception if table has no empty slot.
    *
-   * @param wmem The writable memory
-   * @param lgArrLongs <a href="{@docRoot}/resources/dictionary.html#lgArrLongs">See lgArrLongs</a>.
-   * lgArrLongs &le; log2(hashTable.length).
+   * @param wmem The <i>WritableMemory</i> that contains the hashTable to insert into.
+   * The size of the hashTable portion must be a power of 2.
+   * @param lgArrLongs The log_base2(hashTable.length.
+   * <a href="{@docRoot}/resources/dictionary.html#lgArrLongs">See lgArrLongs</a>.
    * @param hash value that must not be zero and will be inserted into the array into an empty slot.
-   * @param memOffsetBytes offset in the memory where the hash array starts
+   * @param memOffsetBytes offset in the <i>WritableMemory</i> where the hashTable starts
    * @return index of insertion.  Always positive or zero.
    */
-  public static int fastHashInsertOnly(final WritableMemory wmem, final int lgArrLongs,
+  public static int hashInsertOnlyMemory(final WritableMemory wmem, final int lgArrLongs,
       final long hash, final int memOffsetBytes) {
     final int arrayMask = (1 << lgArrLongs) - 1; // current Size -1
     final int stride = getStride(hash, lgArrLongs);
@@ -282,23 +248,21 @@ public final class HashOperations {
     throw new SketchesArgumentException("No empty slot in table!");
   }
 
-  //FAST OFF-HEAP
-
   /**
    * This is a classical Knuth-style Open Addressing, Double Hash insert scheme, but inserts
    * values directly into a Memory.
    * Returns index &ge; 0 if found (duplicate); &lt; 0 if inserted, inserted at -(index + 1).
    * Throws an exception if the value is not found and table has no empty slot.
    *
-   * @param wmem the WritableMemory
-   * @param lgArrLongs <a href="{@docRoot}/resources/dictionary.html#lgArrLongs">See lgArrLongs</a>.
-   * lgArrLongs &le; log2(hashTable.length).
-   * @param hash A hash value that must not be zero and if not a duplicate will be inserted into the
-   * array into an empty slot.
-   * @param memOffsetBytes offset in the memory where the hash array starts
+   * @param wmem The <i>WritableMemory</i> that contains the hashTable to insert into.
+   * @param lgArrLongs The log_base2(hashTable.length).
+   * <a href="{@docRoot}/resources/dictionary.html#lgArrLongs">See lgArrLongs</a>.
+   * @param hash The hash value to be potentially inserted into an empty slot only if it is not
+   * a duplicate of any other hash value in the table. It must not be zero.
+   * @param memOffsetBytes offset in the <i>WritableMemory</i> where the hash array starts
    * @return index &ge; 0 if found (duplicate); &lt; 0 if inserted, inserted at -(index + 1).
    */
-  public static int fastHashSearchOrInsert(final WritableMemory wmem, final int lgArrLongs,
+  public static int hashSearchOrInsertMemory(final WritableMemory wmem, final int lgArrLongs,
       final long hash, final int memOffsetBytes) {
     final int arrayMask = (1 << lgArrLongs) - 1; // current Size -1
     final int stride = getStride(hash, lgArrLongs);
@@ -318,6 +282,8 @@ public final class HashOperations {
     throw new SketchesArgumentException("Key not found and no empty slot in table!");
   }
 
+  //Other related methods
+
   /**
    * @param thetaLong must be greater than zero otherwise throws an exception.
    * <a href="{@docRoot}/resources/dictionary.html#thetaLong">See Theta Long</a>
@@ -378,4 +344,43 @@ public final class HashOperations {
     return hashTable;
   }
 
+  /**
+   * Counts the cardinality of the first Log2 values of the given source array.
+   * @param srcArr the given source array
+   * @param lgArrLongs <a href="{@docRoot}/resources/dictionary.html#lgArrLongs">See lgArrLongs</a>
+   * @param thetaLong <a href="{@docRoot}/resources/dictionary.html#thetaLong">See Theta Long</a>
+   * @return the cardinality
+   */
+  public static int countPart(final long[] srcArr, final int lgArrLongs, final long thetaLong) {
+    int cnt = 0;
+    final int len = 1 << lgArrLongs;
+    for (int i = len; i-- > 0;) {
+      final long hash = srcArr[i];
+      if (continueCondition(thetaLong, hash) ) {
+        continue;
+      }
+      cnt++ ;
+    }
+    return cnt;
+  }
+
+  /**
+   * Counts the cardinality of the given source array.
+   * @param srcArr the given source array
+   * @param thetaLong <a href="{@docRoot}/resources/dictionary.html#thetaLong">See Theta Long</a>
+   * @return the cardinality
+   */
+  public static int count(final long[] srcArr, final long thetaLong) {
+    int cnt = 0;
+    final int len = srcArr.length;
+    for (int i = len; i-- > 0;) {
+      final long hash = srcArr[i];
+      if (continueCondition(thetaLong, hash) ) {
+        continue;
+      }
+      cnt++ ;
+    }
+    return cnt;
+  }
+
 }
diff --git a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java
index dc34bc5..df24c11 100644
--- a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java
+++ b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketch.java
@@ -260,7 +260,7 @@ class DirectQuickSelectSketch extends DirectQuickSelectSketchR {
 
     //The duplicate test
     final int index =
-        HashOperations.fastHashSearchOrInsert(wmem_, lgArrLongs, hash, preambleLongs << 3);
+        HashOperations.hashSearchOrInsertMemory(wmem_, lgArrLongs, hash, preambleLongs << 3);
     if (index >= 0) {
       return RejectedDuplicate; //Duplicate, not inserted
     }
diff --git a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java
index d5290b1..d79b71b 100644
--- a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java
+++ b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java
@@ -188,11 +188,21 @@ class DirectQuickSelectSketchR extends UpdateSketch {
   }
 
   @Override
+  float getP() {
+    return wmem_.getFloat(P_FLOAT);
+  }
+
+  @Override
   public ResizeFactor getResizeFactor() {
     return ResizeFactor.getRF(getLgRF());
   }
 
   @Override
+  long getSeed() {
+    return seed_;
+  }
+
+  @Override
   public UpdateSketch rebuild() {
     throw new SketchesReadOnlyException();
   }
@@ -229,15 +239,7 @@ class DirectQuickSelectSketchR extends UpdateSketch {
     return wmem_;
   }
 
-  @Override
-  float getP() {
-    return wmem_.getFloat(P_FLOAT);
-  }
 
-  @Override
-  long getSeed() {
-    return seed_;
-  }
 
   @Override
   short getSeedHash() {
diff --git a/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java
index b3dcf6b..14197aa 100644
--- a/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java
+++ b/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java
@@ -280,14 +280,15 @@ class HeapQuickSelectSketch extends HeapUpdateSketch {
     return numEntries > hashTableThreshold_;
   }
 
-  //Must resize. Changes lgArrLongs_ and cache_. theta and count don't change.
+  //Must resize. Changes lgArrLongs_, cache_, hashTableThreshold;
+  // theta and count don't change.
   // Used by hashUpdate()
   private final void resizeCache() {
     final ResizeFactor rf = getResizeFactor();
-    final int lgTgtLongs = lgNomLongs_ + 1;
-    final int lgDeltaLongs = lgTgtLongs - lgArrLongs_;
+    final int lgMaxArrLongs = lgNomLongs_ + 1;
+    final int lgDeltaLongs = lgMaxArrLongs - lgArrLongs_;
     final int lgResizeFactor = max(min(rf.lg(), lgDeltaLongs), 1); //rf_.lg() could be 0
-    lgArrLongs_ += lgResizeFactor; // new tgt size
+    lgArrLongs_ += lgResizeFactor; // new arr size
 
     final long[] tgtArr = new long[1 << lgArrLongs_];
     final int newCount = HashOperations.hashArrayInsert(cache_, tgtArr, lgArrLongs_, thetaLong_);
@@ -301,9 +302,9 @@ class HeapQuickSelectSketch extends HeapUpdateSketch {
 
   //array stays the same size. Changes theta and thus count
   private final void quickSelectAndRebuild() {
-    final int arrLongs = 1 << lgArrLongs_;
+    final int arrLongs = 1 << lgArrLongs_; // generally 2 * k,
 
-    final int pivot = (1 << lgNomLongs_) + 1; // pivot for QS
+    final int pivot = (1 << lgNomLongs_) + 1; // pivot for QS = k + 1
 
     thetaLong_ = selectExcludingZeros(cache_, curCount_, pivot); //messes up the cache_
 
diff --git a/src/main/java/org/apache/datasketches/theta/HeapUpdateSketch.java b/src/main/java/org/apache/datasketches/theta/HeapUpdateSketch.java
index d62e88d..2bc2915 100644
--- a/src/main/java/org/apache/datasketches/theta/HeapUpdateSketch.java
+++ b/src/main/java/org/apache/datasketches/theta/HeapUpdateSketch.java
@@ -85,15 +85,13 @@ abstract class HeapUpdateSketch extends UpdateSketch {
   }
 
   @Override
-  public ResizeFactor getResizeFactor() {
-    return rf_;
+  float getP() {
+    return p_;
   }
 
-  //restricted methods
-
   @Override
-  float getP() {
-    return p_;
+  public ResizeFactor getResizeFactor() {
+    return rf_;
   }
 
   @Override
@@ -101,6 +99,8 @@ abstract class HeapUpdateSketch extends UpdateSketch {
     return seed_;
   }
 
+  //restricted methods
+
   @Override
   short getSeedHash() {
     return Util.computeSeedHash(getSeed());
diff --git a/src/main/java/org/apache/datasketches/theta/IntersectionImpl.java b/src/main/java/org/apache/datasketches/theta/IntersectionImpl.java
index d5478e4..611130d 100644
--- a/src/main/java/org/apache/datasketches/theta/IntersectionImpl.java
+++ b/src/main/java/org/apache/datasketches/theta/IntersectionImpl.java
@@ -359,7 +359,7 @@ final class IntersectionImpl extends IntersectionImplR {
       for (int i = 0; i < arrLongsIn; i++ ) {
         final long hashIn = arr[i];
         if (HashOperations.continueCondition(thetaLong, hashIn)) { continue; }
-        HashOperations.fastHashInsertOnly(mem_, lgArrLongs, hashIn, preBytes);
+        HashOperations.hashInsertOnlyMemory(mem_, lgArrLongs, hashIn, preBytes);
         tmpCnt++;
       }
     } else { //On Heap. Assumes HT exists and is large enough
diff --git a/src/main/java/org/apache/datasketches/theta/Sketch.java b/src/main/java/org/apache/datasketches/theta/Sketch.java
index d65c5cb..0075b82 100644
--- a/src/main/java/org/apache/datasketches/theta/Sketch.java
+++ b/src/main/java/org/apache/datasketches/theta/Sketch.java
@@ -280,14 +280,6 @@ public abstract class Sketch {
   public abstract Family getFamily();
 
   /**
-   * Returns a HashIterator that can be used to iterate over the retained hash values of the
-   * Theta sketch.
-   * @return a HashIterator that can be used to iterate over the retained hash values of the
-   * Theta sketch.
-   */
-  public abstract HashIterator iterator();
-
-  /**
    * Gets the approximate lower error bound given the specified number of Standard Deviations.
    * This will return getEstimate() if isEmpty() is true.
    *
@@ -437,6 +429,14 @@ public abstract class Sketch {
   }
 
   /**
+   * Returns a HashIterator that can be used to iterate over the retained hash values of the
+   * Theta sketch.
+   * @return a HashIterator that can be used to iterate over the retained hash values of the
+   * Theta sketch.
+   */
+  public abstract HashIterator iterator();
+
+  /**
    * Serialize this sketch to a byte array form.
    * @return byte array of this sketch
    */
@@ -568,7 +568,7 @@ public abstract class Sketch {
 
   /**
    * Gets the internal cache array. For on-heap sketches this will return a reference to the actual
-   * cache array. For off-heap sketches this returns a copy.
+   * cache array. For Memory-based sketches this returns a copy.
    * @return the internal cache array.
    */
   abstract long[] getCache();
@@ -633,7 +633,6 @@ public abstract class Sketch {
     }
   }
 
-
   static final double estimate(final long thetaLong, final int curCount) {
     return curCount * (LONG_MAX_VALUE_AS_DOUBLE / thetaLong);
   }
diff --git a/src/main/java/org/apache/datasketches/theta/UpdateSketch.java b/src/main/java/org/apache/datasketches/theta/UpdateSketch.java
index c28c983..740b03e 100644
--- a/src/main/java/org/apache/datasketches/theta/UpdateSketch.java
+++ b/src/main/java/org/apache/datasketches/theta/UpdateSketch.java
@@ -167,7 +167,6 @@ public abstract class UpdateSketch extends Sketch {
 
   /**
    * Returns a new builder
-   *
    * @return a new builder
    */
   public static final UpdateSketchBuilder builder() {
@@ -175,6 +174,25 @@ public abstract class UpdateSketch extends Sketch {
   }
 
   /**
+   * Returns the configured ResizeFactor
+   * @return the configured ResizeFactor
+   */
+  public abstract ResizeFactor getResizeFactor();
+
+  /**
+   * Gets the configured sampling probability, <i>p</i>.
+   * <a href="{@docRoot}/resources/dictionary.html#p">See Sampling Probability, <i>p</i></a>
+   * @return the sampling probability, <i>p</i>
+   */
+  abstract float getP();
+
+  /**
+   * Gets the configured seed
+   * @return the configured seed
+   */
+  abstract long getSeed();
+
+  /**
    * Resets this sketch back to a virgin empty state.
    */
   public abstract void reset();
@@ -187,12 +205,6 @@ public abstract class UpdateSketch extends Sketch {
   public abstract UpdateSketch rebuild();
 
   /**
-   * Returns the configured ResizeFactor
-   * @return the configured ResizeFactor
-   */
-  public abstract ResizeFactor getResizeFactor();
-
-  /**
    * Present this sketch with a long.
    *
    * @param datum The given long datum.
@@ -330,19 +342,6 @@ public abstract class UpdateSketch extends Sketch {
   public abstract int getLgNomLongs();
 
   /**
-   * Gets the configured sampling probability, <i>p</i>.
-   * <a href="{@docRoot}/resources/dictionary.html#p">See Sampling Probability, <i>p</i></a>
-   * @return the sampling probability, <i>p</i>
-   */
-  abstract float getP();
-
-  /**
-   * Gets the configured seed
-   * @return the configured seed
-   */
-  abstract long getSeed();
-
-  /**
    * Returns true if the internal cache contains "dirty" values that are greater than or equal
    * to thetaLong.
    * @return true if the internal cache is dirty.
diff --git a/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java
index 26abe61..e6fb943 100644
--- a/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java
+++ b/src/main/java/org/apache/datasketches/tuple/QuickSelectSketch.java
@@ -41,7 +41,6 @@ import org.apache.datasketches.memory.Memory;
  * @param <S> type of Summary
  */
 class QuickSelectSketch<S extends Summary> extends Sketch<S> {
-  //private static final byte serialVersionWithSummaryFactoryUID = 1;
   private static final byte serialVersionUID = 2;
 
   private enum Flags { IS_BIG_ENDIAN, IS_IN_SAMPLING_MODE, IS_EMPTY, HAS_ENTRIES, IS_THETA_INCLUDED }
@@ -63,7 +62,9 @@ class QuickSelectSketch<S extends Summary> extends Sketch<S> {
    * given value.
    * @param summaryFactory An instance of a SummaryFactory.
    */
-  QuickSelectSketch(final int nomEntries, final SummaryFactory<S> summaryFactory) {
+  QuickSelectSketch(
+      final int nomEntries,
+      final SummaryFactory<S> summaryFactory) {
     this(nomEntries, DEFAULT_LG_RESIZE_FACTOR, summaryFactory);
   }
 
@@ -80,7 +81,9 @@ class QuickSelectSketch<S extends Summary> extends Sketch<S> {
    * </pre>
    * @param summaryFactory An instance of a SummaryFactory.
    */
-  QuickSelectSketch(final int nomEntries, final int lgResizeFactor,
+  QuickSelectSketch(
+      final int nomEntries,
+      final int lgResizeFactor,
       final SummaryFactory<S> summaryFactory) {
     this(nomEntries, lgResizeFactor, 1f, summaryFactory);
   }
@@ -100,7 +103,10 @@ class QuickSelectSketch<S extends Summary> extends Sketch<S> {
    * @param samplingProbability the given sampling probability
    * @param summaryFactory An instance of a SummaryFactory.
    */
-  QuickSelectSketch(final int nomEntries, final int lgResizeFactor, final float samplingProbability,
+  QuickSelectSketch(
+      final int nomEntries,
+      final int lgResizeFactor,
+      final float samplingProbability,
       final SummaryFactory<S> summaryFactory) {
     this(
       nomEntries,
@@ -111,8 +117,12 @@ class QuickSelectSketch<S extends Summary> extends Sketch<S> {
     );
   }
 
-  QuickSelectSketch(final int nomEntries, final int lgResizeFactor, final float samplingProbability,
-      final SummaryFactory<S> summaryFactory, final int startingSize) {
+  QuickSelectSketch(
+      final int nomEntries,
+      final int lgResizeFactor,
+      final float samplingProbability,
+      final SummaryFactory<S> summaryFactory,
+      final int startingSize) {
     nomEntries_ = ceilingPowerOf2(nomEntries);
     lgResizeFactor_ = lgResizeFactor;
     samplingProbability_ = samplingProbability;
@@ -130,7 +140,9 @@ class QuickSelectSketch<S extends Summary> extends Sketch<S> {
    * @param deserializer the SummaryDeserializer
    * @param summaryFactory the SummaryFactory
    */
-  QuickSelectSketch(final Memory mem, final SummaryDeserializer<S> deserializer,
+  QuickSelectSketch(
+      final Memory mem,
+      final SummaryDeserializer<S> deserializer,
       final SummaryFactory<S> summaryFactory) {
     summaryFactory_ = summaryFactory;
     int offset = 0;
@@ -202,7 +214,6 @@ class QuickSelectSketch<S extends Summary> extends Sketch<S> {
     return summaryTable_;
   }
 
-
   /**
    * Get configured nominal number of entries
    * @return nominal number of entries
@@ -249,7 +260,7 @@ class QuickSelectSketch<S extends Summary> extends Sketch<S> {
   public void trim() {
     if (count_ > nomEntries_) {
       updateTheta();
-      rebuild(hashTable_.length);
+      resize(hashTable_.length);
     }
   }
 
@@ -427,13 +438,13 @@ class QuickSelectSketch<S extends Summary> extends Sketch<S> {
       updateTheta();
       rebuild();
     } else {
-      rebuild(hashTable_.length * (1 << lgResizeFactor_));
+      resize(hashTable_.length * (1 << lgResizeFactor_));
     }
     return true;
   }
 
   void rebuild() {
-    rebuild(hashTable_.length);
+    resize(hashTable_.length);
   }
 
   void insert(final long hash, final S summary) {
@@ -446,6 +457,10 @@ class QuickSelectSketch<S extends Summary> extends Sketch<S> {
   private void updateTheta() {
     final long[] hashArr = new long[count_];
     int i = 0;
+    //Because of the association of the hashTable with the summaryTable we cannot destroy the
+    // hashTable structure. So we must copy. May as well compact at the same time.
+    // Might consider a whole table clone and use the selectExcludingZeros method instead.
+    // Not sure if there would be any speed advantage.
     for (int j = 0; j < hashTable_.length; j++) {
       if (summaryTable_[j] != null) {
         hashArr[i++] = hashTable_[j];
@@ -455,7 +470,7 @@ class QuickSelectSketch<S extends Summary> extends Sketch<S> {
   }
 
   @SuppressWarnings({"unchecked"})
-  private void rebuild(final int newSize) {
+  private void resize(final int newSize) {
     final long[] oldHashTable = hashTable_;
     final S[] oldSummaryTable = summaryTable_;
     final Class<S> summaryType = (Class<S>) summaryTable_.getClass().getComponentType();
diff --git a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java
index c47dd1a..5c4e4b4 100644
--- a/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java
+++ b/src/main/java/org/apache/datasketches/tuple/arrayofdoubles/DirectArrayOfDoublesQuickSelectSketch.java
@@ -294,17 +294,17 @@ class DirectArrayOfDoublesQuickSelectSketch extends ArrayOfDoublesQuickSelectSke
 
   @Override
   protected int insertKey(final long key) {
-    return HashOperations.fastHashInsertOnly(mem_, lgCurrentCapacity_, key, ENTRIES_START);
+    return HashOperations.hashInsertOnlyMemory(mem_, lgCurrentCapacity_, key, ENTRIES_START);
   }
 
   @Override
   protected int findOrInsertKey(final long key) {
-    return HashOperations.fastHashSearchOrInsert(mem_, lgCurrentCapacity_, key, ENTRIES_START);
+    return HashOperations.hashSearchOrInsertMemory(mem_, lgCurrentCapacity_, key, ENTRIES_START);
   }
 
   @Override
   protected double[] find(final long key) {
-    final int index = HashOperations.hashSearch(mem_, lgCurrentCapacity_, key, ENTRIES_START);
+    final int index = HashOperations.hashSearchMemory(mem_, lgCurrentCapacity_, key, ENTRIES_START);
     if (index == -1) { return null; }
     final double[] array = new double[numValues_];
     mem_.getDoubleArray(valuesOffset_ + ((long) SIZE_OF_VALUE_BYTES * numValues_ * index),
diff --git a/src/test/java/org/apache/datasketches/HashOperationsTest.java b/src/test/java/org/apache/datasketches/HashOperationsTest.java
index 00c34fa..f7495aa 100644
--- a/src/test/java/org/apache/datasketches/HashOperationsTest.java
+++ b/src/test/java/org/apache/datasketches/HashOperationsTest.java
@@ -22,11 +22,12 @@ package org.apache.datasketches;
 import static org.apache.datasketches.HashOperations.checkHashCorruption;
 import static org.apache.datasketches.HashOperations.checkThetaCorruption;
 import static org.apache.datasketches.HashOperations.continueCondition;
-import static org.apache.datasketches.HashOperations.fastHashInsertOnly;
-import static org.apache.datasketches.HashOperations.fastHashSearchOrInsert;
+import static org.apache.datasketches.HashOperations.hashInsertOnlyMemory;
+import static org.apache.datasketches.HashOperations.hashSearchOrInsertMemory;
 import static org.apache.datasketches.HashOperations.hashArrayInsert;
 import static org.apache.datasketches.HashOperations.hashInsertOnly;
 import static org.apache.datasketches.HashOperations.hashSearch;
+import static org.apache.datasketches.HashOperations.hashSearchMemory;
 import static org.apache.datasketches.HashOperations.hashSearchOrInsert;
 import static org.apache.datasketches.hash.MurmurHash3.hash;
 import static org.testng.Assert.assertEquals;
@@ -106,7 +107,7 @@ public class HashOperationsTest {
   public void testHashInsertOnlyMemoryNoStride() {
     final long[] table = new long[32];
     final WritableMemory mem = WritableMemory.wrap(table);
-    final int index = fastHashInsertOnly(mem, 5, 1, 0);
+    final int index = hashInsertOnlyMemory(mem, 5, 1, 0);
     assertEquals(index, 1);
     assertEquals(table[1], 1L);
   }
@@ -116,7 +117,7 @@ public class HashOperationsTest {
     final long[] table = new long[32];
     table[1] = 1;
     final WritableMemory mem = WritableMemory.wrap(table);
-    final int index = fastHashInsertOnly(mem, 5, 1, 0);
+    final int index = hashInsertOnlyMemory(mem, 5, 1, 0);
     assertEquals(index, 2);
     assertEquals(table[2], 1L);
   }
@@ -152,22 +153,22 @@ public class HashOperationsTest {
     final long[] table = new long[32];
     final WritableMemory mem = WritableMemory.wrap(table);
     for (int i = 1; i <= 32; ++i) {
-      fastHashInsertOnly(mem, 5, i, 0);
+      hashInsertOnlyMemory(mem, 5, i, 0);
     }
 
     // table full; search returns not found, others throw exception
-    final int retVal = hashSearch(mem, 5, 33, 0);
+    final int retVal = hashSearchMemory(mem, 5, 33, 0);
     assertEquals(retVal, -1);
 
     try {
-      fastHashInsertOnly(mem, 5, 33, 0);
+      hashInsertOnlyMemory(mem, 5, 33, 0);
       fail();
     } catch (final SketchesArgumentException e) {
       // expected
     }
 
     try {
-      fastHashSearchOrInsert(mem, 5, 33, 0);
+      hashSearchOrInsertMemory(mem, 5, 33, 0);
       fail();
     } catch (final SketchesArgumentException e) {
       // expected
@@ -180,19 +181,19 @@ public class HashOperationsTest {
     final WritableMemory wmem = WritableMemory.wrap(table);
 
     for (int i = 1; i <= 32; ++i) {
-      fastHashInsertOnly(wmem, 5, i, 0);
+      hashInsertOnlyMemory(wmem, 5, i, 0);
     }
 
     // table full; throws exception
     try {
-      fastHashInsertOnly(wmem, 5, 33, 0);
+      hashInsertOnlyMemory(wmem, 5, 33, 0);
       fail();
     } catch (final SketchesArgumentException e) {
       // expected
     }
 
     try {
-      fastHashSearchOrInsert(wmem, 5, 33, 0);
+      hashSearchOrInsertMemory(wmem, 5, 33, 0);
       fail();
     } catch (final SketchesArgumentException e) {
       // expected


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org