You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by le...@apache.org on 2020/06/26 21:12:02 UTC
[incubator-datasketches-java] 03/03: Interim changes, all tests work.
This is an automated email from the ASF dual-hosted git repository.
leerho pushed a commit to branch Refactor_Theta_Tuple
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-java.git
commit bc77a086f7b772b73873a18dbe3bfac051cb048b
Author: Lee Rhodes <le...@users.noreply.github.com>
AuthorDate: Fri Jun 26 14:11:17 2020 -0700
Interim changes, all tests work.
---
.../org/apache/datasketches/theta/AnotBimpl.java | 177 +++++++++++----------
.../datasketches/theta/CompactOperations.java | 156 +++++++++++++-----
.../apache/datasketches/theta/CompactSketch.java | 63 --------
.../theta/DirectCompactOrderedSketch.java | 46 ------
.../datasketches/theta/DirectCompactSketch.java | 102 ++++--------
.../theta/DirectCompactUnorderedSketch.java | 44 -----
.../theta/DirectQuickSelectSketchR.java | 2 +-
.../apache/datasketches/theta/HeapAlphaSketch.java | 2 +-
.../theta/HeapCompactOrderedSketch.java | 12 +-
.../datasketches/theta/HeapCompactSketch.java | 1 +
.../theta/HeapCompactUnorderedSketch.java | 39 +----
.../datasketches/theta/HeapQuickSelectSketch.java | 2 +-
.../datasketches/theta/IntersectionImplR.java | 8 +-
.../datasketches/theta/PairwiseSetOperations.java | 19 ++-
.../apache/datasketches/theta/PreambleUtil.java | 71 ---------
.../apache/datasketches/theta/SetOperation.java | 52 +-----
.../datasketches/theta/SetOperationBuilder.java | 2 +-
.../datasketches/theta/SingleItemSketch.java | 48 ++++--
.../java/org/apache/datasketches/theta/Sketch.java | 55 +++----
.../org/apache/datasketches/theta/UnionImpl.java | 10 +-
.../apache/datasketches/theta/UpdateSketch.java | 6 +-
.../apache/datasketches/theta/AnotBimplTest.java | 2 +-
.../datasketches/theta/CompactSketchTest.java | 116 ++++++++++++--
.../ConcurrentDirectQuickSelectSketchTest.java | 14 +-
.../theta/ConcurrentHeapQuickSelectSketchTest.java | 17 +-
.../theta/DirectQuickSelectSketchTest.java | 13 +-
.../org/apache/datasketches/theta/EmptyTest.java | 7 +-
.../theta/ForwardCompatibilityTest.java | 25 +--
.../datasketches/theta/HeapAlphaSketchTest.java | 11 +-
.../theta/HeapQuickSelectSketchTest.java | 11 +-
.../theta/PairwiseSetOperationsTest.java | 2 +-
.../datasketches/theta/SetOperationTest.java | 11 +-
.../datasketches/theta/SetOpsCornerCasesTest.java | 3 +-
.../datasketches/theta/SingleItemSketchTest.java | 9 +-
.../org/apache/datasketches/theta/SketchTest.java | 9 +-
.../tuple/adouble/AdoubleAnotBTest.java | 2 +
.../tuple/adouble/AdoubleIntersectionTest.java | 1 +
.../tuple/aninteger/IntegerSketchTest.java | 3 +-
.../tuple/strings/ArrayOfStringsSketchTest.java | 5 +-
tools/FindBugsExcludeFilter.xml | 3 +-
40 files changed, 519 insertions(+), 662 deletions(-)
diff --git a/src/main/java/org/apache/datasketches/theta/AnotBimpl.java b/src/main/java/org/apache/datasketches/theta/AnotBimpl.java
index 702ae4f..576af7b 100644
--- a/src/main/java/org/apache/datasketches/theta/AnotBimpl.java
+++ b/src/main/java/org/apache/datasketches/theta/AnotBimpl.java
@@ -41,11 +41,11 @@ import org.apache.datasketches.memory.WritableMemory;
* @author Kevin Lang
*/
final class AnotBimpl extends AnotB {
- private final short seedHashR_;
- private boolean emptyR_;
- private long thetaLongR_;
- private long[] hashArrR_; //compact array w curCountR_ entries
- private int curCountR_;
+ private final short seedHash_;
+ private boolean empty_;
+ private long thetaLong_;
+ private long[] hashArr_; //compact array w curCount_ entries
+ private int curCount_;
//Remove all 4 of these with deprecated
private Sketch skA_;
@@ -68,7 +68,7 @@ final class AnotBimpl extends AnotB {
* @param seedHash 16 bit hash of the chosen update seed.
*/
AnotBimpl(final short seedHash) {
- seedHashR_ = seedHash;
+ seedHash_ = seedHash;
reset();
}
@@ -84,23 +84,23 @@ final class AnotBimpl extends AnotB {
return;
}
//skA is not empty
- checkSeedHashes(seedHashR_, skA.getSeedHash());
- emptyR_ = false;
- thetaLongR_ = skA.getThetaLong();
+ checkSeedHashes(seedHash_, skA.getSeedHash());
+ empty_ = false;
+ thetaLong_ = skA.getThetaLong();
final CompactSketch cskA = (skA instanceof CompactSketch)
? (CompactSketch) skA
: ((UpdateSketch) skA).compact();
- hashArrR_ = skA.isDirect() ? cskA.getCache() : cskA.getCache().clone();
- curCountR_ = cskA.getRetainedEntries(true);
+ hashArr_ = skA.isDirect() ? cskA.getCache() : cskA.getCache().clone();
+ curCount_ = cskA.getRetainedEntries(true);
}
@Override
public void notB(final Sketch skB) {
- if (emptyR_ || (skB == null) || skB.isEmpty()) { return; }
+ if (empty_ || (skB == null) || skB.isEmpty()) { return; }
//skB is not empty
- checkSeedHashes(seedHashR_, skB.getSeedHash());
+ checkSeedHashes(seedHash_, skB.getSeedHash());
final long thetaLongB = skB.getThetaLong();
- thetaLongR_ = Math.min(thetaLongR_, thetaLongB);
+ thetaLong_ = Math.min(thetaLong_, thetaLongB);
//Build hashtable and removes hashes of skB >= theta
final int countB = skB.getRetainedEntries();
@@ -109,24 +109,24 @@ final class AnotBimpl extends AnotB {
final long[] hashTableB;
if (skB instanceof CompactSketch) {
cskB = (CompactSketch) skB;
- hashTableB = convertToHashTable(cskB.getCache(), countB, thetaLongR_, REBUILD_THRESHOLD);
+ hashTableB = convertToHashTable(cskB.getCache(), countB, thetaLong_, REBUILD_THRESHOLD);
} else {
uskB = (UpdateSketch) skB;
- hashTableB = (thetaLongR_ < thetaLongB)
- ? convertToHashTable(uskB.getCache(), countB, thetaLongR_, REBUILD_THRESHOLD)
+ hashTableB = (thetaLong_ < thetaLongB)
+ ? convertToHashTable(uskB.getCache(), countB, thetaLong_, REBUILD_THRESHOLD)
: uskB.getCache();
cskB = uskB.compact();
}
//build temporary arrays of skA
- final long[] tmpHashArrA = new long[curCountR_];
+ final long[] tmpHashArrA = new long[curCount_];
//search for non matches and build temp arrays
final int lgHTBLen = simpleIntLog2(hashTableB.length);
int nonMatches = 0;
- for (int i = 0; i < curCountR_; i++) {
- final long hash = hashArrR_[i];
- if ((hash != 0) && (hash < thetaLongR_)) { //skips hashes of A >= theta
+ for (int i = 0; i < curCount_; i++) {
+ final long hash = hashArr_[i];
+ if ((hash != 0) && (hash < thetaLong_)) { //skips hashes of A >= theta
final int index = hashSearch(hashTableB, lgHTBLen, hash);
if (index == -1) {
tmpHashArrA[nonMatches] = hash;
@@ -134,9 +134,9 @@ final class AnotBimpl extends AnotB {
}
}
}
- hashArrR_ = Arrays.copyOfRange(tmpHashArrA, 0, nonMatches);
- curCountR_ = nonMatches;
- emptyR_ = (nonMatches == 0) && (thetaLongR_ == Long.MAX_VALUE);
+ hashArr_ = Arrays.copyOfRange(tmpHashArrA, 0, nonMatches);
+ curCount_ = nonMatches;
+ empty_ = (nonMatches == 0) && (thetaLong_ == Long.MAX_VALUE);
}
@Override
@@ -148,7 +148,7 @@ final class AnotBimpl extends AnotB {
public CompactSketch getResult(final boolean dstOrdered, final WritableMemory dstMem,
final boolean reset) {
final CompactSketch result =
- getResult(hashArrR_, emptyR_, seedHashR_, curCountR_, thetaLongR_, dstOrdered, dstMem);
+ getResult(hashArr_, empty_, seedHash_, curCount_, thetaLong_, dstOrdered, dstMem);
if (reset) { reset(); }
return result;
}
@@ -204,8 +204,8 @@ final class AnotBimpl extends AnotB {
if ((skB == null) || skB.isEmpty()) { return skA.compact(dstOrdered, dstMem); }
final short seedHashA = skA.getSeedHash();
final short seedHashB = skB.getSeedHash();
- checkSeedHashes(seedHashA, seedHashR_);
- checkSeedHashes(seedHashB, seedHashR_);
+ checkSeedHashes(seedHashA, seedHash_);
+ checkSeedHashes(seedHashB, seedHash_);
//Both skA & skB are not empty
//Load skA into local tmp registers
@@ -245,18 +245,18 @@ final class AnotBimpl extends AnotB {
empty = ((nonMatches == 0) && (thetaLong == Long.MAX_VALUE));
final long[] hashArrOut = Arrays.copyOfRange(tmpHashArrA, 0, nonMatches);
final CompactSketch result =
- AnotBimpl.getResult(hashArrOut, empty, seedHashR_, nonMatches, thetaLong, dstOrdered, dstMem);
+ AnotBimpl.getResult(hashArrOut, empty, seedHash_, nonMatches, thetaLong, dstOrdered, dstMem);
return result;
}
@Override
int getRetainedEntries(final boolean valid) {
- return curCountR_;
+ return curCount_;
}
@Override
boolean isEmpty() {
- return emptyR_;
+ return empty_;
}
@Override
@@ -266,41 +266,42 @@ final class AnotBimpl extends AnotB {
//Deprecated methods
+ @Deprecated
@Override
public void update(final Sketch a, final Sketch b) {
skA_ = a;
skB_ = b;
- thetaLongR_ = Long.MAX_VALUE;
- emptyR_ = true;
- hashArrR_ = null;
- curCountR_ = 0;
+ thetaLong_ = Long.MAX_VALUE; //*
+ empty_ = true; //*
+ hashArr_ = null; //*
+ curCount_ = 0; //*
lgArrLongsHT_ = 5;
bHashTable_ = null;
compute();
}
+ @Deprecated
@Override
public CompactSketch getResult() {
return getResult(true, null);
}
+ @Deprecated
@Override
public CompactSketch getResult(final boolean dstOrdered, final WritableMemory dstMem) {
- final long[] compactCache = (curCountR_ <= 0)
+ final long[] compactCache = (curCount_ <= 0)
? new long[0]
- : Arrays.copyOfRange(hashArrR_, 0, curCountR_);
- if (dstOrdered && (curCountR_ > 1)) {
+ : Arrays.copyOfRange(hashArr_, 0, curCount_);
+ if (dstOrdered && (curCount_ > 1)) {
Arrays.sort(compactCache);
}
//Create the CompactSketch
- final CompactSketch comp = createCompactSketch(
- compactCache, emptyR_, seedHashR_, curCountR_, thetaLongR_, dstOrdered, dstMem);
- reset(); //TODO
- return comp;
+ final CompactSketch csk = CompactOperations.componentsToCompact(
+ thetaLong_, curCount_, seedHash_, empty_, true, dstOrdered, dstOrdered, dstMem, compactCache);
+ reset();
+ return csk;
}
-
-
//restricted
void compute() {
@@ -370,86 +371,86 @@ final class AnotBimpl extends AnotB {
switch (sw) {
case 0 : //A Null, B Null; Return (1.0, 0, T)
- thetaLongR_ = Long.MAX_VALUE;
- emptyR_ = true;
+ thetaLong_ = Long.MAX_VALUE;
+ empty_ = true;
break;
case 10: //A Empty, B Compact; CheckAB, Return (1.0, 0, T)
case 11: //A Empty, B Ordered; CheckAB, Return (1.0, 0, T)
case 12: //A Empty, B HashTbl; CheckAB, Return (1.0, 0, T)
- checkSeedHashes(seedHashR_, skA_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
+ checkSeedHashes(seedHash_, skA_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
//$FALL-THROUGH$
case 1: //A Null, B Empty; CheckB, Return (1.0, 0, T)
case 2: //A Null, B Compact; CheckB, Return (1.0, 0, T)
case 3: //A Null, B Ordered; CheckB, Return (1.0, 0, T)
case 4: //A Null, B HashTbl; CheckB, Return (1.0, 0, T)
- checkSeedHashes(seedHashR_, skB_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
- thetaLongR_ = Long.MAX_VALUE;
- emptyR_ = true;
+ checkSeedHashes(seedHash_, skB_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
+ thetaLong_ = Long.MAX_VALUE;
+ empty_ = true;
break;
case 9: //A Empty, B Empty; CheckAB, Return (1.0, 0, T)
- checkSeedHashes(seedHashR_, skB_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
+ checkSeedHashes(seedHash_, skB_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
//$FALL-THROUGH$
case 8: //A Empty, B Null; CheckA, Return (1.0, 0, T)
- checkSeedHashes(seedHashR_, skA_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
- thetaLongR_ = Long.MAX_VALUE;
- emptyR_ = true;
+ checkSeedHashes(seedHash_, skA_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
+ thetaLong_ = Long.MAX_VALUE;
+ empty_ = true;
break;
case 17: //A Compact, B Empty; CheckAB, Return (ThA, |A|, F), copyA
case 25: //A Ordered, B Empty; CheckAB, Return (ThA, |A|, F), copyA
case 33: //A HashTbl, B Empty; CheckAB, Return (ThA, |A|, F), copyA
- checkSeedHashes(seedHashR_, skB_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
+ checkSeedHashes(seedHash_, skB_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
//$FALL-THROUGH$
case 16: //A Compact, B Null; CheckA, Return (ThA, |A|, F), copyA
case 24: //A Ordered, B Null; CheckA, Return (ThA, |A|, F), copyA
case 32: //A HashTbl, B Null; CheckA, Return (ThA, |A|, F), copyA
- checkSeedHashes(seedHashR_, skA_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
- thetaLongR_ = skA_.getThetaLong();
- emptyR_ = false;
- curCountR_ = skA_.getRetainedEntries(true);
- hashArrR_ = CompactOperations.compactCache(skA_.getCache(), curCountR_, thetaLongR_, false);
+ checkSeedHashes(seedHash_, skA_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
+ thetaLong_ = skA_.getThetaLong();
+ empty_ = false;
+ curCount_ = skA_.getRetainedEntries(true);
+ hashArr_ = CompactOperations.compactCache(skA_.getCache(), curCount_, thetaLong_, false);
break;
case 18: //A Compact, B Compact; CheckAB, B -> H; => C,H; scanAllAsearchB()
case 19: //A Compact, B Ordered; CheckAB, B -> H; => C,H; scanAllAsearchB()
case 34: //A HashTbl, B Compact; CheckAB, B -> H; => H,H; scanAllAsearchB()
case 35: //A HashTbl, B Ordered; CheckAB, B -> H; => H,H; scanAllAsearchB()
- checkSeedHashes(seedHashR_, skA_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
- checkSeedHashes(seedHashR_, skB_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
- thetaLongR_ = min(skA_.getThetaLong(), skB_.getThetaLong());
- emptyR_ = false;
+ checkSeedHashes(seedHash_, skA_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
+ checkSeedHashes(seedHash_, skB_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
+ thetaLong_ = min(skA_.getThetaLong(), skB_.getThetaLong());
+ empty_ = false;
convertBtoHT();
scanAllAsearchB();
break;
case 26: //A Ordered, B Compact; CheckAB, B -> H; => O,H; scanEarlyStopAsearchB()
case 27: //A Ordered, B Ordered; CheckAB, B -> H; => O,H; scanEarlyStopAsearchB()
- checkSeedHashes(seedHashR_, skA_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
- checkSeedHashes(seedHashR_, skB_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
- thetaLongR_ = min(skA_.getThetaLong(), skB_.getThetaLong());
- emptyR_ = false;
+ checkSeedHashes(seedHash_, skA_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
+ checkSeedHashes(seedHash_, skB_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
+ thetaLong_ = min(skA_.getThetaLong(), skB_.getThetaLong());
+ empty_ = false;
convertBtoHT();
scanEarlyStopAsearchB();
break;
case 20: //A Compact, B HashTbl; CheckAB, scanAllAsearchB()
case 36: //A HashTbl, B HashTbl; CheckAB, scanAllAsearchB()
- checkSeedHashes(seedHashR_, skA_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
- checkSeedHashes(seedHashR_, skB_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
- thetaLongR_ = min(skA_.getThetaLong(), skB_.getThetaLong());
- emptyR_ = false;
+ checkSeedHashes(seedHash_, skA_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
+ checkSeedHashes(seedHash_, skB_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
+ thetaLong_ = min(skA_.getThetaLong(), skB_.getThetaLong());
+ empty_ = false;
lgArrLongsHT_ = ((UpdateSketch)skB_).getLgArrLongs();
bHashTable_ = skB_.getCache();
scanAllAsearchB();
break;
case 28: //A Ordered, B HashTbl; CheckAB, scanEarlyStopAsearchB()
- checkSeedHashes(seedHashR_, skA_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
- checkSeedHashes(seedHashR_, skB_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
- thetaLongR_ = min(skA_.getThetaLong(), skB_.getThetaLong());
- emptyR_ = false;
+ checkSeedHashes(seedHash_, skA_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
+ checkSeedHashes(seedHash_, skB_.getSeedHash());//lgtm [java/dereferenced-value-may-be-null]
+ thetaLong_ = min(skA_.getThetaLong(), skB_.getThetaLong());
+ empty_ = false;
lgArrLongsHT_ = ((UpdateSketch)skB_).getLgArrLongs();
bHashTable_ = skB_.getCache();
scanEarlyStopAsearchB();
@@ -463,20 +464,20 @@ final class AnotBimpl extends AnotB {
final int curCountB = skB_.getRetainedEntries(true);
lgArrLongsHT_ = computeMinLgArrLongsFromCount(curCountB);
bHashTable_ = new long[1 << lgArrLongsHT_];
- hashArrayInsert(skB_.getCache(), bHashTable_, lgArrLongsHT_, thetaLongR_);
+ hashArrayInsert(skB_.getCache(), bHashTable_, lgArrLongsHT_, thetaLong_);
}
//Sketch A is either unordered compact or hash table
private void scanAllAsearchB() {
final long[] scanAArr = skA_.getCache();
final int arrLongsIn = scanAArr.length;
- hashArrR_ = new long[arrLongsIn];
+ hashArr_ = new long[arrLongsIn];
for (int i = 0; i < arrLongsIn; i++ ) {
final long hashIn = scanAArr[i];
- if ((hashIn <= 0L) || (hashIn >= thetaLongR_)) { continue; }
+ if ((hashIn <= 0L) || (hashIn >= thetaLong_)) { continue; }
final int foundIdx = hashSearch(bHashTable_, lgArrLongsHT_, hashIn);
if (foundIdx > -1) { continue; }
- hashArrR_[curCountR_++] = hashIn;
+ hashArr_[curCount_++] = hashIn;
}
}
@@ -484,26 +485,26 @@ final class AnotBimpl extends AnotB {
private void scanEarlyStopAsearchB() {
final long[] scanAArr = skA_.getCache();
final int arrLongsIn = scanAArr.length;
- hashArrR_ = new long[arrLongsIn]; //maybe 2x what is needed, but getRetainedEntries can be slow.
+ hashArr_ = new long[arrLongsIn]; //maybe 2x what is needed, but getRetainedEntries can be slow.
for (int i = 0; i < arrLongsIn; i++ ) {
final long hashIn = scanAArr[i];
if (hashIn <= 0L) { continue; }
- if (hashIn >= thetaLongR_) {
+ if (hashIn >= thetaLong_) {
break; //early stop assumes that hashes in input sketch are ordered!
}
final int foundIdx = hashSearch(bHashTable_, lgArrLongsHT_, hashIn);
if (foundIdx > -1) { continue; }
- hashArrR_[curCountR_++] = hashIn;
+ hashArr_[curCount_++] = hashIn;
}
}
private void reset() {
skA_ = null;
skB_ = null;
- thetaLongR_ = Long.MAX_VALUE;
- emptyR_ = true;
- hashArrR_ = null;
- curCountR_ = 0;
+ thetaLong_ = Long.MAX_VALUE;
+ empty_ = true;
+ hashArr_ = null;
+ curCount_ = 0;
lgArrLongsHT_ = 5;
bHashTable_ = null;
}
@@ -515,12 +516,12 @@ final class AnotBimpl extends AnotB {
@Override
short getSeedHash() {
- return seedHashR_;
+ return seedHash_;
}
@Override
long getThetaLong() {
- return thetaLongR_;
+ return thetaLong_;
}
}
diff --git a/src/main/java/org/apache/datasketches/theta/CompactOperations.java b/src/main/java/org/apache/datasketches/theta/CompactOperations.java
index 47030fd..6c7067e 100644
--- a/src/main/java/org/apache/datasketches/theta/CompactOperations.java
+++ b/src/main/java/org/apache/datasketches/theta/CompactOperations.java
@@ -46,6 +46,7 @@ import java.util.Arrays;
import org.apache.datasketches.Family;
import org.apache.datasketches.SketchesArgumentException;
+import org.apache.datasketches.SketchesStateException;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;
@@ -60,35 +61,37 @@ final class CompactOperations {
final long thetaLong,
final int curCount,
final short seedHash,
+ final boolean srcEmpty,
final boolean srcCompact,
- boolean srcOrdered,
+ final boolean srcOrdered,
final boolean dstOrdered,
final WritableMemory dstMem,
final long[] hashArr) //may not be compacted, ordered or unordered
-
{
final boolean direct = dstMem != null;
- final boolean empty = (curCount == 0) && (thetaLong == Long.MAX_VALUE);
+ final boolean empty = srcEmpty || ((curCount == 0) && (thetaLong == Long.MAX_VALUE));
final boolean single = (curCount == 1) && (thetaLong == Long.MAX_VALUE);
final long[] hashArrOut;
if (!srcCompact) {
hashArrOut = CompactOperations.compactCache(hashArr, curCount, thetaLong, dstOrdered);
- srcOrdered = true;
} else {
hashArrOut = hashArr;
}
if (!srcOrdered && dstOrdered && !empty && !single) {
Arrays.sort(hashArrOut);
}
+ //Note: for empty and single we always output the ordered form.
+ final boolean dstOrderedOut = (empty || single) ? true : dstOrdered;
if (direct) {
final int preLongs = computeCompactPreLongs(thetaLong, empty, curCount);
int flags = READ_ONLY_FLAG_MASK | COMPACT_FLAG_MASK; //always LE
flags |= empty ? EMPTY_FLAG_MASK : 0;
- flags |= dstOrdered ? ORDERED_FLAG_MASK : 0;
+ flags |= dstOrderedOut ? ORDERED_FLAG_MASK : 0;
flags |= single ? SINGLEITEM_FLAG_MASK : 0;
+
final Memory mem =
- loadCompactMemory(hashArr, seedHash, curCount, thetaLong, dstMem, (byte)flags, preLongs);
- if (dstOrdered) {
+ loadCompactMemory(hashArrOut, seedHash, curCount, thetaLong, dstMem, (byte)flags, preLongs);
+ if (dstOrderedOut) {
return new DirectCompactOrderedSketch(mem);
} else {
return new DirectCompactUnorderedSketch(mem);
@@ -100,7 +103,7 @@ final class CompactOperations {
if (single) {
return new SingleItemSketch(hashArrOut[0], seedHash);
}
- if (dstOrdered) {
+ if (dstOrderedOut) {
return new HeapCompactOrderedSketch(hashArrOut, empty, seedHash, curCount, thetaLong);
} else {
return new HeapCompactUnorderedSketch(hashArrOut, empty, seedHash, curCount, thetaLong);
@@ -108,45 +111,117 @@ final class CompactOperations {
}
}
- @SuppressWarnings("unused")
+ /**
+ * Heapify or convert a source Theta Sketch Memory image into a heap or target Memory CompactSketch.
+ * This assumes hashSeed is OK; serVer = 3.
+ * @param srcMem the given input source Memory image
+ * @param dstOrdered the desired ordering of the resulting CompactSketch
+ * @param dstMem Used for the target CompactSketch if it is Direct.
+ * @return a CompactSketch of the correct form.
+ */
+ @SuppressWarnings("unused") //to replace CompactSketch.anyMemoryToCompactHeap
static CompactSketch memoryToCompact(
final Memory srcMem,
final boolean dstOrdered,
final WritableMemory dstMem)
{
- //extract Pre0 fields
- final int preLongs = extractPreLongs(srcMem);
- final int serVer = extractSerVer(srcMem);
- final int famId = extractFamilyID(srcMem);
- final int lgArrLongs = extractLgArrLongs(srcMem);
- final int flags = extractFlags(srcMem);
- final short seedHash = (short) extractSeedHash(srcMem);
+ //extract Pre0 fields and Flags from srcMem
+ final int srcPreLongs = extractPreLongs(srcMem);
+ final int srcSerVer = extractSerVer(srcMem); //not used
+ final int srcFamId = extractFamilyID(srcMem);
+ final Family srcFamily = Family.idToFamily(srcFamId);
+ final int srcLgArrLongs = extractLgArrLongs(srcMem);
+ final int srcFlags = extractFlags(srcMem);
+ final short srcSeedHash = (short) extractSeedHash(srcMem);
+ //srcFlags
+ final boolean srcReadOnlyFlag = (srcFlags & READ_ONLY_FLAG_MASK) > 0;
+ final boolean srcEmptyFlag = (srcFlags & EMPTY_FLAG_MASK) > 0;
+ final boolean srcCompactFlag = (srcFlags & COMPACT_FLAG_MASK) > 0;
+ final boolean srcOrderedFlag = (srcFlags & ORDERED_FLAG_MASK) > 0;
+ //final boolean srcSingleFlag = (srcFlags & SINGLEITEM_FLAG_MASK) > 0;
+ final boolean single =
+ SingleItemSketch.otherCheckForSingleItem(srcPreLongs, srcSerVer, srcFamId, srcFlags);
- final int curCount = extractCurCount(srcMem);
- final long thetaLong = extractThetaLong(srcMem);
+ //extract pre1 and pre2 fields
+ final int curCount = single ? 1 : (srcPreLongs > 1) ? extractCurCount(srcMem) : 0;
+ final long thetaLong = (srcPreLongs > 2) ? extractThetaLong(srcMem) : Long.MAX_VALUE;
- final boolean empty = (flags & EMPTY_FLAG_MASK) > 0;
- final boolean srcCompact = (flags & COMPACT_FLAG_MASK) > 0;
- final boolean srcOrdered = (flags & ORDERED_FLAG_MASK) > 0;
- final boolean single = (flags & SINGLEITEM_FLAG_MASK) > 0;
- if (!srcOrdered) {
+ //do some basic checks ...
+ if (srcEmptyFlag) { assert (curCount == 0) && (thetaLong == Long.MAX_VALUE); }
+ if (single) { assert (curCount == 1) && (thetaLong == Long.MAX_VALUE); }
+ checkFamilyAndFlags(srcFamId, srcCompactFlag, srcReadOnlyFlag);
+ //dispatch empty and single cases
+ //Note: for empty and single we always output the ordered form.
+ final boolean dstOrderedOut = (srcEmptyFlag || single) ? true : dstOrdered;
+ if (srcEmptyFlag) {
+ if (dstMem != null) {
+ dstMem.putByteArray(0, EmptyCompactSketch.EMPTY_COMPACT_SKETCH_ARR, 0, 8);
+ return new DirectCompactOrderedSketch(dstMem);
+ } else {
+ return EmptyCompactSketch.getInstance();
+ }
+ }
+ if (single) {
+ final long hash = srcMem.getLong(srcPreLongs << 3);
+ final SingleItemSketch sis = new SingleItemSketch(hash, srcSeedHash);
+ if (dstMem != null) {
+ dstMem.putByteArray(0, sis.toByteArray(),0, 16);
+ return new DirectCompactOrderedSketch(dstMem);
+ } else { //heap
+ return sis;
+ }
}
- final long[] hashArr ;
- //do checks ...
- final boolean direct = dstMem != null;
- if (empty) { assert (curCount == 0) && (thetaLong == Long.MAX_VALUE); }
- if (single) { assert (curCount == 1) && (thetaLong == Long.MAX_VALUE); }
- if (direct) {
+ //extract hashArr > 1
+ final long[] hashArr;
+ if (srcCompactFlag) {
+ hashArr = new long[curCount];
+ srcMem.getLongArray(srcPreLongs << 3, hashArr, 0, curCount);
+ } else { //estimating, thus hashTable form
+ final int srcCacheLen = 1 << srcLgArrLongs;
+ final long[] tempHashArr = new long[srcCacheLen];
+ srcMem.getLongArray(srcPreLongs << 3, tempHashArr, 0, srcCacheLen);
+ hashArr = compactCache(tempHashArr, curCount, thetaLong, dstOrderedOut);
+ }
+
+ //load the destination.
+ if (dstMem != null) {
+ final Memory tgtMem = loadCompactMemory(hashArr, srcSeedHash, curCount, thetaLong, dstMem,
+ (byte)srcFlags, srcPreLongs);
+ if (dstOrderedOut) {
+ return new DirectCompactOrderedSketch(tgtMem);
+ } else {
+ return new DirectCompactUnorderedSketch(tgtMem);
+ }
} else { //heap
- //dispatch empty and single
- //dispatch other
+ if (dstOrderedOut) {
+ return new HeapCompactOrderedSketch(hashArr, srcEmptyFlag, srcSeedHash, curCount, thetaLong);
+ } else {
+ return new HeapCompactUnorderedSketch(hashArr, srcEmptyFlag, srcSeedHash, curCount, thetaLong);
+ }
}
- return null;
+ }
+
+ private static final void checkFamilyAndFlags(
+ final int srcFamId,
+ final boolean srcCompactFlag,
+ final boolean srcReadOnlyFlag) {
+ final Family srcFamily = Family.idToFamily(srcFamId);
+ if (srcCompactFlag) {
+ if ((srcFamily == Family.COMPACT) && srcReadOnlyFlag) { return; }
+ } else {
+ if (srcFamily == Family.ALPHA) { return; }
+ if (srcFamily == Family.QUICKSELECT) { return; }
+ }
+ throw new SketchesArgumentException(
+ "Possible Corruption: Family does not match flags: Family: "
+ + srcFamily.toString()
+ + ", Compact Flag: " + srcCompactFlag
+ + ", ReadOnly Flag: " + srcReadOnlyFlag);
}
//All arguments must be valid and correct including flags.
@@ -200,15 +275,16 @@ final class CompactOperations {
}
/**
- * Compact the given array. The source cache can be a hash table with interstitial zeros or
- * "dirty" values, which are hash values greater than theta. These can be generated by the
- * Alpha sketch.
+ * Copies then compacts, cleans, and may sort the resulting array.
+ * The source cache can be a hash table with interstitial zeros or
+ * "dirty" values, which are hash values greater than theta.
+ * These can be generated by the Alpha sketch.
* @param srcCache anything
* @param curCount must be correct
* @param thetaLong The correct
* <a href="{@docRoot}/resources/dictionary.html#thetaLong">thetaLong</a>.
* @param dstOrdered true if output array must be sorted
- * @return the compacted array
+ * @return the compacted array.
*/
static final long[] compactCache(final long[] srcCache, final int curCount,
final long thetaLong, final boolean dstOrdered) {
@@ -223,15 +299,15 @@ final class CompactOperations {
if ((v <= 0L) || (v >= thetaLong) ) { continue; } //ignoring zeros or dirty values
cacheOut[j++] = v;
}
- assert curCount == j;
+ if (j < curCount) {
+ throw new SketchesStateException(
+ "Possible Corruption: curCount parameter is incorrect.");
+ }
if (dstOrdered && (curCount > 1)) {
Arrays.sort(cacheOut);
}
return cacheOut;
}
-
-
-
}
diff --git a/src/main/java/org/apache/datasketches/theta/CompactSketch.java b/src/main/java/org/apache/datasketches/theta/CompactSketch.java
index dd667d6..faafd5c 100644
--- a/src/main/java/org/apache/datasketches/theta/CompactSketch.java
+++ b/src/main/java/org/apache/datasketches/theta/CompactSketch.java
@@ -19,19 +19,7 @@
package org.apache.datasketches.theta;
-import static org.apache.datasketches.theta.PreambleUtil.COMPACT_FLAG_MASK;
-import static org.apache.datasketches.theta.PreambleUtil.EMPTY_FLAG_MASK;
-import static org.apache.datasketches.theta.PreambleUtil.SINGLEITEM_FLAG_MASK;
-import static org.apache.datasketches.theta.PreambleUtil.extractCurCount;
-import static org.apache.datasketches.theta.PreambleUtil.extractFlags;
-import static org.apache.datasketches.theta.PreambleUtil.extractLgArrLongs;
-import static org.apache.datasketches.theta.PreambleUtil.extractPreLongs;
-import static org.apache.datasketches.theta.PreambleUtil.extractThetaLong;
-
-import java.util.Arrays;
-
import org.apache.datasketches.Family;
-import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;
/**
@@ -67,55 +55,4 @@ public abstract class CompactSketch extends Sketch {
return true;
}
- //restricted methods
-
- /**
- * Heapifies the given source Memory with seedHash. We assume that the destination sketch type has
- * been determined to be Compact and that the memory image is valid and the seedHash is correct.
- * @param srcMem <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
- * @param seedHash <a href="{@docRoot}/resources/dictionary.html#seedHash">See Seed Hash</a>.
- * @return a CompactSketch
- */
- static CompactSketch anyMemoryToCompactHeap(final Memory srcMem, final short seedHash,
- final boolean dstOrdered) {
- final int flags = extractFlags(srcMem);
- final boolean empty = (flags & EMPTY_FLAG_MASK) > 0;
- if (empty) { return EmptyCompactSketch.getInstance(); }
- //EMPTY FLAG is FALSE
- final int preLongs = extractPreLongs(srcMem);
- final boolean single = (flags & SINGLEITEM_FLAG_MASK) > 0;
-
- if (preLongs == 1) {
- if (single) {
- return new SingleItemSketch(srcMem.getLong(8), seedHash);
- } else {
- return EmptyCompactSketch.getInstance();
- }
- }
- //preLongs > 1
- final int curCount = extractCurCount(srcMem);
- final long thetaLong = (preLongs > 2) ? extractThetaLong(srcMem) : Long.MAX_VALUE;
- final boolean srcCompact = (flags & COMPACT_FLAG_MASK) > 0;
- final long[] hashArrOut;
- if (srcCompact) {
- if ((curCount == 0) && (thetaLong == Long.MAX_VALUE)) {
- return EmptyCompactSketch.getInstance();
- }
- if ((curCount == 1) && (thetaLong == Long.MAX_VALUE)) {
- //TODO
- }
- hashArrOut = new long[curCount];
- srcMem.getLongArray(8 * preLongs, hashArrOut, 0, curCount);
- if (dstOrdered) { Arrays.sort(hashArrOut); }
- } else { //src is hashTable
- final int lgArrLongs = extractLgArrLongs(srcMem);
- final long[] hashArr = new long[1 << lgArrLongs];
- srcMem.getLongArray(8 * preLongs, hashArr, 0, 1 << lgArrLongs);
- hashArrOut = CompactOperations.compactCache(hashArr, curCount, thetaLong, dstOrdered);
- }
- return dstOrdered
- ? new HeapCompactOrderedSketch(hashArrOut, empty, seedHash, curCount, thetaLong)
- : new HeapCompactUnorderedSketch(hashArrOut, empty, seedHash, curCount, thetaLong);
- }
-
}
diff --git a/src/main/java/org/apache/datasketches/theta/DirectCompactOrderedSketch.java b/src/main/java/org/apache/datasketches/theta/DirectCompactOrderedSketch.java
index 2fc662b..e2c9de5 100644
--- a/src/main/java/org/apache/datasketches/theta/DirectCompactOrderedSketch.java
+++ b/src/main/java/org/apache/datasketches/theta/DirectCompactOrderedSketch.java
@@ -19,15 +19,9 @@
package org.apache.datasketches.theta;
-import static org.apache.datasketches.theta.PreambleUtil.COMPACT_FLAG_MASK;
-import static org.apache.datasketches.theta.PreambleUtil.EMPTY_FLAG_MASK;
-import static org.apache.datasketches.theta.PreambleUtil.ORDERED_FLAG_MASK;
-import static org.apache.datasketches.theta.PreambleUtil.READ_ONLY_FLAG_MASK;
-import static org.apache.datasketches.theta.PreambleUtil.SINGLEITEM_FLAG_MASK;
import static org.apache.datasketches.theta.PreambleUtil.checkMemorySeedHash;
import org.apache.datasketches.memory.Memory;
-import org.apache.datasketches.memory.WritableMemory;
/**
* An off-heap (Direct), compact, ordered, read-only sketch. This sketch can only be associated
@@ -58,46 +52,6 @@ final class DirectCompactOrderedSketch extends DirectCompactSketch {
return new DirectCompactOrderedSketch(srcMem);
}
- /**
- * Constructs this sketch from correct, valid components.
- * @param cache in compact, ordered form
- * @param empty The correct <a href="{@docRoot}/resources/dictionary.html#empty">Empty</a>.
- * @param seedHash The correct
- * <a href="{@docRoot}/resources/dictionary.html#seedHash">Seed Hash</a>.
- * @param curCount correct value
- * @param thetaLong The correct
- * <a href="{@docRoot}/resources/dictionary.html#thetaLong">thetaLong</a>.
- * @param dstMem the given destination Memory. This clears it before use.
- * @return a DirectCompactOrderedSketch
- */
- static DirectCompactOrderedSketch compact(
- final long[] cache,
- final boolean empty,
- final short seedHash,
- final int curCount,
- final long thetaLong,
- final WritableMemory dstMem) {
- final int preLongs = CompactOperations.computeCompactPreLongs(thetaLong, empty, curCount);
- int flags = READ_ONLY_FLAG_MASK | COMPACT_FLAG_MASK | ORDERED_FLAG_MASK;
- flags |= empty ? EMPTY_FLAG_MASK : 0;
- flags |= (curCount == 1) ? SINGLEITEM_FLAG_MASK : 0;
- CompactOperations.loadCompactMemory(cache, seedHash, curCount, thetaLong, dstMem, (byte)flags, preLongs);
- return new DirectCompactOrderedSketch(dstMem);
- }
-
- @Override //ordered, on-heap
- public CompactSketch compact() {
- //TODO
- return null;
- }
-
- @Override
- public CompactSketch compact(final boolean dstOrdered, final WritableMemory wmem) {
- //TODO
- return null;
- }
-
-
@Override
public boolean isOrdered() {
return true;
diff --git a/src/main/java/org/apache/datasketches/theta/DirectCompactSketch.java b/src/main/java/org/apache/datasketches/theta/DirectCompactSketch.java
index bc62a09..6b65640 100644
--- a/src/main/java/org/apache/datasketches/theta/DirectCompactSketch.java
+++ b/src/main/java/org/apache/datasketches/theta/DirectCompactSketch.java
@@ -23,8 +23,10 @@ import static org.apache.datasketches.theta.PreambleUtil.extractCurCount;
import static org.apache.datasketches.theta.PreambleUtil.extractPreLongs;
import static org.apache.datasketches.theta.PreambleUtil.extractSeedHash;
import static org.apache.datasketches.theta.PreambleUtil.extractThetaLong;
+import static org.apache.datasketches.theta.SingleItemSketch.otherCheckForSingleItem;
import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.memory.WritableMemory;
/**
* Parent class of the Direct Compact Sketches.
@@ -40,82 +42,39 @@ abstract class DirectCompactSketch extends CompactSketch {
//Sketch
-// @Override //ordered, on-heap
-// public CompactSketch compact() {
-// //TODO
-// return null;
-// }
-//
-// @Override
-// public CompactSketch compact(final boolean dstOrdered, final WritableMemory wmem) {
-// final int srcFlags = extractFlags(mem_);
-// final boolean srcOrdered = (srcFlags & ORDERED_FLAG_MASK) > 0;
-// final int srcPreLongs = extractPreLongs(mem_);
-// final int srcCurCount = (srcPreLongs == 1) ? 0 : extractCurCount(mem_);
-// final long srcThetaLong = (srcPreLongs <= 2) ? Long.MAX_VALUE : extractThetaLong(mem_);
-// final int bytes = (srcPreLongs + srcCurCount) << 3;
-// if (srcCurCount == 0) {
-// if (srcThetaLong == Long.MAX_VALUE) {
-// //this sets the ordered to true independent of the dstOrdered request
-// return EmptyCompactSketch.getInstance().compact(true, wmem);
-// } else {
-// assert srcPreLongs == 3 : "Theta < 1.0, thus PreLong must be 3: " + srcPreLongs;
-// mem_.copyTo(0, wmem, 0, srcPreLongs << 3);
-// if (dstOrdered) {
-// return new DirectCompactOrderedSketch(wmem);
-// } else {
-// return new DirectCompactUnorderedSketch(wmem);
-// }
-// }
-// }
-// if (srcCurCount == 1) {
-// if (srcThetaLong == Long.MAX_VALUE) {
-// //TODO
-// }
-// }
-// if (!srcOrdered && dstOrdered) { //sort this src mem and place in wmem
-// if (srcCurCount == 0) {
-// final long thetaLong = extractThetaLong(mem_);
-// if (thetaLong == Long.MAX_VALUE) {
-// //TODO
-// }
-// } else {
-// final byte[] srcBytes = new byte[bytes];
-// mem_.getByteArray(0, srcBytes, 0, bytes);
-// wmem.putByteArray(0, srcBytes, 0, bytes);
-// final byte dstFlags = (byte) (srcFlags & ORDERED_FLAG_MASK);
-// wmem.putByte(FLAGS_BYTE, dstFlags);
-// }
-//
-// } else {
-// mem_.copyTo(0, wmem, 0, bytes);
-// }
-//
-// return null; //TODO
-// }
-
- //overidden by EmptyCompactSketch and SingleItemSketch
+ @Override
+ public CompactSketch compact() {
+ return compact(true, null);
+ }
+
+ @Override
+ public CompactSketch compact(final boolean dstOrdered, final WritableMemory dstMem) {
+ return CompactOperations.memoryToCompact(mem_, dstOrdered, dstMem);
+ }
+
@Override
public int getCurrentBytes(final boolean compact) { //compact is ignored here
- final int preLongs = getCurrentPreambleLongs(true);
- //preLongs > 1
- final int curCount = extractCurCount(mem_);
+ if (otherCheckForSingleItem(mem_)) { return 16; }
+ final int preLongs = extractPreLongs(mem_);
+ final int curCount = (preLongs == 1) ? 0 : extractCurCount(mem_);
return (preLongs + curCount) << 3;
}
@Override
public double getEstimate() {
- final int curCount = extractCurCount(mem_);
+ if (otherCheckForSingleItem(mem_)) { return 1; }
final int preLongs = extractPreLongs(mem_);
+ final int curCount = (preLongs == 1) ? 0 : extractCurCount(mem_);
final long thetaLong = (preLongs > 2) ? extractThetaLong(mem_) : Long.MAX_VALUE;
return Sketch.estimate(thetaLong, curCount);
}
- //overidden by EmptyCompactSketch and SingleItemSketch
@Override
public int getRetainedEntries(final boolean valid) { //compact is always valid
- //preLongs > 1
- return extractCurCount(mem_);
+ if (otherCheckForSingleItem(mem_)) { return 1; }
+ final int preLongs = extractPreLongs(mem_);
+ final int curCount = (preLongs == 1) ? 0 : extractCurCount(mem_);
+ return curCount;
}
@Override
@@ -136,7 +95,10 @@ abstract class DirectCompactSketch extends CompactSketch {
@Override
public boolean isEmpty() {
- return PreambleUtil.isEmptySketch(mem_);
+ final boolean emptyFlag = PreambleUtil.isEmptyFlag(mem_);
+ final long thetaLong = getThetaLong();
+ final int curCount = getRetainedEntries(true);
+ return emptyFlag || ((curCount == 0) && (thetaLong == Long.MAX_VALUE));
}
@Override
@@ -149,28 +111,26 @@ abstract class DirectCompactSketch extends CompactSketch {
return new MemoryHashIterator(mem_, getRetainedEntries(), getThetaLong());
}
- @Override //order is already determined.
+ @Override
public byte[] toByteArray() {
final int curCount = getRetainedEntries(true);
Sketch.checkIllegalCurCountAndEmpty(isEmpty(), curCount);
- final int preLongs = getCurrentPreambleLongs(true);
+ final int preLongs = extractPreLongs(mem_);
final int outBytes = (curCount + preLongs) << 3;
final byte[] byteArrOut = new byte[outBytes];
- mem_.getByteArray(0, byteArrOut, 0, outBytes); //copies the whole thing
+ mem_.getByteArray(0, byteArrOut, 0, outBytes);
return byteArrOut;
}
//restricted methods
-
-
-
@Override
long[] getCache() {
- final int curCount = getRetainedEntries(true);
+ if (otherCheckForSingleItem(mem_)) { return new long[] { mem_.getLong(8) }; }
+ final int preLongs = extractPreLongs(mem_);
+ final int curCount = (preLongs == 1) ? 0 : extractCurCount(mem_);
if (curCount > 0) {
final long[] cache = new long[curCount];
- final int preLongs = getCurrentPreambleLongs(true);
mem_.getLongArray(preLongs << 3, cache, 0, curCount);
return cache;
}
diff --git a/src/main/java/org/apache/datasketches/theta/DirectCompactUnorderedSketch.java b/src/main/java/org/apache/datasketches/theta/DirectCompactUnorderedSketch.java
index f8743ef..15b4433 100644
--- a/src/main/java/org/apache/datasketches/theta/DirectCompactUnorderedSketch.java
+++ b/src/main/java/org/apache/datasketches/theta/DirectCompactUnorderedSketch.java
@@ -19,14 +19,9 @@
package org.apache.datasketches.theta;
-import static org.apache.datasketches.theta.PreambleUtil.COMPACT_FLAG_MASK;
-import static org.apache.datasketches.theta.PreambleUtil.EMPTY_FLAG_MASK;
-import static org.apache.datasketches.theta.PreambleUtil.READ_ONLY_FLAG_MASK;
-import static org.apache.datasketches.theta.PreambleUtil.SINGLEITEM_FLAG_MASK;
import static org.apache.datasketches.theta.PreambleUtil.checkMemorySeedHash;
import org.apache.datasketches.memory.Memory;
-import org.apache.datasketches.memory.WritableMemory;
/**
* An off-heap (Direct), compact, unordered, read-only sketch. This sketch can only be associated
@@ -57,45 +52,6 @@ final class DirectCompactUnorderedSketch extends DirectCompactSketch {
return new DirectCompactUnorderedSketch(srcMem);
}
- /**
- * Constructs this sketch from correct, valid components.
- * @param cache in compact, ordered form
- * @param empty The correct <a href="{@docRoot}/resources/dictionary.html#empty">Empty</a>.
- * @param seedHash The correct
- * <a href="{@docRoot}/resources/dictionary.html#seedHash">Seed Hash</a>.
- * @param curCount correct value
- * @param thetaLong The correct
- * <a href="{@docRoot}/resources/dictionary.html#thetaLong">thetaLong</a>.
- * @param dstMem the given destination Memory. This clears it before use.
- * @return a DirectCompactUnorderedSketch
- */
- static DirectCompactUnorderedSketch compact(
- final long[] cache,
- final boolean empty,
- final short seedHash,
- final int curCount,
- final long thetaLong,
- final WritableMemory dstMem) {
- final int preLongs = CompactOperations.computeCompactPreLongs(thetaLong, empty, curCount);
- int flags = READ_ONLY_FLAG_MASK | COMPACT_FLAG_MASK;
- flags |= empty ? EMPTY_FLAG_MASK : 0;
- flags |= (curCount == 1) ? SINGLEITEM_FLAG_MASK : 0;
- CompactOperations.loadCompactMemory(cache, seedHash, curCount, thetaLong, dstMem, (byte)flags, preLongs);
- return new DirectCompactUnorderedSketch(dstMem);
- }
-
- @Override //ordered, on-heap
- public CompactSketch compact() {
- //TODO
- return null;
- }
-
- @Override
- public CompactSketch compact(final boolean dstOrdered, final WritableMemory wmem) {
- //TODO
- return null;
- }
-
@Override
public boolean isOrdered() {
return false;
diff --git a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java
index 7900451..97eb82a 100644
--- a/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java
+++ b/src/main/java/org/apache/datasketches/theta/DirectQuickSelectSketchR.java
@@ -155,7 +155,7 @@ class DirectQuickSelectSketchR extends UpdateSketch {
@Override
public boolean isEmpty() {
- return PreambleUtil.isEmptySketch(wmem_);
+ return PreambleUtil.isEmptyFlag(wmem_);
}
@Override
diff --git a/src/main/java/org/apache/datasketches/theta/HeapAlphaSketch.java b/src/main/java/org/apache/datasketches/theta/HeapAlphaSketch.java
index 32a73c4..d41beb4 100644
--- a/src/main/java/org/apache/datasketches/theta/HeapAlphaSketch.java
+++ b/src/main/java/org/apache/datasketches/theta/HeapAlphaSketch.java
@@ -146,7 +146,7 @@ final class HeapAlphaSketch extends HeapUpdateSketch {
has.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs);
has.curCount_ = extractCurCount(srcMem);
has.thetaLong_ = extractThetaLong(srcMem);
- has.empty_ = PreambleUtil.isEmptySketch(srcMem);
+ has.empty_ = PreambleUtil.isEmptyFlag(srcMem);
has.cache_ = new long[1 << lgArrLongs];
srcMem.getLongArray(preambleLongs << 3, has.cache_, 0, 1 << lgArrLongs); //read in as hash table
return has;
diff --git a/src/main/java/org/apache/datasketches/theta/HeapCompactOrderedSketch.java b/src/main/java/org/apache/datasketches/theta/HeapCompactOrderedSketch.java
index 48c38fb..dbaf823 100644
--- a/src/main/java/org/apache/datasketches/theta/HeapCompactOrderedSketch.java
+++ b/src/main/java/org/apache/datasketches/theta/HeapCompactOrderedSketch.java
@@ -45,16 +45,16 @@ final class HeapCompactOrderedSketch extends HeapCompactSketch {
//Sketch interface
- @Override //ordered, on-heap
+ @Override
public CompactSketch compact() {
- //TODO
- return null;
+ return this;
}
@Override
- public CompactSketch compact(final boolean dstOrdered, final WritableMemory wmem) {
- //TODO
- return null;
+ public CompactSketch compact(final boolean dstOrdered, final WritableMemory dstMem) {
+ return CompactOperations.componentsToCompact(
+ getThetaLong(), getRetainedEntries(), getSeedHash(), isEmpty(),
+ true, true, dstOrdered, dstMem, getCache());
}
@Override
diff --git a/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java b/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java
index b78002d..74dff7d 100644
--- a/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java
+++ b/src/main/java/org/apache/datasketches/theta/HeapCompactSketch.java
@@ -124,6 +124,7 @@ abstract class HeapCompactSketch extends CompactSketch {
return seedHash_;
}
+ //only called from subclasses
byte[] toByteArray(final boolean ordered) {
Sketch.checkIllegalCurCountAndEmpty(empty_, curCount_);
final int bytes = getCurrentBytes(true);
diff --git a/src/main/java/org/apache/datasketches/theta/HeapCompactUnorderedSketch.java b/src/main/java/org/apache/datasketches/theta/HeapCompactUnorderedSketch.java
index 4fc35c3..6b59a02 100644
--- a/src/main/java/org/apache/datasketches/theta/HeapCompactUnorderedSketch.java
+++ b/src/main/java/org/apache/datasketches/theta/HeapCompactUnorderedSketch.java
@@ -19,12 +19,6 @@
package org.apache.datasketches.theta;
-import static org.apache.datasketches.theta.PreambleUtil.checkMemorySeedHash;
-import static org.apache.datasketches.theta.PreambleUtil.extractCurCount;
-import static org.apache.datasketches.theta.PreambleUtil.extractPreLongs;
-import static org.apache.datasketches.theta.PreambleUtil.extractThetaLong;
-
-import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;
/**
@@ -49,41 +43,18 @@ final class HeapCompactUnorderedSketch extends HeapCompactSketch {
super(cache, empty, seedHash, curCount, thetaLong);
}
- /**
- * Heapifies the given source Memory with seed
- * @param srcMem <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
- * @param seed <a href="{@docRoot}/resources/dictionary.html#seed">See Update Hash Seed</a>.
- * @return a CompactSketch
- */
- //Note Empty and SingleItemSketches should be filtered out before we get here.
- static CompactSketch heapifyInstance(final Memory srcMem, final long seed) {
- final short memSeedHash = checkMemorySeedHash(srcMem, seed);
- final int preLongs = extractPreLongs(srcMem);
- final boolean empty = PreambleUtil.isEmptySketch(srcMem);
- long thetaLong = Long.MAX_VALUE;
- final int curCount = extractCurCount(srcMem);
- final long[] cache = new long[curCount];
- if (preLongs == 2) {
- srcMem.getLongArray(16, cache, 0, curCount);
- } else { //preLongs == 3
- srcMem.getLongArray(24, cache, 0, curCount);
- thetaLong = extractThetaLong(srcMem);
- }
- return new HeapCompactUnorderedSketch(cache, empty, memSeedHash, curCount, thetaLong);
- }
-
//Sketch interface
@Override //ordered, on-heap
public CompactSketch compact() {
- //TODO
- return null;
+ return compact(true, null);
}
@Override
- public CompactSketch compact(final boolean dstOrdered, final WritableMemory wmem) {
- //TODO
- return null;
+ public CompactSketch compact(final boolean dstOrdered, final WritableMemory dstMem) {
+ return CompactOperations.componentsToCompact(
+ getThetaLong(), getRetainedEntries(), getSeedHash(), isEmpty(),
+ true, false, dstOrdered, dstMem, getCache());
}
@Override
diff --git a/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java b/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java
index 7fe3e9e..1090d9e 100644
--- a/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java
+++ b/src/main/java/org/apache/datasketches/theta/HeapQuickSelectSketch.java
@@ -133,7 +133,7 @@ class HeapQuickSelectSketch extends HeapUpdateSketch {
hqss.hashTableThreshold_ = setHashTableThreshold(lgNomLongs, lgArrLongs);
hqss.curCount_ = extractCurCount(srcMem);
hqss.thetaLong_ = extractThetaLong(srcMem);
- hqss.empty_ = PreambleUtil.isEmptySketch(srcMem);
+ hqss.empty_ = PreambleUtil.isEmptyFlag(srcMem);
hqss.cache_ = new long[1 << lgArrLongs];
srcMem.getLongArray(preambleLongs << 3, hqss.cache_, 0, 1 << lgArrLongs); //read in as hash table
return hqss;
diff --git a/src/main/java/org/apache/datasketches/theta/IntersectionImplR.java b/src/main/java/org/apache/datasketches/theta/IntersectionImplR.java
index 0cc7517..6b79a24 100644
--- a/src/main/java/org/apache/datasketches/theta/IntersectionImplR.java
+++ b/src/main/java/org/apache/datasketches/theta/IntersectionImplR.java
@@ -159,8 +159,8 @@ class IntersectionImplR extends Intersection {
if (curCount_ == 0) {
compactCacheR = new long[0];
- return createCompactSketch(
- compactCacheR, empty_, seedHash_, curCount_, thetaLong_, dstOrdered, dstMem);
+ return CompactOperations.componentsToCompact(
+ thetaLong_, curCount_, seedHash_, empty_, true, false, dstOrdered, dstMem, compactCacheR);
}
//else curCount > 0
final long[] hashTable;
@@ -174,8 +174,8 @@ class IntersectionImplR extends Intersection {
compactCacheR = compactCachePart(hashTable, lgArrLongs_, curCount_, thetaLong_, dstOrdered);
//Create the CompactSketch
- return createCompactSketch(
- compactCacheR, empty_, seedHash_, curCount_, thetaLong_, dstOrdered, dstMem);
+ return CompactOperations.componentsToCompact(
+ thetaLong_, curCount_, seedHash_, empty_, true, dstOrdered, dstOrdered, dstMem, compactCacheR);
}
@Override
diff --git a/src/main/java/org/apache/datasketches/theta/PairwiseSetOperations.java b/src/main/java/org/apache/datasketches/theta/PairwiseSetOperations.java
index a73e7eb..3ffa6ac 100644
--- a/src/main/java/org/apache/datasketches/theta/PairwiseSetOperations.java
+++ b/src/main/java/org/apache/datasketches/theta/PairwiseSetOperations.java
@@ -19,12 +19,14 @@
package org.apache.datasketches.theta;
-import static org.apache.datasketches.theta.SetOperation.createCompactSketch;
+import static org.apache.datasketches.Util.DEFAULT_NOMINAL_ENTRIES;
+import static org.apache.datasketches.Util.DEFAULT_UPDATE_SEED;
+import static org.apache.datasketches.Util.checkSeedHashes;
+import static org.apache.datasketches.Util.computeSeedHash;
import java.util.Arrays;
import org.apache.datasketches.SketchesArgumentException;
-import org.apache.datasketches.Util;
/**
* Set Operations where the arguments are presented in pairs as in <i>C = Op(A,B)</i>. These are
@@ -101,7 +103,7 @@ public class PairwiseSetOperations {
*/
@Deprecated
public static CompactSketch union(final CompactSketch skA, final CompactSketch skB) {
- return union(skA, skB, Util.DEFAULT_NOMINAL_ENTRIES);
+ return union(skA, skB, DEFAULT_NOMINAL_ENTRIES);
}
/**
@@ -216,7 +218,10 @@ public class PairwiseSetOperations {
} else {
outArr = Arrays.copyOf(outCache, curCount); //copy only valid items
}
- return createCompactSketch(outArr, false, skA.getSeedHash(), curCount, thetaLong, true, null);
+ final short seedHash = computeSeedHash(DEFAULT_UPDATE_SEED);
+ final boolean srcEmpty = (curCount == 0) && (thetaLong == Long.MAX_VALUE);
+ return CompactOperations.componentsToCompact(
+ thetaLong, curCount, seedHash, srcEmpty, true, true, true, null, outArr);
}
private static CompactSketch maybeCutback(final CompactSketch csk, final int k) {
@@ -228,7 +233,9 @@ public class PairwiseSetOperations {
thetaLong = cache[k];
final long[] arr = Arrays.copyOf(cache, k);
curCount = k;
- return createCompactSketch(arr, empty, csk.getSeedHash(), curCount, thetaLong, true, null);
+ final short seedHash = computeSeedHash(DEFAULT_UPDATE_SEED);
+ return CompactOperations.componentsToCompact(
+ thetaLong, curCount, seedHash, empty, true, false, true, null, cache);
}
return csk;
}
@@ -242,7 +249,7 @@ public class PairwiseSetOperations {
private static short seedHashesCheck(final Sketch skA, final Sketch skB) {
final short seedHashA = skA.getSeedHash(); //lgtm [java/dereferenced-value-may-be-null]
final short seedHashB = skB.getSeedHash(); //lgtm [java/dereferenced-value-may-be-null]
- return Util.checkSeedHashes(seedHashA, seedHashB);
+ return checkSeedHashes(seedHashA, seedHashB);
}
}
diff --git a/src/main/java/org/apache/datasketches/theta/PreambleUtil.java b/src/main/java/org/apache/datasketches/theta/PreambleUtil.java
index a61e16c..8baf634 100644
--- a/src/main/java/org/apache/datasketches/theta/PreambleUtil.java
+++ b/src/main/java/org/apache/datasketches/theta/PreambleUtil.java
@@ -466,77 +466,6 @@ final class PreambleUtil {
return ((extractFlags(mem) & EMPTY_FLAG_MASK) > 0);
}
- private static final int ALPHA_ID = Family.ALPHA.getID();
- private static final int QUICKSELECT_ID = Family.QUICKSELECT.getID();
- private static final int COMPACT_ID = Family.COMPACT.getID();
- private static final int UNION_ID = Family.UNION.getID();
-
- static boolean isEmptySketch(final Memory mem) {
- //CHECK FamID
- final int famId = extractFamilyID(mem);
- if ( !((famId == ALPHA_ID) || (famId == QUICKSELECT_ID)
- || (famId == COMPACT_ID) || (famId == UNION_ID))) {
- throw new SketchesArgumentException("Not part of the Theta Sketch Family");
- }
- //CHECK PreLongs
- final int preLongs = extractPreLongs(mem);
- if ((preLongs < 1) || (preLongs > 4)) {
- throw new SketchesArgumentException("Possible corruption: Illegal preLongs value: " + preLongs);
- }
- //CHECK SerVer
- final int serVer = extractSerVer(mem);
- if ((serVer < 1) || (serVer > 3)) {
- throw new SketchesArgumentException("Possible corruption: Illegal serVer value: " + serVer);
- }
- if (serVer == 1) { //Prelongs is always 3, no empty flag
- assert preLongs == 3;
- return ((extractCurCount(mem) == 0) && (extractThetaLong(mem) == Long.MAX_VALUE));
- }
- // Flags byte: SI, Ordered, Compact, Empty, ReadOnly, LittleEndian = 0XX1X0
- // Flags mask = 100101 = 0x25; Flags compare = 000100 = 0x4
- final int flags = extractFlags(mem);
- final boolean emptyFlag = (flags & 0x25) == EMPTY_FLAG_MASK;
- if (serVer == 2) {
- if (preLongs == 1) { return true; }
- final int curCount = extractCurCount(mem);
- if (preLongs == 2) {
- return emptyFlag || (curCount == 0);
- }
- final long thetaLong = extractThetaLong(mem);
- if (preLongs == 3) {
- return emptyFlag || ((curCount == 0) && (thetaLong == Long.MAX_VALUE));
- }
- }
- if (serVer == 3) {
- final boolean emptyCap = mem.getCapacity() < 16L;
- if (preLongs == 1) { return emptyFlag || emptyCap; }
- final int curCount = extractCurCount(mem);
- if (preLongs == 2) {
- return emptyFlag || (curCount == 0);
- }
- final long thetaLong = extractThetaLong(mem);
- if (preLongs <= 4) {
- return emptyFlag || ((curCount == 0) && (thetaLong == Long.MAX_VALUE));
- }
- }
- assert false : "Should not get here";
- return true;
- }
-
- static boolean isSingleItemSketch(final Memory mem) {
- // Flags byte: SI, Ordered, Compact, NotEmpty, ReadOnly, LittleEndian = X11010 = 0x1A.
- // Flags mask will be 0x1F.
- // SingleItem flag may not be set due to a historical bug, so we can't depend on it for now.
- // However, if the above flags are correct, preLongs == 1, SerVer >= 3, FamilyID == 3,
- // and the hash seed matches (not done here), it is virtually guaranteed that we have a
- // SingleItem Sketch.
- final boolean preLongs = extractPreLongs(mem) == 1;
- final boolean serVer = extractSerVer(mem) >= 3;
- final boolean famId = extractFamilyID(mem) == Family.COMPACT.getID();
- final boolean flags = (extractFlags(mem) & 0x1F) == 0x1A; //no SI, yet
- return preLongs && serVer && famId && flags;
- }
-
/**
* Checks Memory for capacity to hold the preamble and returns the extracted preLongs.
* @param mem the given Memory
diff --git a/src/main/java/org/apache/datasketches/theta/SetOperation.java b/src/main/java/org/apache/datasketches/theta/SetOperation.java
index b4fc4cf..89eed19 100644
--- a/src/main/java/org/apache/datasketches/theta/SetOperation.java
+++ b/src/main/java/org/apache/datasketches/theta/SetOperation.java
@@ -27,7 +27,6 @@ import static org.apache.datasketches.Util.REBUILD_THRESHOLD;
import static org.apache.datasketches.Util.ceilingPowerOf2;
import static org.apache.datasketches.theta.PreambleUtil.FAMILY_BYTE;
import static org.apache.datasketches.theta.PreambleUtil.SER_VER_BYTE;
-import static org.apache.datasketches.theta.Sketch.checkIllegalCurCountAndEmpty;
import org.apache.datasketches.Family;
import org.apache.datasketches.SketchesArgumentException;
@@ -75,11 +74,11 @@ public abstract class SetOperation {
* <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
* @param seed <a href="{@docRoot}/resources/dictionary.html#seed">See Update Hash Seed</a>.
* @return a Heap-based SetOperation from the given Memory
- */ //TODO Do we need to add a stateful AnotB here?
+ */
public static SetOperation heapify(final Memory srcMem, final long seed) {
final byte famID = srcMem.getByte(FAMILY_BYTE);
final Family family = idToFamily(famID);
- switch (family) {
+ switch (family) { //TODO Do we need to add the stateful AnotB ?
case UNION : {
return UnionImpl.heapifyInstance(srcMem, seed);
}
@@ -115,7 +114,7 @@ public abstract class SetOperation {
* <a href="{@docRoot}/resources/dictionary.html#mem">See Memory</a>
* @param seed <a href="{@docRoot}/resources/dictionary.html#seed">See Update Hash Seed</a>.
* @return a SetOperation backed by the given Memory
- */ //TODO Do we need to add a stateful AnotB here?
+ */
public static SetOperation wrap(final Memory srcMem, final long seed) {
final byte famID = srcMem.getByte(FAMILY_BYTE);
final Family family = idToFamily(famID);
@@ -123,7 +122,7 @@ public abstract class SetOperation {
if (serVer != 3) {
throw new SketchesArgumentException("SerVer must be 3: " + serVer);
}
- switch (family) {
+ switch (family) { //TODO Do we need to add the stateful AnotB ?
case UNION : {
return UnionImpl.wrapInstance(srcMem, seed);
}
@@ -247,49 +246,6 @@ public abstract class SetOperation {
//intentionally not made public because behavior will be confusing to end user.
abstract boolean isEmpty();
- //used only by the set operations
- static final CompactSketch createCompactSketch(
- final long[] compactCache,
- boolean empty,
- final short seedHash,
- final int curCount,
- final long thetaLong,
- final boolean dstOrdered,
- final WritableMemory dstMem) {
- checkIllegalCurCountAndEmpty(empty, curCount);
- empty = correctEmptyOnSetResult(curCount, thetaLong);
- if (empty) {
- final EmptyCompactSketch sk = EmptyCompactSketch.getInstance();
- if (dstMem != null) {
- dstMem.putByteArray(0, sk.toByteArray(), 0, 8);
- }
- return sk;
- }
- //Not Empty
- if ((thetaLong == Long.MAX_VALUE) && (curCount == 1)) {
- final SingleItemSketch sis = new SingleItemSketch(compactCache[0], seedHash);
- if ((dstMem != null) && (dstMem.getCapacity() >= 16)) {
- dstMem.putByteArray(0, sis.toByteArray(), 0, 16);
- }
- return sis;
- }
- if (dstMem == null) {
- if (dstOrdered) {
- return new HeapCompactOrderedSketch(compactCache, empty, seedHash, curCount, thetaLong);
- } else {
- return new HeapCompactUnorderedSketch(compactCache, empty, seedHash, curCount, thetaLong);
- }
- } else {
- if (dstOrdered) {
- return DirectCompactOrderedSketch.compact(compactCache, empty, seedHash, curCount,
- thetaLong, dstMem);
- } else {
- return DirectCompactUnorderedSketch.compact(compactCache, empty, seedHash, curCount,
- thetaLong, dstMem);
- }
- }
- }
-
/**
* Computes minimum lgArrLongs from a current count.
* @param count the given current count
diff --git a/src/main/java/org/apache/datasketches/theta/SetOperationBuilder.java b/src/main/java/org/apache/datasketches/theta/SetOperationBuilder.java
index 822d88f..b78ac3b 100644
--- a/src/main/java/org/apache/datasketches/theta/SetOperationBuilder.java
+++ b/src/main/java/org/apache/datasketches/theta/SetOperationBuilder.java
@@ -210,7 +210,7 @@ public class SetOperationBuilder {
setOp = new AnotBimpl(bSeed);
}
else {
- throw new SketchesArgumentException(
+ throw new SketchesArgumentException( //TODO we should be able to do this now.
"AnotB is only on heap and cannot be persisted.");
}
break;
diff --git a/src/main/java/org/apache/datasketches/theta/SingleItemSketch.java b/src/main/java/org/apache/datasketches/theta/SingleItemSketch.java
index 38bf862..5356dbc 100644
--- a/src/main/java/org/apache/datasketches/theta/SingleItemSketch.java
+++ b/src/main/java/org/apache/datasketches/theta/SingleItemSketch.java
@@ -24,9 +24,14 @@ import static org.apache.datasketches.ByteArrayUtil.putLongLE;
import static org.apache.datasketches.Util.DEFAULT_UPDATE_SEED;
import static org.apache.datasketches.Util.computeSeedHash;
import static org.apache.datasketches.hash.MurmurHash3.hash;
+import static org.apache.datasketches.theta.PreambleUtil.SINGLEITEM_FLAG_MASK;
import static org.apache.datasketches.theta.PreambleUtil.checkMemorySeedHash;
-import static org.apache.datasketches.theta.PreambleUtil.isSingleItemSketch;
+import static org.apache.datasketches.theta.PreambleUtil.extractFamilyID;
+import static org.apache.datasketches.theta.PreambleUtil.extractFlags;
+import static org.apache.datasketches.theta.PreambleUtil.extractPreLongs;
+import static org.apache.datasketches.theta.PreambleUtil.extractSerVer;
+import org.apache.datasketches.Family;
import org.apache.datasketches.SketchesArgumentException;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;
@@ -89,24 +94,24 @@ final class SingleItemSketch extends CompactSketch {
*/ //does not override Sketch
public static SingleItemSketch heapify(final Memory srcMem, final long seed) {
final short seedHashMem = checkMemorySeedHash(srcMem, seed);
- if (isSingleItemSketch(srcMem)) {
- return new SingleItemSketch(srcMem.getLong(8), seedHashMem);
- }
- throw new SketchesArgumentException("Input Memory Preamble is not a SingleItemSketch.");
+ final boolean singleItem = otherCheckForSingleItem(srcMem);
+ if (singleItem) { return new SingleItemSketch(srcMem.getLong(8), seedHashMem); }
+ throw new SketchesArgumentException("Input Memory is not a SingleItemSketch.");
}
@Override
public CompactSketch compact() {
- final long[] hashArr = getCache();
- final short seedHash = getSeedHash();
- return new HeapCompactOrderedSketch(hashArr, false, seedHash, 1, Long.MAX_VALUE);
+ return this;
}
@Override
public CompactSketch compact(final boolean dstOrdered, final WritableMemory dstMem) {
- dstMem.putLong(0, pre0_);
- dstMem.putLong(8, hash_);
- return new DirectCompactOrderedSketch(dstMem);
+ if (dstMem == null) { return this; }
+ else {
+ dstMem.putLong(0, pre0_);
+ dstMem.putLong(8, hash_);
+ return new DirectCompactOrderedSketch(dstMem);
+ }
}
//Create methods using the default seed
@@ -404,4 +409,25 @@ final class SingleItemSketch extends CompactSketch {
return (short) (pre0_ >>> 48);
}
+ static final boolean otherCheckForSingleItem(final Memory mem) {
+ return otherCheckForSingleItem(extractPreLongs(mem), extractSerVer(mem),
+ extractFamilyID(mem), extractFlags(mem) );
+ }
+
+ static final boolean otherCheckForSingleItem(final int preLongs, final int serVer,
+ final int famId, final int flags) {
+ // Flags byte: SI=X, Ordered=T, Compact=T, Empty=F, ReadOnly=T, BigEndian=F = X11010 = 0x1A.
+ // Flags mask will be 0x1F.
+ // SingleItem flag may not be set due to a historical bug, so we can't depend on it for now.
+ // However, if the above flags are correct, preLongs == 1, SerVer >= 3, FamilyID == 3,
+ // and the hash seed matches (not done here), it is virtually guaranteed that we have a
+ // SingleItem Sketch.
+ final boolean numPreLongs = preLongs == 1;
+ final boolean numSerVer = serVer >= 3;
+ final boolean numFamId = famId == Family.COMPACT.getID();
+ final boolean numFlags = (flags & 0x1F) == 0x1A; //no SI, yet
+ final boolean singleFlag = (flags & SINGLEITEM_FLAG_MASK) > 0;
+ return (numPreLongs && numSerVer && numFamId && numFlags) || singleFlag;
+ }
+
}
diff --git a/src/main/java/org/apache/datasketches/theta/Sketch.java b/src/main/java/org/apache/datasketches/theta/Sketch.java
index c54d521..42f4724 100644
--- a/src/main/java/org/apache/datasketches/theta/Sketch.java
+++ b/src/main/java/org/apache/datasketches/theta/Sketch.java
@@ -27,13 +27,14 @@ import static org.apache.datasketches.Util.LS;
import static org.apache.datasketches.Util.ceilingPowerOf2;
import static org.apache.datasketches.Util.zeroPad;
import static org.apache.datasketches.theta.PreambleUtil.COMPACT_FLAG_MASK;
+import static org.apache.datasketches.theta.PreambleUtil.EMPTY_FLAG_MASK;
import static org.apache.datasketches.theta.PreambleUtil.FAMILY_BYTE;
import static org.apache.datasketches.theta.PreambleUtil.FLAGS_BYTE;
import static org.apache.datasketches.theta.PreambleUtil.ORDERED_FLAG_MASK;
import static org.apache.datasketches.theta.PreambleUtil.PREAMBLE_LONGS_BYTE;
import static org.apache.datasketches.theta.PreambleUtil.READ_ONLY_FLAG_MASK;
import static org.apache.datasketches.theta.PreambleUtil.SER_VER_BYTE;
-import static org.apache.datasketches.theta.PreambleUtil.isSingleItemSketch;
+import static org.apache.datasketches.theta.SingleItemSketch.otherCheckForSingleItem;
import org.apache.datasketches.BinomialBoundsN;
import org.apache.datasketches.Family;
@@ -142,12 +143,12 @@ public abstract class Sketch {
"Corrupted: " + family + " family image: must have SerVer = 3 and preLongs = 3");
}
}
- case COMPACT: { //serVer 1, 2, or 3, preLongs = 1, 2, or 3
+ case COMPACT: { //serVer 1, 2, 3; preLongs = 1, 2, or 3
if (serVer == 3) {
- if (PreambleUtil.isEmptySketch(srcMem)) {
+ if (PreambleUtil.isEmptyFlag(srcMem)) {
return EmptyCompactSketch.getInstance(srcMem);
}
- if (isSingleItemSketch(srcMem)) { //SINGLEITEM?
+ if (otherCheckForSingleItem(srcMem)) { //SINGLEITEM?
return SingleItemSketch.heapify(srcMem, seed);
}
//not empty & not singleItem
@@ -184,26 +185,33 @@ public abstract class Sketch {
//Sketch interface
/**
- * Converts this sketch as an ordered CompactSketch on the Java heap.
+ * Converts this sketch to an ordered CompactSketch on the Java heap.
*
- * <p>If this sketch is already in compact form this operation returns <i>this</i>.
+ * <p>If this sketch is already in the proper form, this method returns <i>this</i>,
+ * otherwise, this method returns a new CompactSketch of the proper form.
+ *
+ * <p>A CompactSketch is always immutable.</p>
*
* @return this sketch as an ordered CompactSketch on the Java heap.
*/
public abstract CompactSketch compact();
/**
- * Convert this sketch to a CompactSketch in the chosen form.
+ * Convert this sketch to a CompactSketch of the chosen order, either direct or on the heap.
+ *
+ * <p>If this sketch is already in the proper form, this operation returns <i>this</i>,
+ * otherwise, this method returns a new CompactSketch of the proper form.
*
- * <p>If this sketch is already in compact form this operation returns <i>this</i>.
+ * <p>If this sketch is a type of UpdateSketch, the compacting process converts the hash table
+ * of the UpdateSketch to a simple list of the valid hash values.
+ * Any hash values that are zero, or equal to or greater than theta, will be discarded.
+ * The number of valid values remaining in the CompactSketch depends on a number of factors,
+ * but may be larger or smaller than <i>Nominal Entries</i> (or <i>k</i>).
+ * It will never exceed 2<i>k</i>.
+ * If it is critical to always limit the size to no more than <i>k</i>,
+ * then <i>rebuild()</i> should be called on the UpdateSketch prior to calling this method.</p>
*
- * <p>Otherwise, this compacting process converts the hash table form of an UpdateSketch to
- * a simple list of the valid hash values from the hash table. Any hash values equal to or
- * greater than theta will be discarded. The number of valid values remaining in the
- * Compact Sketch depends on a number of factors, but may be larger or smaller than
- * <i>Nominal Entries</i> (or <i>k</i>). It will never exceed 2<i>k</i>. If it is critical
- * to always limit the size to no more than <i>k</i>, then <i>rebuild()</i> should be called
- * on the UpdateSketch prior to this.
+ * <p>A CompactSketch is always immutable.</p>
*
* @param dstOrdered
* <a href="{@docRoot}/resources/dictionary.html#dstOrdered">See Destination Ordered</a>
@@ -650,7 +658,7 @@ public abstract class Sketch {
* @param curCount the given curCount
* @param thetaLong the given thetaLong
* @return thetaLong
- */
+ */ //This handles #4 above
static final long correctThetaOnCompact(final boolean empty, final int curCount,
final long thetaLong) {
return (empty && (curCount == 0) && (thetaLong < Long.MAX_VALUE)) ? Long.MAX_VALUE : thetaLong;
@@ -707,19 +715,12 @@ public abstract class Sketch {
return HeapQuickSelectSketch.heapifyInstance(srcMem, seed);
}
case COMPACT: {
+ final boolean empty = (flags & EMPTY_FLAG_MASK) != 0;
+ if (!empty) { PreambleUtil.checkMemorySeedHash(srcMem, seed); }
final boolean srcOrdered = (flags & ORDERED_FLAG_MASK) != 0;
- if (!compactFlag) {
- throw new SketchesArgumentException(
- "Corrupted: COMPACT family sketch image must have compact flag set");
- }
- final boolean readOnly = (flags & READ_ONLY_FLAG_MASK) != 0;
- if (!readOnly) {
- throw new SketchesArgumentException(
- "Corrupted: COMPACT family sketch image must have Read-Only flag set");
- }
- final short memSeedHash = PreambleUtil.checkMemorySeedHash(srcMem, seed);
- return CompactSketch.anyMemoryToCompactHeap(srcMem, memSeedHash, srcOrdered);
+ return CompactOperations.memoryToCompact(srcMem, srcOrdered, null);
} //end of Compact
+
default: {
throw new SketchesArgumentException(
"Sketch cannot heapify family: " + family + " as a Sketch");
diff --git a/src/main/java/org/apache/datasketches/theta/UnionImpl.java b/src/main/java/org/apache/datasketches/theta/UnionImpl.java
index bf0dc7d..a1b20bc 100644
--- a/src/main/java/org/apache/datasketches/theta/UnionImpl.java
+++ b/src/main/java/org/apache/datasketches/theta/UnionImpl.java
@@ -37,7 +37,7 @@ import static org.apache.datasketches.theta.PreambleUtil.extractSerVer;
import static org.apache.datasketches.theta.PreambleUtil.extractThetaLong;
import static org.apache.datasketches.theta.PreambleUtil.extractUnionThetaLong;
import static org.apache.datasketches.theta.PreambleUtil.insertUnionThetaLong;
-import static org.apache.datasketches.theta.PreambleUtil.isSingleItemSketch;
+import static org.apache.datasketches.theta.SingleItemSketch.otherCheckForSingleItem;
import org.apache.datasketches.Family;
import org.apache.datasketches.HashOperations;
@@ -243,8 +243,10 @@ final class UnionImpl extends Union {
final long[] compactCacheOut =
CompactOperations.compactCache(gadgetCacheCopy, curCountOut, minThetaLong, dstOrdered);
final boolean empty = gadget_.isEmpty() && unionEmpty_;
- return createCompactSketch(
- compactCacheOut, empty, seedHash_, curCountOut, minThetaLong, dstOrdered, dstMem);
+ final short seedHash = gadget_.getSeedHash();
+ return CompactOperations.componentsToCompact(
+ minThetaLong, curCountOut, seedHash, empty, true, dstOrdered, dstOrdered, dstMem,
+ compactCacheOut);
}
@Override
@@ -374,7 +376,7 @@ final class UnionImpl extends Union {
final int preLongs = extractPreLongs(skMem);
if (preLongs == 1) {
- if (isSingleItemSketch(skMem)) {
+ if (otherCheckForSingleItem(skMem)) {
final long hash = skMem.getLong(8);
gadget_.hashUpdate(hash);
return;
diff --git a/src/main/java/org/apache/datasketches/theta/UpdateSketch.java b/src/main/java/org/apache/datasketches/theta/UpdateSketch.java
index 352af73..686789d 100644
--- a/src/main/java/org/apache/datasketches/theta/UpdateSketch.java
+++ b/src/main/java/org/apache/datasketches/theta/UpdateSketch.java
@@ -26,6 +26,7 @@ import static org.apache.datasketches.Util.MIN_LG_NOM_LONGS;
import static org.apache.datasketches.Util.checkSeedHashes;
import static org.apache.datasketches.Util.computeSeedHash;
import static org.apache.datasketches.hash.MurmurHash3.hash;
+import static org.apache.datasketches.theta.CompactOperations.componentsToCompact;
import static org.apache.datasketches.theta.PreambleUtil.BIG_ENDIAN_FLAG_MASK;
import static org.apache.datasketches.theta.PreambleUtil.COMPACT_FLAG_MASK;
import static org.apache.datasketches.theta.PreambleUtil.FAMILY_BYTE;
@@ -136,7 +137,10 @@ public abstract class UpdateSketch extends Sketch {
@Override
public CompactSketch compact(final boolean dstOrdered, final WritableMemory dstMem) {
- return compact(this, dstOrdered, dstMem);
+ return componentsToCompact(getThetaLong(), getRetainedEntries(), getSeedHash(), isEmpty(),
+ false, false, dstOrdered, dstMem, getCache());
+
+ //return compact(this, dstOrdered, dstMem);
}
static CompactSketch compact(final UpdateSketch usk, final boolean dstOrdered,
diff --git a/src/test/java/org/apache/datasketches/theta/AnotBimplTest.java b/src/test/java/org/apache/datasketches/theta/AnotBimplTest.java
index cc47dd6..3f4ba4d 100644
--- a/src/test/java/org/apache/datasketches/theta/AnotBimplTest.java
+++ b/src/test/java/org/apache/datasketches/theta/AnotBimplTest.java
@@ -33,7 +33,7 @@ import org.testng.annotations.Test;
/**
* @author Lee Rhodes
*/
-@SuppressWarnings("javadoc")
+@SuppressWarnings({"javadoc","deprecation"})
public class AnotBimplTest {
@Test
diff --git a/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java b/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java
index 6170e2f..199043f 100644
--- a/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java
+++ b/src/test/java/org/apache/datasketches/theta/CompactSketchTest.java
@@ -228,16 +228,74 @@ public class CompactSketchTest {
@Test
public void checkDirectCompactSingleItemSketch() {
+ State state;
UpdateSketch sk = Sketches.updateSketchBuilder().build();
- CompactSketch csk = sk.compact(true, WritableMemory.allocate(16));
- int bytes = csk.getCurrentBytes(true);
- assertEquals(bytes, 8);
+
+ CompactSketch csko; //ordered
+ CompactSketch csku; //unordered
+
+ WritableMemory wmem = WritableMemory.allocate(16);
+ csko = sk.compact(true, wmem); //empty, direct, ordered
+ //ClassType, Count, Bytes, Compact, Empty, Direct, Memory, Ordered, Estimation
+ state = new State("DirectCompactOrderedSketch", 0, 8, true, true, false, true, true, false);
+ state.check(csko);
+
+ wmem = WritableMemory.allocate(16);
+ csku = sk.compact(false, wmem); //empty, direct, unordered
+ state = new State("DirectCompactOrderedSketch", 0, 8, true, true, false, true, true, false);
+ state.check(csku);
+
sk.update(1);
- csk = sk.compact(true, WritableMemory.allocate(16));
- bytes = csk.getCurrentBytes(true);
- assertEquals(bytes, 16);
- assertTrue(csk == csk.compact());
- assertTrue(csk == csk.compact(true, null));
+ wmem = WritableMemory.allocate(16);
+ csko = sk.compact(true, wmem); //Single, direct, ordered
+ state = new State("DirectCompactOrderedSketch", 1, 16, true, false, false, true, true, false);
+ state.check(csko);
+
+ wmem = WritableMemory.allocate(16);
+ csku = sk.compact(false, wmem); //Single, direct, unordered
+ state = new State("DirectCompactOrderedSketch", 1, 16, true, false, false, true, true, false);
+ state.check(csku);
+
+ CompactSketch csk2o; //ordered
+ CompactSketch csk2u; //unordered
+
+ csk2o = csku.compact(); //single, heap, ordered
+ state = new State("SingleItemSketch", 1, 16, true, false, false, false, true, false);
+ state.check(csk2o);
+
+ csk2o = csku.compact(true, null); //single, heap, ordered
+ state.check(csk2o);
+
+ csk2o = csku.compact(false, null); //single, heap, ordered
+ state.check(csk2o);
+
+ csk2o = csko.compact(true, null); //single, heap, ordered
+ state.check(csk2o);
+
+ csk2o = csko.compact(false, null); //single, heap, ordered
+ state.check(csk2o);
+
+ wmem = WritableMemory.allocate(16);
+ csk2o = csku.compact(true, wmem);
+ state.classType = "DirectCompactOrderedSketch";
+ state.memory = true;
+ state.check(csk2o);
+
+ wmem = WritableMemory.allocate(16);
+ csk2u = csku.compact(false, wmem);
+ state.classType = "DirectCompactOrderedSketch";
+ state.check(csk2u);
+
+ wmem = WritableMemory.allocate(16);
+ csk2o = csko.compact(true, wmem);
+ state.classType = "DirectCompactOrderedSketch";
+ state.memory = true;
+ state.check(csk2o);
+
+ wmem = WritableMemory.allocate(16);
+ csk2u = csko.compact(false, wmem);
+ state.classType = "DirectCompactOrderedSketch";
+ state.check(csk2u);
}
@Test
@@ -254,9 +312,9 @@ public class CompactSketchTest {
@Test
public void checkHeapifyEmptySketch() {
UpdateSketch sk = Sketches.updateSketchBuilder().build();
- WritableMemory wmem = WritableMemory.allocate(16); //extra bytes
+ WritableMemory wmem = WritableMemory.allocate(16); //empty, but extra bytes
CompactSketch csk = sk.compact(false, wmem);
- assertTrue(csk instanceof EmptyCompactSketch);
+ assertTrue(csk instanceof DirectCompactOrderedSketch);
Sketch csk2 = Sketch.heapify(wmem);
assertTrue(csk2 instanceof EmptyCompactSketch);
}
@@ -271,6 +329,44 @@ public class CompactSketchTest {
assertTrue(cache.length == 0);
}
+ private static class State { //expected CompactSketch properties, verified by check()
+ String classType = null; //expected getClass().getSimpleName()
+ int count = 0; //expected getRetainedEntries()
+ int bytes = 0; //expected getCurrentBytes(true)
+ boolean compact = false; //expected isCompact()
+ boolean empty = false; //expected isEmpty()
+ boolean direct = false; //expected isDirect()
+ boolean memory = false; //expected hasMemory()
+ boolean ordered = false; //expected isOrdered()
+ boolean estimation = false; //expected isEstimationMode()
+
+
+ State(String classType, int count, int bytes, boolean compact, boolean empty, boolean direct,
+ boolean memory, boolean ordered, boolean estimation) { //stores every expected value as given
+ this.classType = classType;
+ this.count = count;
+ this.bytes = bytes;
+ this.compact = compact;
+ this.empty = empty;
+ this.direct = direct;
+ this.memory = memory;
+ this.ordered = ordered;
+ this.estimation = estimation;
+ }
+
+ void check(CompactSketch csk) { //asserts each property of csk; the third argument labels a failure
+ assertEquals(csk.getClass().getSimpleName(), classType, "ClassType");
+ assertEquals(csk.getRetainedEntries(), count, "curCount");
+ assertEquals(csk.getCurrentBytes(true), bytes, "Bytes" );
+ assertEquals(csk.isCompact(), compact, "Compact");
+ assertEquals(csk.isEmpty(), empty, "Empty");
+ assertEquals(csk.isDirect(), direct, "Direct");
+ assertEquals(csk.hasMemory(), memory, "Memory");
+ assertEquals(csk.isOrdered(), ordered, "Ordered");
+ assertEquals(csk.isEstimationMode(), estimation, "Estimation");
+ }
+ }
+
@Test
public void printlnTest() {
println("PRINTING: "+this.getClass().getName());
diff --git a/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java
index 3fe910e..3802dfd 100644
--- a/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java
+++ b/src/test/java/org/apache/datasketches/theta/ConcurrentDirectQuickSelectSketchTest.java
@@ -27,14 +27,13 @@ import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertTrue;
-import org.testng.annotations.Test;
-
-import org.apache.datasketches.memory.Memory;
-import org.apache.datasketches.memory.WritableDirectHandle;
-import org.apache.datasketches.memory.WritableMemory;
import org.apache.datasketches.Family;
import org.apache.datasketches.HashOperations;
import org.apache.datasketches.SketchesArgumentException;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.memory.WritableDirectHandle;
+import org.apache.datasketches.memory.WritableMemory;
+import org.testng.annotations.Test;
/**
* @author eshcar
@@ -439,8 +438,7 @@ public class ConcurrentDirectQuickSelectSketchTest {
assertEquals(csk2.getUpperBound(2), localUB);
assertTrue(csk2.isEmpty());
assertFalse(csk2.isEstimationMode());
- assertTrue(csk2 instanceof EmptyCompactSketch);
-
+ assertTrue(csk2 instanceof DirectCompactOrderedSketch);
CompactSketch csk3 = shared.compact(true, mem2);
csk3.toString(false, true, 0, false);
csk3.toString();
@@ -449,7 +447,7 @@ public class ConcurrentDirectQuickSelectSketchTest {
assertEquals(csk3.getUpperBound(2), localUB);
assertTrue(csk3.isEmpty());
assertFalse(csk3.isEstimationMode());
- assertTrue(csk2 instanceof EmptyCompactSketch);
+ assertTrue(csk3 instanceof DirectCompactOrderedSketch); //was csk2: check the sketch under test
}
}
diff --git a/src/test/java/org/apache/datasketches/theta/ConcurrentHeapQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta/ConcurrentHeapQuickSelectSketchTest.java
index dbb6f6a..8ae2d89 100644
--- a/src/test/java/org/apache/datasketches/theta/ConcurrentHeapQuickSelectSketchTest.java
+++ b/src/test/java/org/apache/datasketches/theta/ConcurrentHeapQuickSelectSketchTest.java
@@ -30,14 +30,13 @@ import static org.testng.Assert.fail;
import java.util.Arrays;
-import org.testng.annotations.AfterMethod;
-import org.testng.annotations.Test;
-
-import org.apache.datasketches.memory.Memory;
-import org.apache.datasketches.memory.WritableMemory;
import org.apache.datasketches.Family;
import org.apache.datasketches.SketchesArgumentException;
import org.apache.datasketches.SketchesStateException;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.memory.WritableMemory;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.Test;
/**
* @author eshcar
@@ -382,7 +381,7 @@ public class ConcurrentHeapQuickSelectSketchTest {
assertEquals(csk2.getUpperBound(2), localUB);
assertEquals(csk2.isEmpty(), true);
assertEquals(csk2.isEstimationMode(), estimating);
- assertTrue(csk2 instanceof EmptyCompactSketch);
+ assertTrue(csk2 instanceof DirectCompactOrderedSketch);
CompactSketch csk3 = shared.compact(true, mem2);
csk3.toString(false, true, 0, false);
@@ -392,7 +391,7 @@ public class ConcurrentHeapQuickSelectSketchTest {
assertEquals(csk3.getUpperBound(2), localUB);
assertEquals(csk3.isEmpty(), true);
assertEquals(csk3.isEstimationMode(), estimating);
- assertTrue(csk3 instanceof EmptyCompactSketch);
+ assertTrue(csk3 instanceof DirectCompactOrderedSketch);
}
@Test
@@ -594,7 +593,7 @@ public class ConcurrentHeapQuickSelectSketchTest {
assertEquals(csk2.getUpperBound(2), uskUB);
assertTrue(csk2.isEmpty());
assertFalse(csk2.isEstimationMode());
- assertTrue(csk2 instanceof EmptyCompactSketch);
+ assertTrue(csk2 instanceof DirectCompactOrderedSketch);
CompactSketch csk3 = shared.compact(true, mem2);
csk3.toString(false, true, 0, false);
@@ -604,7 +603,7 @@ public class ConcurrentHeapQuickSelectSketchTest {
assertEquals(csk3.getUpperBound(2), uskUB);
assertTrue(csk3.isEmpty());
assertFalse(csk3.isEstimationMode());
- assertTrue(csk2 instanceof EmptyCompactSketch);
+ assertTrue(csk3 instanceof DirectCompactOrderedSketch); //was csk2: check the sketch under test
}
@Test(expectedExceptions = SketchesArgumentException.class)
diff --git a/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java
index 2ab13ca..73245e5 100644
--- a/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java
+++ b/src/test/java/org/apache/datasketches/theta/DirectQuickSelectSketchTest.java
@@ -41,16 +41,15 @@ import static org.testng.Assert.fail;
import java.util.Arrays;
-import org.testng.annotations.Test;
-
-import org.apache.datasketches.memory.Memory;
-import org.apache.datasketches.memory.WritableDirectHandle;
-import org.apache.datasketches.memory.WritableMemory;
import org.apache.datasketches.Family;
import org.apache.datasketches.HashOperations;
import org.apache.datasketches.ResizeFactor;
import org.apache.datasketches.SketchesArgumentException;
import org.apache.datasketches.SketchesReadOnlyException;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.memory.WritableDirectHandle;
+import org.apache.datasketches.memory.WritableMemory;
+import org.testng.annotations.Test;
/**
* @author Lee Rhodes
@@ -389,7 +388,7 @@ public class DirectQuickSelectSketchTest {
assertEquals(csk2.getUpperBound(2), uskUB);
assertEquals(csk2.isEmpty(), true);
assertEquals(csk2.isEstimationMode(), false);
- assertEquals(csk2.getClass().getSimpleName(), "EmptyCompactSketch");
+ assertEquals(csk2.getClass().getSimpleName(), "DirectCompactOrderedSketch");
CompactSketch csk3 = usk.compact(true, mem2);
csk3.toString(false, true, 0, false);
@@ -399,7 +398,7 @@ public class DirectQuickSelectSketchTest {
assertEquals(csk3.getUpperBound(2), uskUB);
assertEquals(csk3.isEmpty(), true);
assertEquals(csk3.isEstimationMode(), false);
- assertEquals(csk3.getClass().getSimpleName(), "EmptyCompactSketch");
+ assertEquals(csk3.getClass().getSimpleName(), "DirectCompactOrderedSketch");
}
}
diff --git a/src/test/java/org/apache/datasketches/theta/EmptyTest.java b/src/test/java/org/apache/datasketches/theta/EmptyTest.java
index 7bce4c1..7b93117 100644
--- a/src/test/java/org/apache/datasketches/theta/EmptyTest.java
+++ b/src/test/java/org/apache/datasketches/theta/EmptyTest.java
@@ -36,7 +36,7 @@ import org.testng.annotations.Test;
*
* @author Lee Rhodes
*/
-@SuppressWarnings("javadoc")
+@SuppressWarnings({"javadoc","deprecation"})
public class EmptyTest {
@Test
@@ -136,9 +136,10 @@ public class EmptyTest {
Sketches.heapifySketch(mem);
}
- private static Memory badEmptySk() { //missing the empty bit
+ //SerVer 2 had an empty sketch where preLongs = 1, but empty bit was not set.
+ private static Memory badEmptySk() {
final long preLongs = 1;
- final long serVer = 3;
+ final long serVer = 2;
final long family = 3; //compact
final long flags = (ORDERED_FLAG_MASK | COMPACT_FLAG_MASK | READ_ONLY_FLAG_MASK);
final long seedHash = 0x93CC;
diff --git a/src/test/java/org/apache/datasketches/theta/ForwardCompatibilityTest.java b/src/test/java/org/apache/datasketches/theta/ForwardCompatibilityTest.java
index 29dfd09..e5d040c 100644
--- a/src/test/java/org/apache/datasketches/theta/ForwardCompatibilityTest.java
+++ b/src/test/java/org/apache/datasketches/theta/ForwardCompatibilityTest.java
@@ -24,12 +24,11 @@ import static org.apache.datasketches.theta.BackwardConversions.convertSerVer3to
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;
-import org.testng.annotations.Test;
-
-import org.apache.datasketches.memory.Memory;
-import org.apache.datasketches.memory.WritableMemory;
import org.apache.datasketches.SketchesArgumentException;
import org.apache.datasketches.Util;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.memory.WritableMemory;
+import org.testng.annotations.Test;
/**
* @author Lee Rhodes
@@ -113,16 +112,10 @@ public class ForwardCompatibilityTest {
WritableMemory srcMemW = WritableMemory.allocate(16);
srcMem.copyTo(0, srcMemW, 0, 16);
PreambleUtil.setEmpty(srcMemW); //Force
- assertTrue(PreambleUtil.isEmptySketch(srcMemW));
+ assertTrue(PreambleUtil.isEmptyFlag(srcMemW));
srcMemW.putInt(8, 0); //corrupt curCount = 0
Sketch sketch = Sketch.heapify(srcMemW);
- assertEquals(sketch.isEmpty(), true); //was forced true
- assertEquals(sketch.isEstimationMode(), false);
- assertEquals(sketch.isDirect(), false);
- assertEquals(sketch.hasMemory(), false);
- assertEquals(sketch.isCompact(), true);
- assertEquals(sketch.isOrdered(), true);
assertTrue(sketch instanceof EmptyCompactSketch);
}
@@ -136,17 +129,11 @@ public class ForwardCompatibilityTest {
WritableMemory srcMemW = WritableMemory.allocate(24);
srcMem.copyTo(0, srcMemW, 0, 24);
PreambleUtil.setEmpty(srcMemW); //Force
- assertTrue(PreambleUtil.isEmptySketch(srcMemW));
+ assertTrue(PreambleUtil.isEmptyFlag(srcMemW));
srcMemW.putInt(8, 0); //corrupt curCount = 0
srcMemW.putLong(16, Long.MAX_VALUE); //corrupt to make it look empty
- Sketch sketch = Sketch.heapify(srcMemW);
- assertEquals(sketch.isEmpty(), true); //was forced true
- assertEquals(sketch.isEstimationMode(), false);
- assertEquals(sketch.isDirect(), false);
- assertEquals(sketch.hasMemory(), false);
- assertEquals(sketch.isCompact(), true);
- assertEquals(sketch.isOrdered(), true);
+ Sketch sketch = Sketch.heapify(srcMemW); //now serVer=3, EmptyCompactSketch
assertTrue(sketch instanceof EmptyCompactSketch);
}
diff --git a/src/test/java/org/apache/datasketches/theta/HeapAlphaSketchTest.java b/src/test/java/org/apache/datasketches/theta/HeapAlphaSketchTest.java
index eb814ab..793eb13 100644
--- a/src/test/java/org/apache/datasketches/theta/HeapAlphaSketchTest.java
+++ b/src/test/java/org/apache/datasketches/theta/HeapAlphaSketchTest.java
@@ -38,14 +38,13 @@ import static org.testng.Assert.assertNull;
import static org.testng.Assert.assertTrue;
import static org.testng.Assert.fail;
-import org.testng.annotations.Test;
-
-import org.apache.datasketches.memory.Memory;
-import org.apache.datasketches.memory.WritableMemory;
import org.apache.datasketches.Family;
import org.apache.datasketches.ResizeFactor;
import org.apache.datasketches.SketchesArgumentException;
import org.apache.datasketches.Util;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.memory.WritableMemory;
+import org.testng.annotations.Test;
/**
* @author Lee Rhodes
@@ -322,7 +321,7 @@ public class HeapAlphaSketchTest {
assertEquals(csk2.getUpperBound(2), uskUB);
assertEquals(csk2.isEmpty(), true);
assertEquals(csk2.isEstimationMode(), estimating);
- assertTrue(csk2 instanceof EmptyCompactSketch);
+ assertTrue(csk2 instanceof DirectCompactOrderedSketch);
CompactSketch csk3 = usk.compact(true, mem2);
csk3.toString(false, true, 0, false);
@@ -332,7 +331,7 @@ public class HeapAlphaSketchTest {
assertEquals(csk3.getUpperBound(2), uskUB);
assertEquals(csk3.isEmpty(), true);
assertEquals(csk3.isEstimationMode(), estimating);
- assertTrue(csk3 instanceof EmptyCompactSketch);
+ assertTrue(csk3 instanceof DirectCompactOrderedSketch);
}
@Test
diff --git a/src/test/java/org/apache/datasketches/theta/HeapQuickSelectSketchTest.java b/src/test/java/org/apache/datasketches/theta/HeapQuickSelectSketchTest.java
index 2b74ee6..fcec52b 100644
--- a/src/test/java/org/apache/datasketches/theta/HeapQuickSelectSketchTest.java
+++ b/src/test/java/org/apache/datasketches/theta/HeapQuickSelectSketchTest.java
@@ -39,14 +39,13 @@ import static org.testng.Assert.fail;
import java.util.Arrays;
-import org.testng.annotations.Test;
-
-import org.apache.datasketches.memory.Memory;
-import org.apache.datasketches.memory.WritableMemory;
import org.apache.datasketches.Family;
import org.apache.datasketches.ResizeFactor;
import org.apache.datasketches.SketchesArgumentException;
import org.apache.datasketches.Util;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.memory.WritableMemory;
+import org.testng.annotations.Test;
/**
* @author Lee Rhodes
@@ -319,7 +318,7 @@ public class HeapQuickSelectSketchTest {
assertEquals(csk2.getUpperBound(2), uskUB);
assertEquals(csk2.isEmpty(), true);
assertEquals(csk2.isEstimationMode(), estimating);
- assertEquals(csk2.getClass().getSimpleName(), "EmptyCompactSketch");
+ assertEquals(csk2.getClass().getSimpleName(), "DirectCompactOrderedSketch");
CompactSketch csk3 = usk.compact(true, mem2);
println(csk3.toString(false, true, 0, false));
@@ -329,7 +328,7 @@ public class HeapQuickSelectSketchTest {
assertEquals(csk3.getUpperBound(2), uskUB);
assertEquals(csk3.isEmpty(), true);
assertEquals(csk3.isEstimationMode(), estimating);
- assertEquals(csk3.getClass().getSimpleName(), "EmptyCompactSketch");
+ assertEquals(csk3.getClass().getSimpleName(), "DirectCompactOrderedSketch");
}
@Test
diff --git a/src/test/java/org/apache/datasketches/theta/PairwiseSetOperationsTest.java b/src/test/java/org/apache/datasketches/theta/PairwiseSetOperationsTest.java
index 5c1b76f..9c61b9a 100644
--- a/src/test/java/org/apache/datasketches/theta/PairwiseSetOperationsTest.java
+++ b/src/test/java/org/apache/datasketches/theta/PairwiseSetOperationsTest.java
@@ -23,7 +23,7 @@ import static org.testng.Assert.assertEquals;
import org.testng.annotations.Test;
-@SuppressWarnings("javadoc")
+@SuppressWarnings({"javadoc","deprecation"})
public class PairwiseSetOperationsTest {
// Intersection
diff --git a/src/test/java/org/apache/datasketches/theta/SetOperationTest.java b/src/test/java/org/apache/datasketches/theta/SetOperationTest.java
index 0f21a2e..cbcb1c3 100644
--- a/src/test/java/org/apache/datasketches/theta/SetOperationTest.java
+++ b/src/test/java/org/apache/datasketches/theta/SetOperationTest.java
@@ -33,20 +33,19 @@ import static org.testng.Assert.assertTrue;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
-import org.testng.annotations.Test;
-
+import org.apache.datasketches.Family;
+import org.apache.datasketches.ResizeFactor;
+import org.apache.datasketches.SketchesArgumentException;
import org.apache.datasketches.memory.DefaultMemoryRequestServer;
import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.MemoryRequestServer;
import org.apache.datasketches.memory.WritableMemory;
-import org.apache.datasketches.Family;
-import org.apache.datasketches.ResizeFactor;
-import org.apache.datasketches.SketchesArgumentException;
+import org.testng.annotations.Test;
/**
* @author Lee Rhodes
*/
-@SuppressWarnings("javadoc")
+@SuppressWarnings({"javadoc","deprecation"})
public class SetOperationTest {
@Test
diff --git a/src/test/java/org/apache/datasketches/theta/SetOpsCornerCasesTest.java b/src/test/java/org/apache/datasketches/theta/SetOpsCornerCasesTest.java
index 36abe4a..2626760 100644
--- a/src/test/java/org/apache/datasketches/theta/SetOpsCornerCasesTest.java
+++ b/src/test/java/org/apache/datasketches/theta/SetOpsCornerCasesTest.java
@@ -217,6 +217,7 @@ public class SetOpsCornerCasesTest {
return PairwiseSetOperations.aNotB(tskA, tskB);
}
+
private static void checkCornerCase(Sketch rskA, Sketch rskB) {
double estA = rskA.getEstimate();
double estB = rskB.getEstimate();
@@ -230,8 +231,6 @@ public class SetOpsCornerCasesTest {
Assert.assertEquals(emptyB, emptyA);
Assert.assertEquals(thetaLongB, thetaLongA);
Assert.assertEquals(countB, countA);
- String A = rskA.getClass().getSimpleName();
- String B = rskB.getClass().getSimpleName();
Assert.assertEquals(rskA.getClass().getSimpleName(), rskB.getClass().getSimpleName());
}
diff --git a/src/test/java/org/apache/datasketches/theta/SingleItemSketchTest.java b/src/test/java/org/apache/datasketches/theta/SingleItemSketchTest.java
index 23a0a37..7a50ba8 100644
--- a/src/test/java/org/apache/datasketches/theta/SingleItemSketchTest.java
+++ b/src/test/java/org/apache/datasketches/theta/SingleItemSketchTest.java
@@ -36,7 +36,7 @@ import org.testng.annotations.Test;
/**
* @author Lee Rhodes
*/
-@SuppressWarnings("javadoc")
+@SuppressWarnings({"javadoc","deprecation"})
public class SingleItemSketchTest {
final static short DEFAULT_SEED_HASH = (short) (computeSeedHash(DEFAULT_UPDATE_SEED) & 0XFFFFL);
@@ -202,13 +202,12 @@ public class SingleItemSketchTest {
csk = sk1.compact(false, null);
assertTrue(csk instanceof SingleItemSketch);
- //SingleItemSketch has no off-heap form.
bytes = Sketches.getMaxCompactSketchBytes(1);
wmem = WritableMemory.wrap(new byte[bytes]);
csk = sk1.compact(true, wmem);
- assertTrue(csk instanceof SingleItemSketch);
+ assertTrue(csk instanceof DirectCompactOrderedSketch);
csk = sk1.compact(false, wmem);
- assertTrue(csk instanceof SingleItemSketch);
+ assertTrue(csk instanceof DirectCompactOrderedSketch);
}
@Test
@@ -295,7 +294,7 @@ public class SingleItemSketchTest {
inter.update(sk2);
WritableMemory wmem = WritableMemory.wrap(new byte[16]);
CompactSketch csk = inter.getResult(false, wmem);
- assertTrue(csk instanceof SingleItemSketch);
+ assertTrue(csk instanceof DirectCompactOrderedSketch);
Sketch csk2 = Sketches.heapifySketch(wmem);
assertTrue(csk2 instanceof SingleItemSketch);
println(csk2.toString(true, true, 1, true));
diff --git a/src/test/java/org/apache/datasketches/theta/SketchTest.java b/src/test/java/org/apache/datasketches/theta/SketchTest.java
index 3cf9783..037dd47 100644
--- a/src/test/java/org/apache/datasketches/theta/SketchTest.java
+++ b/src/test/java/org/apache/datasketches/theta/SketchTest.java
@@ -39,19 +39,18 @@ import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertTrue;
import static org.testng.Assert.fail;
-import org.testng.annotations.Test;
-
-import org.apache.datasketches.memory.Memory;
-import org.apache.datasketches.memory.WritableMemory;
import org.apache.datasketches.Family;
import org.apache.datasketches.ResizeFactor;
import org.apache.datasketches.SketchesArgumentException;
import org.apache.datasketches.Util;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.memory.WritableMemory;
+import org.testng.annotations.Test;
/**
* @author Lee Rhodes
*/
-@SuppressWarnings("javadoc")
+@SuppressWarnings({"javadoc","deprecation"})
public class SketchTest {
@Test
diff --git a/src/test/java/org/apache/datasketches/tuple/adouble/AdoubleAnotBTest.java b/src/test/java/org/apache/datasketches/tuple/adouble/AdoubleAnotBTest.java
index d113ed1..229558e 100644
--- a/src/test/java/org/apache/datasketches/tuple/adouble/AdoubleAnotBTest.java
+++ b/src/test/java/org/apache/datasketches/tuple/adouble/AdoubleAnotBTest.java
@@ -44,6 +44,7 @@ public class AdoubleAnotBTest {
private final DoubleSummary.Mode mode = Mode.Sum;
private final Results results = new Results();
+ @SuppressWarnings("deprecation")
private static void threeMethodsWithTheta(
final AnotB<DoubleSummary> aNotB,
final Sketch<DoubleSummary> skA,
@@ -147,6 +148,7 @@ public class AdoubleAnotBTest {
/*****************************************/
+ @SuppressWarnings("deprecation")
@Test
public void aNotBNullEmptyCombinations() {
AnotB<DoubleSummary> aNotB = new AnotB<>();
diff --git a/src/test/java/org/apache/datasketches/tuple/adouble/AdoubleIntersectionTest.java b/src/test/java/org/apache/datasketches/tuple/adouble/AdoubleIntersectionTest.java
index 25359ca..4dc75f7 100644
--- a/src/test/java/org/apache/datasketches/tuple/adouble/AdoubleIntersectionTest.java
+++ b/src/test/java/org/apache/datasketches/tuple/adouble/AdoubleIntersectionTest.java
@@ -82,6 +82,7 @@ public class AdoubleIntersectionTest {
Assert.assertEquals(result.getUpperBound(1), 0.0);
}
+ @SuppressWarnings("deprecation")
@Test
public void intersectionExactMode() {
UpdatableSketch<Double, DoubleSummary> sketch1 =
diff --git a/src/test/java/org/apache/datasketches/tuple/aninteger/IntegerSketchTest.java b/src/test/java/org/apache/datasketches/tuple/aninteger/IntegerSketchTest.java
index 217b066..5cd7b98 100644
--- a/src/test/java/org/apache/datasketches/tuple/aninteger/IntegerSketchTest.java
+++ b/src/test/java/org/apache/datasketches/tuple/aninteger/IntegerSketchTest.java
@@ -70,6 +70,7 @@ public class IntegerSketchTest {
assertEquals(csk.getEstimate(), K * 1.0, K * .03);
}
+ @SuppressWarnings("deprecation")
@Test
public void aNotBTest() {
int lgK = 4;
@@ -82,7 +83,7 @@ public class IntegerSketchTest {
a1Sk1.update(i, 1);
}
anotb.update(a1Sk1, a1Sk2);
- CompactSketch<IntegerSummary> cSk = anotb.getResult();
+ anotb.getResult();
}
@Test
diff --git a/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSketchTest.java b/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSketchTest.java
index 908079f..43b4034 100644
--- a/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSketchTest.java
+++ b/src/test/java/org/apache/datasketches/tuple/strings/ArrayOfStringsSketchTest.java
@@ -22,14 +22,13 @@ package org.apache.datasketches.tuple.strings;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;
-import org.testng.annotations.Test;
-
import org.apache.datasketches.memory.WritableMemory;
import org.apache.datasketches.tuple.AnotB;
import org.apache.datasketches.tuple.CompactSketch;
import org.apache.datasketches.tuple.Intersection;
import org.apache.datasketches.tuple.SketchIterator;
import org.apache.datasketches.tuple.Union;
+import org.testng.annotations.Test;
/**
* @author Lee Rhodes
@@ -38,6 +37,7 @@ import org.apache.datasketches.tuple.Union;
public class ArrayOfStringsSketchTest {
private static final String LS = System.getProperty("line.separator");
+ @SuppressWarnings("deprecation")
@Test
public void checkSketch() {
ArrayOfStringsSketch sketch1 = new ArrayOfStringsSketch();
@@ -57,7 +57,6 @@ public class ArrayOfStringsSketchTest {
String[] strArr3 = {"g", "h" };
sketch2.update(strArr3, strArr3);
-
Union<ArrayOfStringsSummary> union = new Union<>(new ArrayOfStringsSummarySetOperations());
union.update(sketch1);
union.update(sketch2);
diff --git a/tools/FindBugsExcludeFilter.xml b/tools/FindBugsExcludeFilter.xml
index f090992..ee747b6 100644
--- a/tools/FindBugsExcludeFilter.xml
+++ b/tools/FindBugsExcludeFilter.xml
@@ -1,5 +1,3 @@
-<FindBugsFilter> <!-- sketches-core -->
-
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
@@ -18,6 +16,7 @@ KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
+<FindBugsFilter> <!-- sketches-core -->
<!-- Too many false positives to be useful. I could not make it happy :( -->
<Match>
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org