You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by le...@apache.org on 2023/06/07 02:55:08 UTC
[datasketches-java] 01/01: Preparatory work for generic items.
This is an automated email from the ASF dual-hosted git repository.
leerho pushed a commit to branch prep_for_items_sketch
in repository https://gitbox.apache.org/repos/asf/datasketches-java.git
commit e4271bf3a83d15134907b7581ffed3e48c7c6809
Author: Lee Rhodes <le...@users.noreply.github.com>
AuthorDate: Tue Jun 6 19:54:53 2023 -0700
Preparatory work for generic items.
1. The updatable bit (UPDATABLE_BIT_MASK) of the Flags field has been eliminated.
This should make the Flags field identical to the one used in C++.
2. The documentation of the serialization formats has been significantly
improved. See the docs for the KllPreambleUtil class.
3. I have reduced the dependence on the KllMemoryValidate class to those
cases where it is actually required for validation.
4. More cleanup of fields and variables not really being used.
---
.../datasketches/kll/KllDirectDoublesSketch.java | 8 +-
.../datasketches/kll/KllDirectFloatsSketch.java | 8 +-
.../apache/datasketches/kll/KllDoublesHelper.java | 9 +-
.../apache/datasketches/kll/KllDoublesSketch.java | 44 ++++----
.../apache/datasketches/kll/KllFloatsHelper.java | 2 +-
.../apache/datasketches/kll/KllFloatsSketch.java | 40 ++++----
.../datasketches/kll/KllHeapDoublesSketch.java | 24 +++--
.../datasketches/kll/KllHeapFloatsSketch.java | 24 +++--
.../org/apache/datasketches/kll/KllHelper.java | 35 +++----
.../apache/datasketches/kll/KllMemoryValidate.java | 72 ++++++-------
.../apache/datasketches/kll/KllPreambleUtil.java | 113 ++++++++++-----------
.../org/apache/datasketches/kll/KllSketch.java | 18 ++--
.../datasketches/kll/KllMemoryValidateTest.java | 5 +-
tools/SketchesCheckstyle.xml | 2 +-
14 files changed, 189 insertions(+), 215 deletions(-)
diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java
index 8ee358bf..f4c9e80d 100644
--- a/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java
+++ b/src/main/java/org/apache/datasketches/kll/KllDirectDoublesSketch.java
@@ -22,7 +22,6 @@ package org.apache.datasketches.kll;
import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR;
import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_FULL;
import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE;
-import static org.apache.datasketches.kll.KllPreambleUtil.UPDATABLE_BIT_MASK;
import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryK;
import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryLevelZeroSortedFlag;
import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryM;
@@ -30,7 +29,6 @@ import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryMinK;
import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryN;
import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryNumLevels;
import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryFamilyID;
-import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryFlags;
import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryK;
import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryLevelZeroSortedFlag;
import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryM;
@@ -59,13 +57,12 @@ import org.apache.datasketches.memory.WritableMemory;
class KllDirectDoublesSketch extends KllDoublesSketch {
/**
- * The constructor with Memory that can be off-heap.
+ * The constructor with WritableMemory that can be off-heap.
* @param wmem the current WritableMemory
* @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory
* @param memVal the MemoryValadate object
*/
- KllDirectDoublesSketch(final WritableMemory wmem, final MemoryRequestServer memReqSvr,
- final KllMemoryValidate memVal) {
+ KllDirectDoublesSketch(final WritableMemory wmem, final MemoryRequestServer memReqSvr, final KllMemoryValidate memVal) {
super(wmem, memReqSvr);
levelsArr = memVal.levelsArr;
}
@@ -83,7 +80,6 @@ class KllDirectDoublesSketch extends KllDoublesSketch {
setMemoryPreInts(dstMem, PREAMBLE_INTS_FULL);
setMemorySerVer(dstMem, SERIAL_VERSION_UPDATABLE);
setMemoryFamilyID(dstMem, Family.KLL.getID());
- setMemoryFlags(dstMem, UPDATABLE_BIT_MASK);
setMemoryK(dstMem, k);
setMemoryM(dstMem, m);
setMemoryN(dstMem, 0);
diff --git a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java
index 4d3fb54c..a9ce96ce 100644
--- a/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java
+++ b/src/main/java/org/apache/datasketches/kll/KllDirectFloatsSketch.java
@@ -22,7 +22,6 @@ package org.apache.datasketches.kll;
import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR;
import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_FULL;
import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE;
-import static org.apache.datasketches.kll.KllPreambleUtil.UPDATABLE_BIT_MASK;
import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryK;
import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryLevelZeroSortedFlag;
import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryM;
@@ -30,7 +29,6 @@ import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryMinK;
import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryN;
import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryNumLevels;
import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryFamilyID;
-import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryFlags;
import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryK;
import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryLevelZeroSortedFlag;
import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryM;
@@ -59,13 +57,12 @@ import org.apache.datasketches.memory.WritableMemory;
class KllDirectFloatsSketch extends KllFloatsSketch {
/**
- * The constructor with Memory that can be off-heap.
+ * The constructor with WritableMemory that can be off-heap.
* @param wmem the current WritableMemory
* @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory
* @param memVal the MemoryValadate object
*/
- KllDirectFloatsSketch(final WritableMemory wmem, final MemoryRequestServer memReqSvr,
- final KllMemoryValidate memVal) {
+ KllDirectFloatsSketch(final WritableMemory wmem, final MemoryRequestServer memReqSvr, final KllMemoryValidate memVal) {
super(wmem, memReqSvr);
levelsArr = memVal.levelsArr;
}
@@ -83,7 +80,6 @@ class KllDirectFloatsSketch extends KllFloatsSketch {
setMemoryPreInts(dstMem, PREAMBLE_INTS_FULL);
setMemorySerVer(dstMem, SERIAL_VERSION_UPDATABLE);
setMemoryFamilyID(dstMem, Family.KLL.getID());
- setMemoryFlags(dstMem, UPDATABLE_BIT_MASK);
setMemoryK(dstMem, k);
setMemoryM(dstMem, m);
setMemoryN(dstMem, 0);
diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java b/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java
index 20061559..e5601b8b 100644
--- a/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java
+++ b/src/main/java/org/apache/datasketches/kll/KllDoublesHelper.java
@@ -49,14 +49,13 @@ final class KllDoublesHelper {
final int myMinK = mySketch.getMinK();
//update this sketch with level0 items from the other sketch
-
if (otherDblSk.isCompactSingleItem()) {
updateDouble(mySketch, otherDblSk.getDoubleSingleItem());
otherDoubleItemsArr = new double[0];
} else {
otherDoubleItemsArr = otherDblSk.getDoubleItemsArray();
for (int i = otherLevelsArr[0]; i < otherLevelsArr[1]; i++) {
- KllDoublesHelper.updateDouble(mySketch, otherDoubleItemsArr[i]);
+ updateDouble(mySketch, otherDoubleItemsArr[i]);
}
}
// after the level 0 update, we capture the state of levels and items arrays
@@ -68,7 +67,7 @@ final class KllDoublesHelper {
int[] myNewLevelsArr = myCurLevelsArr;
double[] myNewDoubleItemsArr = myCurDoubleItemsArr;
- if (otherNumLevels > 1 && !otherDblSk.isCompactSingleItem()) { //now merge other levels if they exist
+ if (otherNumLevels > 1 && !otherDblSk.isCompactSingleItem()) { //now merge higher levels if they exist
final int tmpSpaceNeeded = mySketch.getNumRetained()
+ KllHelper.getNumRetainedAboveLevelZero(otherNumLevels, otherLevelsArr);
final double[] workbuf = new double[tmpSpaceNeeded];
@@ -115,7 +114,7 @@ final class KllDoublesHelper {
}
//MEMORY SPACE MANAGEMENT
- if (mySketch.updatableMemFormat) {
+ if (mySketch.serialVersionUpdatable) {
mySketch.wmem = KllHelper.memorySpaceMgmt(mySketch, myNewLevelsArr.length, myNewDoubleItemsArr.length);
}
}
@@ -358,7 +357,7 @@ final class KllDoublesHelper {
worklevels[0] = 0;
// Note: the level zero data from "other" was already inserted into "self"
- final int selfPopZero = KllHelper.currentLevelSize(0, myCurNumLevels,myCurLevelsArr);
+ final int selfPopZero = KllHelper.currentLevelSize(0, myCurNumLevels, myCurLevelsArr);
System.arraycopy(myCurDoubleItemsArr, myCurLevelsArr[0], workbuf, worklevels[0], selfPopZero);
worklevels[1] = worklevels[0] + selfPopZero;
diff --git a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java
index 47e86aac..9295f7b6 100644
--- a/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java
+++ b/src/main/java/org/apache/datasketches/kll/KllDoublesSketch.java
@@ -21,8 +21,8 @@ package org.apache.datasketches.kll;
import static java.lang.Math.max;
import static java.lang.Math.min;
-import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryUpdatableFormatFlag;
-import static org.apache.datasketches.kll.KllSketch.Error.MUST_NOT_BE_UPDATABLE_FORMAT;
+import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE;
+import static org.apache.datasketches.kll.KllPreambleUtil.getMemorySerVer;
import static org.apache.datasketches.kll.KllSketch.Error.TGT_IS_READ_ONLY;
import static org.apache.datasketches.kll.KllSketch.Error.kllSketchThrow;
import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH;
@@ -61,60 +61,56 @@ public abstract class KllDoublesSketch extends KllSketch implements QuantilesDou
*/
public static KllDoublesSketch heapify(final Memory srcMem) {
Objects.requireNonNull(srcMem, "Parameter 'srcMem' must not be null");
- if (getMemoryUpdatableFormatFlag(srcMem)) { Error.kllSketchThrow(MUST_NOT_BE_UPDATABLE_FORMAT); }
return KllHeapDoublesSketch.heapifyImpl(srcMem);
}
/**
- * Create a new direct instance of this sketch with a given <em>k</em>.
- * @param k parameter that controls size of the sketch and accuracy of estimates.
+ * Create a new direct instance of this sketch with the default <em>k</em>.
+ * The default <em>k</em> = 200 results in a normalized rank error of about
+ * 1.65%. Larger <em>k</em> will have smaller error but the sketch will be larger (and slower).
* @param dstMem the given destination WritableMemory object for use by the sketch
* @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory
* @return a new direct instance of this sketch
*/
public static KllDoublesSketch newDirectInstance(
- final int k,
final WritableMemory dstMem,
final MemoryRequestServer memReqSvr) {
- Objects.requireNonNull(dstMem, "Parameter 'dstMem' must not be null");
- Objects.requireNonNull(memReqSvr, "Parameter 'memReqSvr' must not be null");
- return KllDirectDoublesSketch.newDirectInstance(k, DEFAULT_M, dstMem, memReqSvr);
+ return newDirectInstance(DEFAULT_K, dstMem, memReqSvr);
}
-
+
/**
- * Create a new direct instance of this sketch with the default <em>k</em>.
- * The default <em>k</em> = 200 results in a normalized rank error of about
- * 1.65%. Larger <em>k</em> will have smaller error but the sketch will be larger (and slower).
+ * Create a new direct instance of this sketch with a given <em>k</em>.
+ * @param k parameter that controls size of the sketch and accuracy of estimates.
* @param dstMem the given destination WritableMemory object for use by the sketch
* @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory
* @return a new direct instance of this sketch
*/
public static KllDoublesSketch newDirectInstance(
+ final int k,
final WritableMemory dstMem,
final MemoryRequestServer memReqSvr) {
Objects.requireNonNull(dstMem, "Parameter 'dstMem' must not be null");
Objects.requireNonNull(memReqSvr, "Parameter 'memReqSvr' must not be null");
- return KllDirectDoublesSketch.newDirectInstance(DEFAULT_K, DEFAULT_M, dstMem, memReqSvr);
+ return KllDirectDoublesSketch.newDirectInstance(k, DEFAULT_M, dstMem, memReqSvr);
}
/**
* Create a new heap instance of this sketch with the default <em>k = 200</em>.
* The default <em>k</em> = 200 results in a normalized rank error of about
* 1.65%. Larger K will have smaller error but the sketch will be larger (and slower).
- * This will have a rank error of about 1.65%.
- * @return new KllDoublesSketch on the heap.
+ * @return new KllDoublesSketch on the Java heap.
*/
- public static KllDoublesSketch newHeapInstance() {
- return new KllHeapDoublesSketch(DEFAULT_K, DEFAULT_M);
+ public static KllDoublesSketch newHeapInstance() {
+ return newHeapInstance(DEFAULT_K);
}
/**
* Create a new heap instance of this sketch with a given parameter <em>k</em>.
- * <em>k</em> can be between DEFAULT_M and 65535, inclusive.
+ * <em>k</em> can be between 8, inclusive, and 65535, inclusive.
* The default <em>k</em> = 200 results in a normalized rank error of about
* 1.65%. Larger K will have smaller error but the sketch will be larger (and slower).
* @param k parameter that controls size of the sketch and accuracy of estimates.
- * @return new KllDoublesSketch on the heap.
+ * @return new KllDoublesSketch on the Java heap.
*/
public static KllDoublesSketch newHeapInstance(final int k) {
return new KllHeapDoublesSketch(k, DEFAULT_M);
@@ -129,7 +125,7 @@ public abstract class KllDoublesSketch extends KllSketch implements QuantilesDou
public static KllDoublesSketch wrap(final Memory srcMem) {
Objects.requireNonNull(srcMem, "Parameter 'srcMem' must not be null");
final KllMemoryValidate memVal = new KllMemoryValidate(srcMem, DOUBLES_SKETCH);
- if (memVal.updatableMemFormat) {
+ if (getMemorySerVer(srcMem) == SERIAL_VERSION_UPDATABLE) {
return new KllDirectDoublesSketch((WritableMemory) srcMem, null, memVal);
} else {
return new KllDirectCompactDoublesSketch(srcMem, memVal);
@@ -148,10 +144,8 @@ public abstract class KllDoublesSketch extends KllSketch implements QuantilesDou
final MemoryRequestServer memReqSvr) {
Objects.requireNonNull(srcMem, "Parameter 'srcMem' must not be null");
final KllMemoryValidate memVal = new KllMemoryValidate(srcMem, DOUBLES_SKETCH);
- if (memVal.updatableMemFormat) {
- if (!memVal.readOnly) {
- Objects.requireNonNull(memReqSvr, "Parameter 'memReqSvr' must not be null");
- }
+ if (getMemorySerVer(srcMem) == SERIAL_VERSION_UPDATABLE && !srcMem.isReadOnly()) {
+ Objects.requireNonNull(memReqSvr, "Parameter 'memReqSvr' must not be null");
return new KllDirectDoublesSketch(srcMem, memReqSvr, memVal);
} else {
return new KllDirectCompactDoublesSketch(srcMem, memVal);
diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java b/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java
index 1e399ab8..5ed537f5 100644
--- a/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java
+++ b/src/main/java/org/apache/datasketches/kll/KllFloatsHelper.java
@@ -114,7 +114,7 @@ final class KllFloatsHelper {
}
//MEMORY SPACE MANAGEMENT
- if (mySketch.updatableMemFormat) {
+ if (mySketch.serialVersionUpdatable) {
mySketch.wmem = KllHelper.memorySpaceMgmt(mySketch, myNewLevelsArr.length, myNewFloatItemsArr.length);
}
}
diff --git a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java
index 8ff0ccf4..5dc83c33 100644
--- a/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java
+++ b/src/main/java/org/apache/datasketches/kll/KllFloatsSketch.java
@@ -21,8 +21,8 @@ package org.apache.datasketches.kll;
import static java.lang.Math.max;
import static java.lang.Math.min;
-import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryUpdatableFormatFlag;
-import static org.apache.datasketches.kll.KllSketch.Error.MUST_NOT_BE_UPDATABLE_FORMAT;
+import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE;
+import static org.apache.datasketches.kll.KllPreambleUtil.getMemorySerVer;
import static org.apache.datasketches.kll.KllSketch.Error.TGT_IS_READ_ONLY;
import static org.apache.datasketches.kll.KllSketch.Error.kllSketchThrow;
import static org.apache.datasketches.kll.KllSketch.SketchType.FLOATS_SKETCH;
@@ -61,60 +61,56 @@ public abstract class KllFloatsSketch extends KllSketch implements QuantilesFloa
*/
public static KllFloatsSketch heapify(final Memory srcMem) {
Objects.requireNonNull(srcMem, "Parameter 'srcMem' must not be null");
- if (getMemoryUpdatableFormatFlag(srcMem)) { Error.kllSketchThrow(MUST_NOT_BE_UPDATABLE_FORMAT); }
return KllHeapFloatsSketch.heapifyImpl(srcMem);
}
/**
- * Create a new direct instance of this sketch with a given <em>k</em>.
- * @param k parameter that controls size of the sketch and accuracy of estimates.
+ * Create a new direct instance of this sketch with the default <em>k</em>.
+ * The default <em>k</em> = 200 results in a normalized rank error of about
+ * 1.65%. Larger <em>k</em> will have smaller error but the sketch will be larger (and slower).
* @param dstMem the given destination WritableMemory object for use by the sketch
* @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory
* @return a new direct instance of this sketch
*/
public static KllFloatsSketch newDirectInstance(
- final int k,
final WritableMemory dstMem,
final MemoryRequestServer memReqSvr) {
- Objects.requireNonNull(dstMem, "Parameter 'dstMem' must not be null");
- Objects.requireNonNull(memReqSvr, "Parameter 'memReqSvr' must not be null");
- return KllDirectFloatsSketch.newDirectInstance(k, DEFAULT_M, dstMem, memReqSvr);
+ return newDirectInstance(DEFAULT_K, dstMem, memReqSvr);
}
-
+
/**
- * Create a new direct instance of this sketch with the default <em>k</em>.
- * The default <em>k</em> = 200 results in a normalized rank error of about
- * 1.65%. Larger <em>k</em> will have smaller error but the sketch will be larger (and slower).
+ * Create a new direct instance of this sketch with a given <em>k</em>.
+ * @param k parameter that controls size of the sketch and accuracy of estimates.
* @param dstMem the given destination WritableMemory object for use by the sketch
* @param memReqSvr the given MemoryRequestServer to request a larger WritableMemory
* @return a new direct instance of this sketch
*/
public static KllFloatsSketch newDirectInstance(
+ final int k,
final WritableMemory dstMem,
final MemoryRequestServer memReqSvr) {
Objects.requireNonNull(dstMem, "Parameter 'dstMem' must not be null");
Objects.requireNonNull(memReqSvr, "Parameter 'memReqSvr' must not be null");
- return KllDirectFloatsSketch.newDirectInstance(DEFAULT_K, DEFAULT_M, dstMem, memReqSvr);
+ return KllDirectFloatsSketch.newDirectInstance(k, DEFAULT_M, dstMem, memReqSvr);
}
/**
* Create a new heap instance of this sketch with the default <em>k = 200</em>.
* The default <em>k</em> = 200 results in a normalized rank error of about
* 1.65%. Larger K will have smaller error but the sketch will be larger (and slower).
- * This will have a rank error of about 1.65%.
- * @return new KllFloatsSketch on the heap.
+ * @return new KllFloatsSketch on the Java heap.
*/
public static KllFloatsSketch newHeapInstance() {
- return new KllHeapFloatsSketch(DEFAULT_K, DEFAULT_M);
+ return newHeapInstance(DEFAULT_K);
}
/**
* Create a new heap instance of this sketch with a given parameter <em>k</em>.
- * <em>k</em> can be between DEFAULT_M and 65535, inclusive.
+ * <em>k</em> can be between 8, inclusive, and 65535, inclusive.
* The default <em>k</em> = 200 results in a normalized rank error of about
* 1.65%. Larger K will have smaller error but the sketch will be larger (and slower).
* @param k parameter that controls size of the sketch and accuracy of estimates.
- * @return new KllFloatsSketch on the heap.
+ * @return new KllFloatsSketch on the Java heap.
*/
public static KllFloatsSketch newHeapInstance(final int k) {
return new KllHeapFloatsSketch(k, DEFAULT_M);
@@ -129,7 +125,7 @@ public abstract class KllFloatsSketch extends KllSketch implements QuantilesFloa
public static KllFloatsSketch wrap(final Memory srcMem) {
Objects.requireNonNull(srcMem, "Parameter 'srcMem' must not be null");
final KllMemoryValidate memVal = new KllMemoryValidate(srcMem, FLOATS_SKETCH);
- if (memVal.updatableMemFormat) {
+ if (getMemorySerVer(srcMem) == SERIAL_VERSION_UPDATABLE) {
return new KllDirectFloatsSketch((WritableMemory) srcMem, null, memVal);
} else {
return new KllDirectCompactFloatsSketch(srcMem, memVal);
@@ -148,10 +144,8 @@ public abstract class KllFloatsSketch extends KllSketch implements QuantilesFloa
final MemoryRequestServer memReqSvr) {
Objects.requireNonNull(srcMem, "Parameter 'srcMem' must not be null");
final KllMemoryValidate memVal = new KllMemoryValidate(srcMem, FLOATS_SKETCH);
- if (memVal.updatableMemFormat) {
- if (!memVal.readOnly) {
+ if (getMemorySerVer(srcMem) == SERIAL_VERSION_UPDATABLE && !srcMem.isReadOnly()) {
Objects.requireNonNull(memReqSvr, "Parameter 'memReqSvr' must not be null");
- }
return new KllDirectFloatsSketch(srcMem, memReqSvr, memVal);
} else {
return new KllDirectCompactFloatsSketch(srcMem, memVal);
diff --git a/src/main/java/org/apache/datasketches/kll/KllHeapDoublesSketch.java b/src/main/java/org/apache/datasketches/kll/KllHeapDoublesSketch.java
index 2b18fe2e..89f1dac3 100644
--- a/src/main/java/org/apache/datasketches/kll/KllHeapDoublesSketch.java
+++ b/src/main/java/org/apache/datasketches/kll/KllHeapDoublesSketch.java
@@ -21,6 +21,8 @@ package org.apache.datasketches.kll;
import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR;
import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_SINGLE_ITEM;
+import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE;
+import static org.apache.datasketches.kll.KllPreambleUtil.getMemorySerVer;
import static org.apache.datasketches.kll.KllSketch.Error.NOT_SINGLE_ITEM;
import static org.apache.datasketches.kll.KllSketch.Error.kllSketchThrow;
import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH;
@@ -73,6 +75,12 @@ final class KllHeapDoublesSketch extends KllDoublesSketch {
doubleItems_ = new double[k];
}
+ static KllHeapDoublesSketch heapifyImpl(final Memory srcMem) {
+ Objects.requireNonNull(srcMem, "Parameter 'srcMem' must not be null");
+ final KllMemoryValidate memVal = new KllMemoryValidate(srcMem, DOUBLES_SKETCH);
+ return new KllHeapDoublesSketch(srcMem, memVal);
+ }
+
/**
* Heapify constructor.
* @param srcMem Memory object that contains data serialized by this sketch.
@@ -86,14 +94,14 @@ final class KllHeapDoublesSketch extends KllDoublesSketch {
minK_ = memValidate.minK;
levelsArr = memValidate.levelsArr;
isLevelZeroSorted_ = memValidate.level0Sorted;
- final boolean updatableMemFormat = memValidate.updatableMemFormat;
+ final boolean serialVersionUpdatable = getMemorySerVer(srcMem) == SERIAL_VERSION_UPDATABLE;
- if (memValidate.empty && !updatableMemFormat) {
+ if (memValidate.empty && !serialVersionUpdatable) {
minDoubleItem_ = Double.NaN;
maxDoubleItem_ = Double.NaN;
doubleItems_ = new double[k_];
}
- else if (memValidate.singleItem && !updatableMemFormat) {
+ else if (memValidate.singleItem && !serialVersionUpdatable) {
final double item = srcMem.getDouble(DATA_START_ADR_SINGLE_ITEM);
minDoubleItem_ = maxDoubleItem_ = item;
doubleItems_ = new double[k_];
@@ -101,7 +109,7 @@ final class KllHeapDoublesSketch extends KllDoublesSketch {
}
else { //Full or updatableMemFormat
int offsetBytes = DATA_START_ADR;
- offsetBytes += (updatableMemFormat ? levelsArr.length * Integer.BYTES : (levelsArr.length - 1) * Integer.BYTES);
+ offsetBytes += (serialVersionUpdatable ? levelsArr.length * Integer.BYTES : (levelsArr.length - 1) * Integer.BYTES);
minDoubleItem_ = srcMem.getDouble(offsetBytes);
offsetBytes += Double.BYTES;
maxDoubleItem_ = srcMem.getDouble(offsetBytes);
@@ -110,7 +118,7 @@ final class KllHeapDoublesSketch extends KllDoublesSketch {
final int retainedItems = capacityItems - levelsArr[0];
doubleItems_ = new double[capacityItems];
final int shift = levelsArr[0];
- if (updatableMemFormat) {
+ if (serialVersionUpdatable) {
offsetBytes += shift * Double.BYTES;
srcMem.getDoubleArray(offsetBytes, doubleItems_, shift, retainedItems);
} else {
@@ -119,12 +127,6 @@ final class KllHeapDoublesSketch extends KllDoublesSketch {
}
}
- static KllHeapDoublesSketch heapifyImpl(final Memory srcMem) {
- Objects.requireNonNull(srcMem, "Parameter 'srcMem' must not be null");
- final KllMemoryValidate memVal = new KllMemoryValidate(srcMem, DOUBLES_SKETCH);
- return new KllHeapDoublesSketch(srcMem, memVal);
- }
-
@Override
public int getK() { return k_; }
diff --git a/src/main/java/org/apache/datasketches/kll/KllHeapFloatsSketch.java b/src/main/java/org/apache/datasketches/kll/KllHeapFloatsSketch.java
index 73aefdb3..6ce9eaa8 100644
--- a/src/main/java/org/apache/datasketches/kll/KllHeapFloatsSketch.java
+++ b/src/main/java/org/apache/datasketches/kll/KllHeapFloatsSketch.java
@@ -21,6 +21,8 @@ package org.apache.datasketches.kll;
import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR;
import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_SINGLE_ITEM;
+import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE;
+import static org.apache.datasketches.kll.KllPreambleUtil.getMemorySerVer;
import static org.apache.datasketches.kll.KllSketch.Error.NOT_SINGLE_ITEM;
import static org.apache.datasketches.kll.KllSketch.Error.kllSketchThrow;
import static org.apache.datasketches.kll.KllSketch.SketchType.FLOATS_SKETCH;
@@ -73,6 +75,12 @@ final class KllHeapFloatsSketch extends KllFloatsSketch {
floatItems_ = new float[k];
}
+ static KllHeapFloatsSketch heapifyImpl(final Memory srcMem) {
+ Objects.requireNonNull(srcMem, "Parameter 'srcMem' must not be null");
+ final KllMemoryValidate memVal = new KllMemoryValidate(srcMem, FLOATS_SKETCH);
+ return new KllHeapFloatsSketch(srcMem, memVal);
+ }
+
/**
* Heapify constructor.
* @param srcMem Memory object that contains data serialized by this sketch.
@@ -86,14 +94,14 @@ final class KllHeapFloatsSketch extends KllFloatsSketch {
minK_ = memValidate.minK;
levelsArr = memValidate.levelsArr;
isLevelZeroSorted_ = memValidate.level0Sorted;
- final boolean updatableMemFormat = memValidate.updatableMemFormat;
+ final boolean serialVersionUpdatable = getMemorySerVer(srcMem) == SERIAL_VERSION_UPDATABLE;
- if (memValidate.empty && !updatableMemFormat) {
+ if (memValidate.empty && !serialVersionUpdatable) {
minFloatItem_ = Float.NaN;
maxFloatItem_ = Float.NaN;
floatItems_ = new float[k_];
}
- else if (memValidate.singleItem && !updatableMemFormat) {
+ else if (memValidate.singleItem && !serialVersionUpdatable) {
final float item = srcMem.getFloat(DATA_START_ADR_SINGLE_ITEM);
minFloatItem_ = maxFloatItem_ = item;
floatItems_ = new float[k_];
@@ -101,7 +109,7 @@ final class KllHeapFloatsSketch extends KllFloatsSketch {
}
else { //Full or updatableMemFormat
int offsetBytes = DATA_START_ADR;
- offsetBytes += (updatableMemFormat ? levelsArr.length * Integer.BYTES : (levelsArr.length - 1) * Integer.BYTES);
+ offsetBytes += (serialVersionUpdatable ? levelsArr.length * Integer.BYTES : (levelsArr.length - 1) * Integer.BYTES);
minFloatItem_ = srcMem.getFloat(offsetBytes);
offsetBytes += Float.BYTES;
maxFloatItem_ = srcMem.getFloat(offsetBytes);
@@ -110,7 +118,7 @@ final class KllHeapFloatsSketch extends KllFloatsSketch {
final int retainedItems = capacityItems - levelsArr[0];
floatItems_ = new float[capacityItems];
final int shift = levelsArr[0];
- if (updatableMemFormat) {
+ if (serialVersionUpdatable) {
offsetBytes += shift * Float.BYTES;
srcMem.getFloatArray(offsetBytes, floatItems_, shift, retainedItems);
} else {
@@ -119,12 +127,6 @@ final class KllHeapFloatsSketch extends KllFloatsSketch {
}
}
- static KllHeapFloatsSketch heapifyImpl(final Memory srcMem) {
- Objects.requireNonNull(srcMem, "Parameter 'srcMem' must not be null");
- final KllMemoryValidate memVal = new KllMemoryValidate(srcMem, FLOATS_SKETCH);
- return new KllHeapFloatsSketch(srcMem, memVal);
- }
-
@Override
public int getK() { return k_; }
diff --git a/src/main/java/org/apache/datasketches/kll/KllHelper.java b/src/main/java/org/apache/datasketches/kll/KllHelper.java
index 1384e11b..e071728d 100644
--- a/src/main/java/org/apache/datasketches/kll/KllHelper.java
+++ b/src/main/java/org/apache/datasketches/kll/KllHelper.java
@@ -45,7 +45,6 @@ import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_SINGLE;
import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE;
import static org.apache.datasketches.kll.KllPreambleUtil.SER_VER_BYTE_ADR;
import static org.apache.datasketches.kll.KllPreambleUtil.SINGLE_ITEM_BIT_MASK;
-import static org.apache.datasketches.kll.KllPreambleUtil.UPDATABLE_BIT_MASK;
import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryEmptyFlag;
import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryFamilyID;
import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryK;
@@ -57,7 +56,6 @@ import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryNumLevels;
import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryPreInts;
import static org.apache.datasketches.kll.KllPreambleUtil.setMemorySerVer;
import static org.apache.datasketches.kll.KllPreambleUtil.setMemorySingleItemFlag;
-import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryUpdatableFlag;
import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH;
import static org.apache.datasketches.kll.KllSketch.SketchType.FLOATS_SKETCH;
@@ -617,7 +615,7 @@ final class KllHelper {
return byteArr;
}
- static byte[] fastEmptyCompactByteArray(final KllSketch sketch) {
+ private static byte[] fastEmptyCompactByteArray(final KllSketch sketch) {
final byte[] byteArr = new byte[8];
byteArr[0] = PREAMBLE_INTS_EMPTY_SINGLE; //2
byteArr[1] = SERIAL_VERSION_EMPTY_FULL; //1
@@ -628,7 +626,7 @@ final class KllHelper {
return byteArr;
}
- static byte[] fastSingleItemCompactByteArray(final KllSketch sketch) {
+ private static byte[] fastSingleItemCompactByteArray(final KllSketch sketch) {
final SketchType sketchType = sketch.sketchType;
final byte[] byteArr;
switch (sketchType) {
@@ -644,8 +642,8 @@ final class KllHelper {
ByteArrayUtil.putDoubleLE(byteArr, DATA_START_ADR_SINGLE_ITEM, dblSk.getDoubleSingleItem());
break;
}
-// case ITEMS_SKETCH: {
-// byteArr = null; //TODO
+// case ITEMS_SKETCH: { //TODO
+// byteArr = null;
// break;
// }
default: return null; //can't happen
@@ -668,7 +666,7 @@ final class KllHelper {
final String epsPct = String.format("%.3f%%", sketch.getNormalizedRankError(false) * 100);
final String epsPMFPct = String.format("%.3f%%", sketch.getNormalizedRankError(true) * 100);
final StringBuilder sb = new StringBuilder();
- final String directStr = sketch.updatableMemFormat ? "Direct" : "";
+ final String directStr = sketch.serialVersionUpdatable ? "Direct" : "";
final String skType = sketchType == DOUBLES_SKETCH ? directStr + "Doubles" :
sketchType == FLOATS_SKETCH ? directStr + "Floats" : directStr + "Items";
sb.append(Util.LS).append("### Kll").append(skType).append("Sketch Summary:").append(Util.LS);
@@ -684,7 +682,7 @@ final class KllHelper {
sb.append(" Level 0 Sorted : ").append(sketch.isLevelZeroSorted()).append(Util.LS);
sb.append(" Capacity Items : ").append(levelsArr[numLevels]).append(Util.LS);
sb.append(" Retained Items : ").append(sketch.getNumRetained()).append(Util.LS);
- if (sketch.updatableMemFormat) {
+ if (sketch.serialVersionUpdatable) {
sb.append(" Updatable Storage Bytes: ").append(sketch.getCurrentUpdatableSerializedSizeBytes()).append(Util.LS);
} else {
sb.append(" Compact Storage Bytes : ").append(sketch.getCurrentCompactSerializedSizeBytes()).append(Util.LS);
@@ -735,14 +733,18 @@ final class KllHelper {
* This method exists for testing purposes only. The resulting byteArray
* structure is an internal format and not supported for general transport
* or compatibility between systems and may be subject to change in the future.
+ *
+ * <p>The given sketch is already backed by memory in updatable format. This method
+ * updates the flag bits to reflect the actual state of <i>n</i>.</p>
+ *
* @param sketch the current sketch to be serialized.
* @return a byte array in an updatable form.
*/
private static byte[] toUpdatableByteArrayFromUpdatableMemory(final KllSketch sketch) {
final int curBytes = sketch.getCurrentUpdatableSerializedSizeBytes();
final long n = sketch.getN();
- final byte flags = (byte) (UPDATABLE_BIT_MASK
- | ((n == 0) ? EMPTY_BIT_MASK : 0)
+ final byte flags = (byte)
+ ( ((n == 0) ? EMPTY_BIT_MASK : 0)
| ((n == 1) ? SINGLE_ITEM_BIT_MASK : 0));
final byte[] byteArr = new byte[curBytes];
sketch.wmem.getByteArray(0, byteArr, 0, curBytes);
@@ -758,7 +760,7 @@ final class KllHelper {
* @return a byte array in an updatable form.
*/
static byte[] toUpdatableByteArrayImpl(final KllSketch sketch) {
- if (sketch.hasMemory() && sketch.updatableMemFormat) {
+ if (sketch.hasMemory() && sketch.serialVersionUpdatable) {
return toUpdatableByteArrayFromUpdatableMemory(sketch);
}
final byte[] byteArr = new byte[sketch.getCurrentUpdatableSerializedSizeBytes()];
@@ -895,7 +897,7 @@ final class KllHelper {
// }
//MEMORY SPACE MANAGEMENT
- if (sketch.updatableMemFormat) {
+ if (sketch.serialVersionUpdatable) {
sketch.wmem = memorySpaceMgmt(sketch, myNewLevelsArr.length, myNewTotalItemsCapacity);
}
//update our sketch with new expanded spaces
@@ -965,24 +967,23 @@ final class KllHelper {
return result;
}
- private static void loadFirst8Bytes(final KllSketch sk, final WritableMemory wmem,
- final boolean updatableFormat) {
+ private static void loadFirst8Bytes(final KllSketch sk, final WritableMemory wmem,
+ final boolean serialVersionUpdatable) {
final boolean empty = sk.getN() == 0;
final boolean lvlZeroSorted = sk.isLevelZeroSorted();
final boolean singleItem = sk.getN() == 1;
- final int preInts = updatableFormat
+ final int preInts = serialVersionUpdatable
? PREAMBLE_INTS_FULL
: (empty || singleItem) ? PREAMBLE_INTS_EMPTY_SINGLE : PREAMBLE_INTS_FULL;
//load the preamble
setMemoryPreInts(wmem, preInts);
- final int server = updatableFormat ? SERIAL_VERSION_UPDATABLE
+ final int server = serialVersionUpdatable ? SERIAL_VERSION_UPDATABLE
: (singleItem ? SERIAL_VERSION_SINGLE : SERIAL_VERSION_EMPTY_FULL);
setMemorySerVer(wmem, server);
setMemoryFamilyID(wmem, Family.KLL.getID());
setMemoryEmptyFlag(wmem, empty);
setMemoryLevelZeroSortedFlag(wmem, lvlZeroSorted);
setMemorySingleItemFlag(wmem, singleItem);
- setMemoryUpdatableFlag(wmem, updatableFormat);
setMemoryK(wmem, sk.getK());
setMemoryM(wmem, sk.getM());
}
diff --git a/src/main/java/org/apache/datasketches/kll/KllMemoryValidate.java b/src/main/java/org/apache/datasketches/kll/KllMemoryValidate.java
index acfebf03..724c5ca0 100644
--- a/src/main/java/org/apache/datasketches/kll/KllMemoryValidate.java
+++ b/src/main/java/org/apache/datasketches/kll/KllMemoryValidate.java
@@ -19,7 +19,6 @@
package org.apache.datasketches.kll;
-import static org.apache.datasketches.common.Family.idToFamily;
import static org.apache.datasketches.kll.KllMemoryValidate.MemoryInputError.EMPTYBIT_AND_PREINTS;
import static org.apache.datasketches.kll.KllMemoryValidate.MemoryInputError.EMPTYBIT_AND_SER_VER;
import static org.apache.datasketches.kll.KllMemoryValidate.MemoryInputError.EMPTYBIT_AND_SINGLEBIT;
@@ -27,7 +26,6 @@ import static org.apache.datasketches.kll.KllMemoryValidate.MemoryInputError.INV
import static org.apache.datasketches.kll.KllMemoryValidate.MemoryInputError.SINGLEBIT_AND_PREINTS;
import static org.apache.datasketches.kll.KllMemoryValidate.MemoryInputError.SINGLEBIT_AND_SER_VER;
import static org.apache.datasketches.kll.KllMemoryValidate.MemoryInputError.SRC_NOT_KLL;
-import static org.apache.datasketches.kll.KllMemoryValidate.MemoryInputError.UPDATABLEBIT_AND_SER_VER;
import static org.apache.datasketches.kll.KllMemoryValidate.MemoryInputError.memoryValidateThrow;
import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR;
import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_SINGLE_ITEM;
@@ -48,7 +46,6 @@ import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryNumLevels;
import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryPreInts;
import static org.apache.datasketches.kll.KllPreambleUtil.getMemorySerVer;
import static org.apache.datasketches.kll.KllPreambleUtil.getMemorySingleItemFlag;
-import static org.apache.datasketches.kll.KllPreambleUtil.getMemoryUpdatableFormatFlag;
import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH;
import org.apache.datasketches.common.Family;
@@ -65,59 +62,57 @@ import org.apache.datasketches.memory.WritableMemory;
*
*/
final class KllMemoryValidate {
- // first 8 bytes
- final int preInts; // = extractPreInts(srcMem);
+ // first 8 bytes of preamble
+ final int preInts;
final int serVer;
final int familyID;
- final String famName;
final int flags;
- boolean empty;
- boolean singleItem;
- final boolean level0Sorted;
- final SketchType sketchType;
- boolean updatableMemFormat = false;
- final boolean readOnly;
final int k;
final int m;
- final int typeBytes;
-
- // depending on the layout, the next 8-16 bytes of the preamble, may be filled with assumed items.
- // For example, if the layout is compact & empty, n = 0, if compact and single, n = 1, etc.
- long n;
- // next 4 bytes
- int minK;
- int numLevels;
- // derived
+ //last byte is unused
+
+ //Flag bits:
+ final boolean empty;
+ final boolean level0Sorted;
+ final boolean singleItem;
+ //From SerVer
+ private boolean serialVersionUpdatable;
+
+ // Depending on the layout, the next 8-16 bytes of the preamble may be derived by assumption.
+ // For example, if the layout is compact & empty, n = 0, if compact and single, n = 1.
+ long n; //8 bytes (if present)
+ int minK; //2 bytes (if present)
+ int numLevels; //1 byte (if present)
+ //unused byte
+ int[] levelsArr; //starts at byte 20, adjusted to include top index here
+
+ // derived, other
int sketchBytes;
- int[] levelsArr; //adjusted to include top index
-
+ private int typeBytes;
+
KllMemoryValidate(final Memory srcMem, final SketchType sketchType) {
-
- readOnly = srcMem.isReadOnly();
preInts = getMemoryPreInts(srcMem);
serVer = getMemorySerVer(srcMem);
-
familyID = getMemoryFamilyID(srcMem);
if (familyID != Family.KLL.getID()) { memoryValidateThrow(SRC_NOT_KLL, familyID); }
- famName = idToFamily(familyID).toString();
flags = getMemoryFlags(srcMem);
- updatableMemFormat = getMemoryUpdatableFormatFlag(srcMem);
- empty = getMemoryEmptyFlag(srcMem);
- singleItem = getMemorySingleItemFlag(srcMem);
- level0Sorted = getMemoryLevelZeroSortedFlag(srcMem);
- this.sketchType = sketchType;
k = getMemoryK(srcMem);
m = getMemoryM(srcMem);
KllHelper.checkM(m);
KllHelper.checkK(k, m);
- if ((serVer == SERIAL_VERSION_UPDATABLE) ^ updatableMemFormat) { memoryValidateThrow(UPDATABLEBIT_AND_SER_VER, 1); }
+
+ empty = getMemoryEmptyFlag(srcMem);
+ level0Sorted = getMemoryLevelZeroSortedFlag(srcMem);
+ singleItem = getMemorySingleItemFlag(srcMem);
+
+ serialVersionUpdatable = serVer == SERIAL_VERSION_UPDATABLE;
typeBytes = (sketchType == DOUBLES_SKETCH) ? Double.BYTES : Float.BYTES;
- if (updatableMemFormat) { updatableMemFormatValidate((WritableMemory) srcMem); }
+ if (serialVersionUpdatable) { updatableMemFormatValidate((WritableMemory) srcMem); }
else { compactMemoryValidate(srcMem); }
}
- void compactMemoryValidate(final Memory srcMem) { //FOR HEAPIFY
+ private void compactMemoryValidate(final Memory srcMem) { //FOR HEAPIFY. NOT UPDATABLE
if (empty && singleItem) { memoryValidateThrow(EMPTYBIT_AND_SINGLEBIT, flags); }
final int sw = (empty ? 1 : 0) | (singleItem ? 4 : 0);
@@ -129,7 +124,7 @@ final class KllMemoryValidate {
minK = getMemoryMinK(srcMem);
numLevels = getMemoryNumLevels(srcMem);
- // Create Levels Arr
+ // Get Levels Arr and add the last element
levelsArr = new int[numLevels + 1];
srcMem.getIntArray(DATA_START_ADR, levelsArr, 0, numLevels); //copies all except the last one
final int capacityItems = KllHelper.computeTotalItemCapacity(k, m, numLevels);
@@ -163,11 +158,9 @@ final class KllMemoryValidate {
}
}
- void updatableMemFormatValidate(final WritableMemory wSrcMem) {
+ private void updatableMemFormatValidate(final WritableMemory wSrcMem) {
if (preInts != PREAMBLE_INTS_FULL) { memoryValidateThrow(INVALID_PREINTS, preInts); }
n = getMemoryN(wSrcMem);
- empty = n == 0; //empty & singleItem are set for convenience
- singleItem = n == 1; // there is no error checking on these bits
minK = getMemoryMinK(wSrcMem);
numLevels = getMemoryNumLevels(wSrcMem);
@@ -187,7 +180,6 @@ final class KllMemoryValidate {
SINGLEBIT_AND_SER_VER("Single Item Bit: 1 -> SerVer: " + SERIAL_VERSION_SINGLE + ", NOT: "),
SINGLEBIT_AND_PREINTS("Single Item Bit: 1 -> PreInts: " + PREAMBLE_INTS_EMPTY_SINGLE + ", NOT: "),
INVALID_PREINTS("PreInts Must Be: " + PREAMBLE_INTS_FULL + ", NOT: "),
- UPDATABLEBIT_AND_SER_VER("((SerVer == 3) ^ (Updatable Bit)) must = 0, NOT: "),
EMPTYBIT_AND_SINGLEBIT("Empty flag bit and SingleItem flag bit cannot both be set. Flags: ");
private String msg;
diff --git a/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java
index 5a3cf6e7..62d243e4 100644
--- a/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java
+++ b/src/main/java/org/apache/datasketches/kll/KllPreambleUtil.java
@@ -19,6 +19,7 @@
package org.apache.datasketches.kll;
+import static org.apache.datasketches.common.Family.idToFamily;
import static org.apache.datasketches.common.Util.zeroPad;
import static org.apache.datasketches.kll.KllSketch.SketchType.DOUBLES_SKETCH;
@@ -37,14 +38,28 @@ import org.apache.datasketches.memory.WritableMemory;
* This allows the possibility of the introduction of different serialization
* schemes with minimal impact on the rest of the library.</p>
*
- * <p>
- * LAYOUT: The low significance bytes of this <i>long</i> based data structure are on the right.
+ * <h3>Visual Layout</h3>
+ * The low significance bytes of this <i>long</i> based data structure are on the right.
* The multi-byte primitives are stored in native byte order.
- * The single byte fields are treated as unsigned.</p>
+ * The numeric <i>byte</i> and <i>short</i> fields are treated as unsigned.
+ * The numeric <i>int</i> and <i>long</i> fields are treated as signed.
*
- * <p>An empty sketch requires only 8 bytes, which is only preamble.
- * A serialized, non-empty KllDoublesSketch requires at least 16 bytes of preamble.
- * A serialized, non-empty KllFloatsSketch requires at least 12 bytes of preamble.</p>
+ * <h3>Preamble Sizes</h3>
+ * The preamble has 2 formats or sizes.
+ * <ul><li>A serialized empty sketch requires 8 bytes, all preamble. It is not updatable.</li>
+ * <li>A serialized, single-item sketch requires 8 bytes of preamble, followed by the one item. It is not updatable.</li>
+ * <li>A serialized, <i>n &gt; 1</i> sketch requires at least 20 bytes of preamble (5 ints).
+ * This is followed by the Levels int array, followed by the min and max values,
+ * followed by the item data arrays. It can be in compact, not updatable format or in regular, updatable format.</li>
+ * </ul>
+ *
+ * <h3>Compact Formats</h3>
+ * <ul><li>The empty and single-item formats are by definition compact and non-updatable.</li>
+ * <li>The compact "full" format differs from the fully updatable (writable) format in two ways:
+ * <ul><li>The last entry of the Levels int array is omitted because it can be derived.</li>
+ * <li>All empty space of the data arrays is removed in the serialization.
+ * The empty space can be reconstructed.</li></ul>
+ * </ul>
*
* <pre>{@code
* Serialized float sketch layout, more than one item:
@@ -92,33 +107,26 @@ import org.apache.datasketches.memory.WritableMemory;
* 0 || unused | M |--------K--------| Flags | FamID | SerVer | PreambleInts |
* || | 8 |
* 1 ||------------------------------Single Item-------------------------------------|
+ * }</pre>
+ * The placement and structure of the data block depends on Layout:
+ * <ul><li>For SerVer = SERIAL_VERSION_EMPTY_FULL (1) and <i>n</i> = 0:<br>
+ * The sketch is empty. The preamble is 8 bytes. There is no data.</li>
+ *
+ * <li>For SerVer = SERIAL_VERSION_SINGLE (2), <i>n</i> is assumed to be 1:<br>
+ * The single data item is at offset DATA_START_ADR_SINGLE_ITEM = 8.</li>
*
- * The structure of the data block depends on Layout:
- *
- * For FLOAT_SINGLE_COMPACT or DOUBLE_SINGLE_COMPACT:
- * The single data item is at offset DATA_START_ADR_SINGLE_ITEM = 8
- *
- * For FLOAT_FULL_COMPACT:
- * The int[] levels array starts at offset DATA_START_ADR_FLOAT = 20 with a length of numLevels integers;
- * Followed by Float Min_Item, then Float Max_Item
- * Followed by an array of Floats of length retainedItems()
- *
- * For DOUBLE_FULL_COMPACT
- * The int[] levels array starts at offset DATA_START_ADR_DOUBLE = 20 with a length of numLevels integers;
- * Followed by Double Min_Item, then Double Max_Item
- * Followed by an array of Doubles of length retainedItems()
- *
- * For FLOAT_UPDATABLE
- * The int[] levels array starts at offset DATA_START_ADR_FLOAT = 20 with a length of (numLevels + 1) integers;
- * Followed by Float Min_Item, then Float Max_Item
- * Followed by an array of Floats of length KllHelper.computeTotalItemCapacity(...).
+ * <li>For SerVer = SERIAL_VERSION_EMPTY_FULL (1) and <i>n</i> &gt; 1:<br>
+ * The int[] levels array starts at offset DATA_START_ADR = 20 with a length of numLevels integers,
+ * <ul><li>Followed by Min_Item, then Max_Item,</li>
+ * <li>Followed by an array of items of length retainedItems().<br>
+ * The total byte length is dependent on item type.</li></ul>
*
- * For DOUBLE_UPDATABLE
- * The int[] levels array starts at offset DATA_START_ADR_DOUBLE = 20 with a length of (numLevels + 1) integers;
- * Followed by Double Min_Item, then Double Max_Item
- * Followed by an array of Doubles of length KllHelper.computeTotalItemCapacity(...).
- *
- * }</pre>
+ * <li>For SerVer = SERIAL_VERSION_UPDATABLE (3)<br>
+ * The int[] levels array starts at offset DATA_START_ADR = 20 with a length of (numLevels + 1) integers;
+ * <ul><li>Followed by Min_Item, then Max_Item,</li>
+ * <li>Followed by an array of items of length KllHelper.computeTotalItemCapacity(...).<br>
+ * The total byte length is dependent on item type.</li></ul>
+ * </ul>
*
* @author Lee Rhodes
*/
@@ -144,13 +152,13 @@ final class KllPreambleUtil {
static final int MIN_K_SHORT_ADR = 16; // to 17
static final int NUM_LEVELS_BYTE_ADR = 18;
- // 19 is reserved for future use
+ // 19 is reserved for future use
static final int DATA_START_ADR = 20; // Full Sketch, not single item
// Other static members
- static final byte SERIAL_VERSION_EMPTY_FULL = 1; // Empty or full preamble, NOT single item format
+ static final byte SERIAL_VERSION_EMPTY_FULL = 1; // Empty or full preamble, NOT single item format, NOT updatable
static final byte SERIAL_VERSION_SINGLE = 2; // only single-item format
- static final byte SERIAL_VERSION_UPDATABLE = 3; //
+ static final byte SERIAL_VERSION_UPDATABLE = 3; // PreInts=5, Full preamble + LevelsArr + min, max + empty space
static final byte PREAMBLE_INTS_EMPTY_SINGLE = 2; // for empty or single item
static final byte PREAMBLE_INTS_FULL = 5; // Full preamble, not empty nor single item
static final byte KLL_FAMILY = 15;
@@ -159,7 +167,6 @@ final class KllPreambleUtil {
static final int EMPTY_BIT_MASK = 1;
static final int LEVEL_ZERO_SORTED_BIT_MASK = 2;
static final int SINGLE_ITEM_BIT_MASK = 4;
- static final int UPDATABLE_BIT_MASK = 16;
/**
* Returns a human readable string summary of the internal state of the given sketch byte array.
@@ -188,26 +195,27 @@ final class KllPreambleUtil {
final String flagsStr = (flags) + ", 0x" + (Integer.toHexString(flags)) + ", "
+ zeroPad(Integer.toBinaryString(flags), 8);
final int preInts = memVal.preInts;
- final boolean doublesSketch = memVal.sketchType == DOUBLES_SKETCH;
- final boolean updatableMemFormat = memVal.updatableMemFormat;
+ final boolean serialVersionUpdatable = getMemorySerVer(mem) == SERIAL_VERSION_UPDATABLE;
final boolean empty = memVal.empty;
final boolean singleItem = memVal.singleItem;
final int sketchBytes = memVal.sketchBytes;
- final int typeBytes = memVal.typeBytes;
+ final int typeBytes = sketchType == DOUBLES_SKETCH ? Double.BYTES : Float.BYTES;
+ final int familyID = getMemoryFamilyID(mem);
+ final String famName = idToFamily(familyID).toString();
final StringBuilder sb = new StringBuilder();
sb.append(Util.LS).append("### KLL SKETCH MEMORY SUMMARY:").append(LS);
sb.append("Byte 0 : Preamble Ints : ").append(preInts).append(LS);
sb.append("Byte 1 : SerVer : ").append(memVal.serVer).append(LS);
sb.append("Byte 2 : FamilyID : ").append(memVal.familyID).append(LS);
- sb.append(" FamilyName : ").append(memVal.famName).append(LS);
+ sb.append(" FamilyName : ").append(famName).append(LS);
sb.append("Byte 3 : Flags Field : ").append(flagsStr).append(LS);
sb.append(" Bit Flag Name").append(LS);
sb.append(" 0 EMPTY COMPACT : ").append(empty).append(LS);
sb.append(" 1 LEVEL_ZERO_SORTED : ").append(memVal.level0Sorted).append(LS);
sb.append(" 2 SINGLE_ITEM COMPACT : ").append(singleItem).append(LS);
- sb.append(" 3 DOUBLES_SKETCH : ").append(doublesSketch).append(LS);
- sb.append(" 4 UPDATABLE : ").append(updatableMemFormat).append(LS);
+ sb.append(" 3 DOUBLES_SKETCH : ").append(sketchType == DOUBLES_SKETCH).append(LS);
+ sb.append(" 4 UPDATABLE : ").append(serialVersionUpdatable).append(LS);
sb.append("Bytes 4-5 : K : ").append(memVal.k).append(LS);
sb.append("Byte 6 : Min Level Cap, M : ").append(memVal.m).append(LS);
sb.append("Byte 7 : (Reserved) : ").append(LS);
@@ -215,7 +223,7 @@ final class KllPreambleUtil {
final long n = memVal.n;
final int minK = memVal.minK;
final int numLevels = memVal.numLevels;
- if (updatableMemFormat || (!empty && !singleItem)) {
+ if (serialVersionUpdatable || (!empty && !singleItem)) {
sb.append("Bytes 8-15: N : ").append(n).append(LS);
sb.append("Bytes 16-17: MinK : ").append(minK).append(LS);
sb.append("Byte 18 : NumLevels : ").append(numLevels).append(LS);
@@ -235,7 +243,7 @@ final class KllPreambleUtil {
sb.append("### START KLL DATA:").append(LS);
int offsetBytes = 0;
- if (updatableMemFormat) {
+ if (serialVersionUpdatable) {
sb.append("LEVELS ARR:").append(LS);
offsetBytes = DATA_START_ADR;
for (int i = 0; i < numLevels + 1; i++) {
@@ -243,7 +251,7 @@ final class KllPreambleUtil {
offsetBytes += Integer.BYTES;
}
sb.append("MIN/MAX:").append(LS);
- if (doublesSketch) {
+ if (sketchType == DOUBLES_SKETCH) {
sb.append(mem.getDouble(offsetBytes)).append(LS);
offsetBytes += typeBytes;
sb.append(mem.getDouble(offsetBytes)).append(LS);
@@ -256,7 +264,7 @@ final class KllPreambleUtil {
}
sb.append("ITEMS DATA").append(LS);
final int itemsSpace = (sketchBytes - offsetBytes) / typeBytes;
- if (doublesSketch) {
+ if (sketchType == DOUBLES_SKETCH) {
for (int i = 0; i < itemsSpace; i++) {
sb.append(i + ", " + mem.getDouble(offsetBytes)).append(LS);
offsetBytes += typeBytes;
@@ -277,7 +285,7 @@ final class KllPreambleUtil {
}
sb.append("(top level of Levels arr is absent)").append(LS);
sb.append("MIN/MAX:").append(LS);
- if (doublesSketch) {
+ if (sketchType == DOUBLES_SKETCH) {
sb.append(mem.getDouble(offsetBytes)).append(LS);
offsetBytes += typeBytes;
sb.append(mem.getDouble(offsetBytes)).append(LS);
@@ -290,7 +298,7 @@ final class KllPreambleUtil {
}
sb.append("ITEMS DATA").append(LS);
final int itemSpace = (sketchBytes - offsetBytes) / typeBytes;
- if (doublesSketch) {
+ if (sketchType == DOUBLES_SKETCH) {
for (int i = 0; i < itemSpace; i++) {
sb.append(i + ", " + mem.getDouble(offsetBytes)).append(LS);
offsetBytes += typeBytes;
@@ -305,7 +313,7 @@ final class KllPreambleUtil {
} else { //single item
if (singleItem) {
sb.append("SINGLE ITEM DATA").append(LS);
- sb.append(doublesSketch
+ sb.append(sketchType == DOUBLES_SKETCH
? mem.getDouble(DATA_START_ADR_SINGLE_ITEM)
: mem.getFloat(DATA_START_ADR_SINGLE_ITEM)).append(LS);
}
@@ -343,10 +351,6 @@ final class KllPreambleUtil {
return (getMemoryFlags(mem) & SINGLE_ITEM_BIT_MASK) != 0;
}
- static boolean getMemoryUpdatableFormatFlag(final Memory mem) {
- return (getMemoryFlags(mem) & UPDATABLE_BIT_MASK) != 0;
- }
-
static int getMemoryK(final Memory mem) {
return mem.getShort(K_SHORT_ADR) & 0XFFFF;
}
@@ -398,11 +402,6 @@ final class KllPreambleUtil {
setMemoryFlags(wmem, singleItem ? flags | SINGLE_ITEM_BIT_MASK : flags & ~SINGLE_ITEM_BIT_MASK);
}
- static void setMemoryUpdatableFlag(final WritableMemory wmem, final boolean updatable) {
- final int flags = getMemoryFlags(wmem);
- setMemoryFlags(wmem, updatable ? flags | UPDATABLE_BIT_MASK : flags & ~UPDATABLE_BIT_MASK);
- }
-
static void setMemoryK(final WritableMemory wmem, final int memK) {
wmem.putShort(K_SHORT_ADR, (short) memK);
}
diff --git a/src/main/java/org/apache/datasketches/kll/KllSketch.java b/src/main/java/org/apache/datasketches/kll/KllSketch.java
index 700416c8..aa5bb204 100644
--- a/src/main/java/org/apache/datasketches/kll/KllSketch.java
+++ b/src/main/java/org/apache/datasketches/kll/KllSketch.java
@@ -22,6 +22,7 @@ package org.apache.datasketches.kll;
import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR;
import static org.apache.datasketches.kll.KllPreambleUtil.DATA_START_ADR_SINGLE_ITEM;
import static org.apache.datasketches.kll.KllPreambleUtil.N_LONG_ADR;
+import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_UPDATABLE;
import static org.apache.datasketches.kll.KllSketch.Error.SRC_MUST_BE_DOUBLE;
import static org.apache.datasketches.kll.KllSketch.Error.SRC_MUST_BE_FLOAT;
import static org.apache.datasketches.kll.KllSketch.Error.TGT_IS_READ_ONLY;
@@ -89,8 +90,7 @@ public abstract class KllSketch implements QuantilesAPI {
SRC_MUST_BE_DOUBLE("Given sketch must be of type Double."),
SRC_MUST_BE_FLOAT("Given sketch must be of type Float."),
MRS_MUST_NOT_BE_NULL("MemoryRequestServer cannot be null."),
- NOT_SINGLE_ITEM("Sketch is not single item."),
- MUST_NOT_BE_UPDATABLE_FORMAT("Given Memory object must not be in updatableFormat.");
+ NOT_SINGLE_ITEM("Sketch is not single item.");
private String msg;
@@ -128,7 +128,7 @@ public abstract class KllSketch implements QuantilesAPI {
static final int MIN_M = 2; //The minimum M
static final Random random = new Random();
final SketchType sketchType;
- final boolean updatableMemFormat;
+ final boolean serialVersionUpdatable;
final MemoryRequestServer memReqSvr;
final boolean readOnly;
int[] levelsArr;
@@ -147,8 +147,8 @@ public abstract class KllSketch implements QuantilesAPI {
this.sketchType = sketchType;
this.wmem = wmem;
if (wmem != null) {
- this.updatableMemFormat = KllPreambleUtil.getMemoryUpdatableFormatFlag(wmem);
- this.readOnly = wmem.isReadOnly() || !updatableMemFormat;
+ this.serialVersionUpdatable = KllPreambleUtil.getMemorySerVer(wmem) == SERIAL_VERSION_UPDATABLE;
+ this.readOnly = wmem.isReadOnly() || !serialVersionUpdatable;
if (readOnly) {
this.memReqSvr = null;
} else {
@@ -156,7 +156,7 @@ public abstract class KllSketch implements QuantilesAPI {
this.memReqSvr = memReqSvr;
}
} else { //wmem is null, heap case
- this.updatableMemFormat = false;
+ this.serialVersionUpdatable = false;
this.memReqSvr = null;
this.readOnly = false;
}
@@ -270,7 +270,7 @@ public abstract class KllSketch implements QuantilesAPI {
* @return the number of bytes this sketch would require if serialized.
*/
public int getSerializedSizeBytes() {
- return (updatableMemFormat)
+ return (serialVersionUpdatable)
? getCurrentUpdatableSerializedSizeBytes()
: getCurrentCompactSerializedSizeBytes();
}
@@ -309,7 +309,7 @@ public abstract class KllSketch implements QuantilesAPI {
* @return true if the backing WritableMemory is in updatable format.
*/
public final boolean isMemoryUpdatableFormat() {
- return hasMemory() && updatableMemFormat;
+ return hasMemory() && serialVersionUpdatable;
}
@Override
@@ -391,7 +391,7 @@ public abstract class KllSketch implements QuantilesAPI {
abstract void incNumLevels();
final boolean isCompactSingleItem() {
- return hasMemory() && !updatableMemFormat && (getN() == 1);
+ return hasMemory() && !serialVersionUpdatable && (getN() == 1);
}
boolean isDoublesSketch() { return sketchType == DOUBLES_SKETCH; }
diff --git a/src/test/java/org/apache/datasketches/kll/KllMemoryValidateTest.java b/src/test/java/org/apache/datasketches/kll/KllMemoryValidateTest.java
index 5c23731f..972e186a 100644
--- a/src/test/java/org/apache/datasketches/kll/KllMemoryValidateTest.java
+++ b/src/test/java/org/apache/datasketches/kll/KllMemoryValidateTest.java
@@ -23,8 +23,8 @@ import static org.apache.datasketches.kll.KllPreambleUtil.EMPTY_BIT_MASK;
import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_EMPTY_SINGLE;
import static org.apache.datasketches.kll.KllPreambleUtil.PREAMBLE_INTS_FULL;
import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_EMPTY_FULL;
+import static org.apache.datasketches.kll.KllPreambleUtil.SERIAL_VERSION_SINGLE;
import static org.apache.datasketches.kll.KllPreambleUtil.SINGLE_ITEM_BIT_MASK;
-import static org.apache.datasketches.kll.KllPreambleUtil.UPDATABLE_BIT_MASK;
import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryFamilyID;
import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryFlags;
import static org.apache.datasketches.kll.KllPreambleUtil.setMemoryPreInts;
@@ -72,8 +72,7 @@ public class KllMemoryValidateTest {
KllFloatsSketch sk = KllFloatsSketch.newHeapInstance();
byte[] byteArr = sk.toByteArray();
WritableMemory wmem = WritableMemory.writableWrap(byteArr);
- setMemoryFlags(wmem, UPDATABLE_BIT_MASK);
- setMemorySerVer(wmem, SERIAL_VERSION_EMPTY_FULL);
+ setMemorySerVer(wmem, SERIAL_VERSION_SINGLE);
KllMemoryValidate memVal = new KllMemoryValidate(wmem, FLOATS_SKETCH);
}
diff --git a/tools/SketchesCheckstyle.xml b/tools/SketchesCheckstyle.xml
index 0c55318a..0f5c90f9 100644
--- a/tools/SketchesCheckstyle.xml
+++ b/tools/SketchesCheckstyle.xml
@@ -64,7 +64,7 @@ under the License.
<!-- Size Violations -->
<module name="LineLength">
<property name="severity" value="warning"/>
- <property name="max" value="120"/>
+ <property name="max" value="140"/>
<property name="ignorePattern" value="^package.*|^import.*|a href|href|http://|https://|ftp://"/>
<!-- <metadata name="net.sf.eclipsecs.core.lastEnabledSeverity" value="inherit"/> -->
</module>
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org