You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by le...@apache.org on 2020/08/27 01:42:08 UTC
[incubator-datasketches-java] branch RelativeErrorQuantiles
updated: Implemented Strategy 2.
This is an automated email from the ASF dual-hosted git repository.
leerho pushed a commit to branch RelativeErrorQuantiles
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-java.git
The following commit(s) were added to refs/heads/RelativeErrorQuantiles by this push:
new 51aff76 Implemented Strategy 2.
51aff76 is described below
commit 51aff7699f8c1665e42ffc4e5730a93a58602a4b
Author: Lee Rhodes <le...@users.noreply.github.com>
AuthorDate: Wed Aug 26 18:41:42 2020 -0700
Implemented Strategy 2.
---
.../java/org/apache/datasketches/req/Buffer.java | 9 ++
.../apache/datasketches/req/RelativeCompactor.java | 130 +++++++++++++--------
.../datasketches/req/RelativeErrorQuantiles.java | 64 ++++++----
.../datasketches/req/RelativeErrorSketchTest.java | 28 ++++-
4 files changed, 152 insertions(+), 79 deletions(-)
diff --git a/src/main/java/org/apache/datasketches/req/Buffer.java b/src/main/java/org/apache/datasketches/req/Buffer.java
index 1887946..e4ad287 100755
--- a/src/main/java/org/apache/datasketches/req/Buffer.java
+++ b/src/main/java/org/apache/datasketches/req/Buffer.java
@@ -202,6 +202,15 @@ class Buffer {
}
/**
+ * Returns the active length = item count.
+ *
+ * @return the active length of this buffer.
+ */
+ int getLength() {
+ return count_;
+ }
+
+ /**
* Returns an array of the odd values from the range start (inclusive) to end (exclusive).
* The odd values are with respect to the start index. If the starting index is odd with
* respect to the origin of the Buffer, then this will actually return the even values.
diff --git a/src/main/java/org/apache/datasketches/req/RelativeCompactor.java b/src/main/java/org/apache/datasketches/req/RelativeCompactor.java
index b2d5a46..25bb3eb 100644
--- a/src/main/java/org/apache/datasketches/req/RelativeCompactor.java
+++ b/src/main/java/org/apache/datasketches/req/RelativeCompactor.java
@@ -19,7 +19,6 @@
package org.apache.datasketches.req;
-import static java.lang.Math.min;
import static java.lang.Math.round;
import static org.apache.datasketches.Util.numberOfTrailingOnes;
import static org.apache.datasketches.req.Buffer.LS;
@@ -56,7 +55,7 @@ public class RelativeCompactor {
* Constructor
* @param sectionSize the value of k
* @param lgWeight this compactor's lgWeight
- * @param debug optional
+ * @param debug true for debug info
*/
RelativeCompactor(
final int sectionSize,
@@ -76,11 +75,13 @@ public class RelativeCompactor {
if (debug) { rand = new Random(1); }
else { rand = new Random(); }
- if (debug) {
- println(" New Compactor: height: " + lgWeight
- + "\tsectionSize: " + sectionSize
- + "\tnumSections: " + numSections + LS);
- }
+ if (debug) { printNewCompactor(); }
+ }
+
+ private void printNewCompactor() {
+ println(" New Compactor: height: " + lgWeight
+ + "\tsectionSize: " + sectionSize
+ + "\tnumSections: " + numSections + LS);
}
/**
@@ -116,64 +117,82 @@ public class RelativeCompactor {
* @return the array of items to be promoted to the next level compactor
*/
float[] compact() {
- if (debug) {
- println(" Compacting[" + lgWeight + "] nomCapacity: " + getNomCapacity()
- + "\tsectionSize: " + sectionSize
- + "\tnumSections: " + numSections
- + "\tstate(bin): " + Integer.toBinaryString(state));
- }
-
- if (!buf.isSorted()) {
- buf.sort(); //Footnote 1
- }
-
- if (debug) { print(" "); print(toHorizontalList(0)); }
+ final int count = buf.getItemCount();
+ if (debug) { printCompactingStart(); }
+ if (!buf.isSorted()) { buf.sort(); } //Footnote 1
+ if (debug) { print(" "); print(toHorizontalList(0)); } //#decimals
//choose a part of the buffer to compact
- final int compactionOffset; //a.k.a. "s" see footnote 2
- final int secsToCompact = min(numberOfTrailingOnes(state) + 1, numSections - 1);
- compactionOffset = buf.getItemCount() - (secsToCompact * sectionSize);
-
- adjustSectSizeNumSect(); //see Footnotes 3, 4 and 8
+ final int secsToCompact = numberOfTrailingOnes(state) + 1;
+ final int compactionStart = computeCompactionStart(secsToCompact); //a.k.a. "s" see footnote 2
+ assert compactionStart <= (count - 2); //Footnote 5
- assert compactionOffset <= (buf.getItemCount() - 2); //Footnote 5; This is easier to read!
-
- if ((numCompactions % 2) == 1) { coin = !coin; } //invert coin; Footnote 6
+ if ((numCompactions & 1) == 1) { coin = !coin; } //if odd, flip coin; Footnote 6
else { coin = (rand.nextDouble() < 0.5); } //random coin flip
final float[] promote = (coin)
- ? buf.getOdds(compactionOffset, buf.getItemCount())
- : buf.getEvens(compactionOffset, buf.getItemCount());
-
- if (debug) { //Footnote 7
- println(" s: " + compactionOffset
- + "\tsecsToComp: " + secsToCompact
- + "\tsectionSize: " + sectionSize
- + "\tnumSections: " + numSections);
- final int delete = buf.getItemCount() - compactionOffset;
- final int promoteLen = promote.length;
- final int offset = (coin) ? 1 : 0;
- println(" Promote: " + promoteLen + "\tDelete: " + delete + "\tOffset: " + offset);
- }
+ ? buf.getOdds(compactionStart, count)
+ : buf.getEvens(compactionStart, count);
+
+ if (debug) { printCompactionDetail(compactionStart, secsToCompact, promote.length); }
- buf.trimLength(compactionOffset);
+ buf.trimLength(compactionStart);
numCompactions += 1;
state += 1;
- if (debug) {
- println(" DONE: nomCapacity: " + getNomCapacity()
- + "\tnumCompactions: " + numCompactions);
+ if (numCompactions >= (1 << (numSections - 1))) {
+ adjustSectSizeNumSect(); //see Footnotes 3, 4 and 8
+ printAdjSecSizeNumSec();
}
+
+ if (debug) { printCompactionDone(); }
+
return promote;
} //End Compact
- /**
- * Sets the current nominal capacity of this compactor.
- * @return the current maximum capacity of this compactor.
- */
- int getNomCapacity() {
- final int nCap = 2 * numSections * sectionSize;
- return nCap;
+ private int computeCompactionStart(final int secsToCompact) {
+ int s = (getNomCapacity() / 2) + ((numSections - secsToCompact) * sectionSize);
+ return (((buf.getItemCount() - s) & 1) == 1) ? ++s : s;
+ }
+
+ private void printAdjSecSizeNumSec() {
+ final StringBuilder sb = new StringBuilder();
+ sb.append(" ");
+ sb.append("Adjust: SectionSize: ").append(sectionSize);
+ sb.append(" NumSections: ").append(numSections);
+ println(sb.toString());
+ }
+
+ private void printCompactingStart() {
+ final StringBuilder sb = new StringBuilder();
+ sb.append(" ");
+ sb.append("Compacting[").append(lgWeight).append("] ");
+ sb.append("NomCapacity: ").append(getNomCapacity());
+ sb.append("\tSectionSize: ").append(sectionSize);
+ sb.append("\tNumSections: ").append(numSections);
+ sb.append("\tState(bin): ").append(Integer.toBinaryString(state));
+ println(sb.toString());
+ }
+
+ private void printCompactionDetail(final int compactionStart, final int secsToCompact,
+ final int promoteLen) { //Footnote 7
+ final StringBuilder sb = new StringBuilder();
+ final int count = buf.getItemCount();
+ sb.append(" ");
+ sb.append("SecsToCompact: ").append(secsToCompact);
+ sb.append("\tNoCompact: ").append(compactionStart);
+ sb.append("\tDoCompact: ").append(count - compactionStart).append(LS);
+ final int delete = count - compactionStart;
+ final String oddOrEven = (coin) ? "Odds" : "Evens";
+ sb.append(" ");
+ sb.append("Promote: ").append(promoteLen);
+ sb.append("\tDelete: ").append(delete);
+ sb.append("\tChoose: ").append(oddOrEven);
+ println(sb.toString());
+ }
+
+ private void printCompactionDone() {
+ println(" DONE: NumCompactions: " + numCompactions);
}
/**
@@ -221,6 +240,15 @@ public class RelativeCompactor {
}
/**
+ * Sets the current nominal capacity of this compactor.
+ * @return the current maximum capacity of this compactor.
+ */
+ int getNomCapacity() {
+ final int nCap = 2 * numSections * sectionSize;
+ return nCap;
+ }
+
+ /**
* Gets the number of retained values in this compactor.
* @return the number of retained values in this compactor.
*/
diff --git a/src/main/java/org/apache/datasketches/req/RelativeErrorQuantiles.java b/src/main/java/org/apache/datasketches/req/RelativeErrorQuantiles.java
index 8186964..9c7e426 100644
--- a/src/main/java/org/apache/datasketches/req/RelativeErrorQuantiles.java
+++ b/src/main/java/org/apache/datasketches/req/RelativeErrorQuantiles.java
@@ -59,7 +59,7 @@ public class RelativeErrorQuantiles {
//constant probability
final static int DEFAULT_K = 50;
- private int k; //default
+ private int k;
private boolean debug = false;
List<RelativeCompactor> compactors = new ArrayList<>();
//int levels; //number of compactors; was H
@@ -119,24 +119,17 @@ public class RelativeErrorQuantiles {
void compress(final boolean lazy) {
updateMaxNomSize();
- if (debug) {
- println("COMPRESS: sKsize: " + size + "\t>=\t"
- + "\tmaxNomSize: " + maxNomSize
- + "\tN: " + totalN);
- }
+ if (debug) { printStartCompress(); }
if (size < maxNomSize) { return; }
for (int h = 0; h < compactors.size(); h++) {
final RelativeCompactor c = compactors.get(h);
- final int re = c.getNumRetainedEntries();
- final int nc = c.getNomCapacity();
+ final int retEnt = c.getNumRetainedEntries();
+ final int nomCap = c.getNomCapacity();
- if (re >= nc) {
+ if (retEnt >= nomCap) {
if ((h + 1) >= levels()) {
- if (debug) {
- println(" Must Add Compactor: len(c[" + h + "]): "
- + re + "\t>=\tc[" + h + "].nomCapacity(): " + nc);
- }
+ if (debug) { printAddCompactor(h, retEnt, nomCap); }
grow(); //add a level
}
final float[] promoted = c.compact();
@@ -145,20 +138,47 @@ public class RelativeErrorQuantiles {
if (lazy && (size < maxNomSize)) { break; }
}
}
- if (debug) {
- for (int h = 0; h < compactors.size(); h++) {
- final RelativeCompactor c = compactors.get(h);
- print(c.toHorizontalList(0));
- }
- println("COMPRESS: DONE: sKsize: " + size
- + "\tMaxNomSize: " + maxNomSize + LS);
+ if (debug) { printAllHorizList(); }
+ }
+
+ private static void printAddCompactor(final int h, final int retEnt, final int nomCap) {
+ final StringBuilder sb = new StringBuilder();
+ sb.append(" ");
+ sb.append("Must Add Compactor: len(c[").append(h).append("]): ");
+ sb.append(retEnt).append(" >= c[").append(h).append("].nomCapacity(): ").append(nomCap);
+ println(sb.toString());
+ }
+
+ private void printStartCompress() {
+ final StringBuilder sb = new StringBuilder();
+ sb.append("COMPRESS: ");
+ sb.append("skSize: ").append(size).append(" >= ");
+ sb.append("MaxNomSize: ").append(maxNomSize);
+ sb.append("; N: ").append(totalN);
+ println(sb.toString());
+ }
+
+ private void printAllHorizList() {
+ for (int h = 0; h < compactors.size(); h++) {
+ final RelativeCompactor c = compactors.get(h);
+ print(c.toHorizontalList(0));
}
+ println("COMPRESS: DONE: sKsize: " + size
+ + "\tMaxNomSize: " + maxNomSize + LS);
}
// public double[] getCDF(final float[] splitPoints) {
// return getPmfOrCdf(splitPoints, true);
// }
+ /**
+ * Gets the total number of items offered to the sketch.
+ * @return the total number of items offered to the sketch.
+ */
+ public long getN() {
+ return totalN;
+ }
+
@SuppressWarnings("static-method")
private double[] getPmfOrCdf(final float[] splitPoints, final boolean isCdf) {
return null;
@@ -243,7 +263,7 @@ public class RelativeErrorQuantiles {
return (double)nnRank / totalN;
}
- Pair[] ranks() { //debug
+ Pair[] ranks() { //not yet used for debug
return null;
}
@@ -272,7 +292,7 @@ public class RelativeErrorQuantiles {
}
void update(final float item) {
- if (!Float.isFinite(item)) { return; }
+ if (!Float.isFinite(item)) { return; } //TODO: We may want to throw here
final RelativeCompactor c = compactors.get(0).append(item);
size++;
diff --git a/src/test/java/org/apache/datasketches/req/RelativeErrorSketchTest.java b/src/test/java/org/apache/datasketches/req/RelativeErrorSketchTest.java
index 2c8afb4..e0233c7 100644
--- a/src/test/java/org/apache/datasketches/req/RelativeErrorSketchTest.java
+++ b/src/test/java/org/apache/datasketches/req/RelativeErrorSketchTest.java
@@ -19,6 +19,8 @@
package org.apache.datasketches.req;
+import static org.apache.datasketches.QuantilesHelper.getEvenlySpacedRanks;
+
import org.testng.annotations.Test;
/**
@@ -30,20 +32,34 @@ public class RelativeErrorSketchTest {
@Test
public void test1() {
- RelativeErrorQuantiles sk = new RelativeErrorQuantiles(4, true); //w debug
- for (int i = 101; i-- > 1; ) {
- sk.update(i);
+ RelativeErrorQuantiles sk = new RelativeErrorQuantiles(6, true); //w debug
+ int max = 200;
+ int min = 1;
+ boolean up = false;
+
+ if (up) {
+ for (int i = min; i <= max; i++) {
+ sk.update(i);
+ }
+ } else { //down
+ for (int i = max + 1; i-- > min; ) {
+ sk.update(i);
+ }
}
print(sk.getSummary(0));
- for (float i = 10; i <= 100; i += 10) {
- printRank(sk, i + .5f);
+ double[] ranks = getEvenlySpacedRanks(11);
+ println("Ranks Test:");
+ for (int i = 0; i < ranks.length; i++) {
+ printRank(sk, ((float)ranks[i] * (max - min)) + min);
}
}
private static void printRank(RelativeErrorQuantiles sk, float v) {
double r = sk.rank(v);
- println("Normalized Rank: value: " + v + ", rank: " + r);
+ String rstr = String.format("%.2f", r);
+ String vstr = String.format("%.2f", v);
+ println("Value: " + vstr + ", Rank: " + rstr);
}
@Test
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org