You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by le...@apache.org on 2020/08/27 01:42:08 UTC

[incubator-datasketches-java] branch RelativeErrorQuantiles updated: Implemented Strategy 2.

This is an automated email from the ASF dual-hosted git repository.

leerho pushed a commit to branch RelativeErrorQuantiles
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-java.git


The following commit(s) were added to refs/heads/RelativeErrorQuantiles by this push:
     new 51aff76  Implemented Strategy 2.
51aff76 is described below

commit 51aff7699f8c1665e42ffc4e5730a93a58602a4b
Author: Lee Rhodes <le...@users.noreply.github.com>
AuthorDate: Wed Aug 26 18:41:42 2020 -0700

    Implemented Strategy 2.
---
 .../java/org/apache/datasketches/req/Buffer.java   |   9 ++
 .../apache/datasketches/req/RelativeCompactor.java | 130 +++++++++++++--------
 .../datasketches/req/RelativeErrorQuantiles.java   |  64 ++++++----
 .../datasketches/req/RelativeErrorSketchTest.java  |  28 ++++-
 4 files changed, 152 insertions(+), 79 deletions(-)

diff --git a/src/main/java/org/apache/datasketches/req/Buffer.java b/src/main/java/org/apache/datasketches/req/Buffer.java
index 1887946..e4ad287 100755
--- a/src/main/java/org/apache/datasketches/req/Buffer.java
+++ b/src/main/java/org/apache/datasketches/req/Buffer.java
@@ -202,6 +202,15 @@ class Buffer {
   }
 
   /**
+   * Returns the active length = item count.
+   *
+   * @return the active length of this buffer.
+   */
+  int getLength() {
+    return count_;
+  }
+
+  /**
    * Returns an array of the odd values from the range start (inclusive) to end (exclusive).
    * The odd values are with respect to the start index. If the starting index is odd with
    * respect to the origin of the Buffer, then this will actually return the even values.
diff --git a/src/main/java/org/apache/datasketches/req/RelativeCompactor.java b/src/main/java/org/apache/datasketches/req/RelativeCompactor.java
index b2d5a46..25bb3eb 100644
--- a/src/main/java/org/apache/datasketches/req/RelativeCompactor.java
+++ b/src/main/java/org/apache/datasketches/req/RelativeCompactor.java
@@ -19,7 +19,6 @@
 
 package org.apache.datasketches.req;
 
-import static java.lang.Math.min;
 import static java.lang.Math.round;
 import static org.apache.datasketches.Util.numberOfTrailingOnes;
 import static org.apache.datasketches.req.Buffer.LS;
@@ -56,7 +55,7 @@ public class RelativeCompactor {
    * Constructor
    * @param sectionSize the value of k
    * @param lgWeight this compactor's lgWeight
-   * @param debug optional
+   * @param debug true for debug info
    */
   RelativeCompactor(
       final int sectionSize,
@@ -76,11 +75,13 @@ public class RelativeCompactor {
     if (debug) { rand = new Random(1); }
     else { rand = new Random(); }
 
-    if (debug) {
-      println("    New Compactor: height: " + lgWeight
-          + "\tsectionSize: " + sectionSize
-          + "\tnumSections: " + numSections + LS);
-    }
+    if (debug) { printNewCompactor(); }
+  }
+
+  private void printNewCompactor() {
+    println("    New Compactor: height: " + lgWeight
+        + "\tsectionSize: " + sectionSize
+        + "\tnumSections: " + numSections + LS);
   }
 
   /**
@@ -116,64 +117,82 @@ public class RelativeCompactor {
    * @return the array of items to be promoted to the next level compactor
    */
   float[] compact() {
-    if (debug) {
-      println("  Compacting[" + lgWeight + "] nomCapacity: " + getNomCapacity()
-        + "\tsectionSize: " + sectionSize
-        + "\tnumSections: " + numSections
-        + "\tstate(bin): " + Integer.toBinaryString(state));
-    }
-
-    if (!buf.isSorted()) {
-      buf.sort(); //Footnote 1
-    }
-
-    if (debug) { print("    "); print(toHorizontalList(0)); }
+    final int count = buf.getItemCount();
+    if (debug) { printCompactingStart(); }
+    if (!buf.isSorted()) { buf.sort(); } //Footnote 1
+    if (debug) { print("    "); print(toHorizontalList(0)); } //#decimals
 
     //choose a part of the buffer to compact
-    final int compactionOffset; //a.k.a.  "s" see footnote 2
-    final int secsToCompact = min(numberOfTrailingOnes(state) + 1, numSections - 1);
-    compactionOffset = buf.getItemCount() - (secsToCompact * sectionSize);
-
-    adjustSectSizeNumSect(); //see Footnotes 3, 4 and 8
+    final int secsToCompact = numberOfTrailingOnes(state) + 1;
+    final int compactionStart = computeCompactionStart(secsToCompact); //a.k.a.  "s" see footnote 2
+    assert compactionStart <= (count - 2); //Footnote 5
 
-    assert compactionOffset <= (buf.getItemCount() - 2); //Footnote 5; This is easier to read!
-
-    if ((numCompactions % 2) == 1) { coin = !coin; } //invert coin; Footnote 6
+    if ((numCompactions & 1) == 1) { coin = !coin; } //if odd, flip coin; Footnote 6
     else { coin = (rand.nextDouble() < 0.5); }       //random coin flip
 
     final float[] promote = (coin)
-        ? buf.getOdds(compactionOffset, buf.getItemCount())
-        : buf.getEvens(compactionOffset, buf.getItemCount());
-
-    if (debug) { //Footnote 7
-      println("    s: " + compactionOffset
-          + "\tsecsToComp: " + secsToCompact
-          + "\tsectionSize: " + sectionSize
-          + "\tnumSections: " + numSections);
-      final int delete = buf.getItemCount() - compactionOffset;
-      final int promoteLen = promote.length;
-      final int offset = (coin) ? 1 : 0;
-      println("    Promote: " + promoteLen + "\tDelete: " + delete + "\tOffset: " + offset);
-    }
+        ? buf.getOdds(compactionStart, count)
+        : buf.getEvens(compactionStart, count);
+
+    if (debug) { printCompactionDetail(compactionStart, secsToCompact, promote.length); }
 
-    buf.trimLength(compactionOffset);
+    buf.trimLength(compactionStart);
     numCompactions += 1;
     state += 1;
 
-    if (debug) {
-      println("    DONE: nomCapacity: " + getNomCapacity()
-        + "\tnumCompactions: " + numCompactions);
+    if (numCompactions >= (1 << (numSections - 1))) {
+      adjustSectSizeNumSect(); //see Footnotes 3, 4 and 8
+      printAdjSecSizeNumSec();
     }
+
+    if (debug) { printCompactionDone(); }
+
     return promote;
   } //End Compact
 
-  /**
-   * Sets the current nominal capacity of this compactor.
-   * @return the current maximum capacity of this compactor.
-   */
-  int getNomCapacity() {
-    final int nCap = 2 * numSections * sectionSize;
-    return nCap;
+  private int computeCompactionStart(final int secsToCompact) {
+    int s = (getNomCapacity() / 2) + ((numSections - secsToCompact) * sectionSize);
+    return (((buf.getItemCount() - s) & 1) == 1) ? ++s : s;
+  }
+
+  private void printAdjSecSizeNumSec() {
+    final StringBuilder sb = new StringBuilder();
+    sb.append("    ");
+    sb.append("Adjust: SectionSize: ").append(sectionSize);
+    sb.append(" NumSections: ").append(numSections);
+    println(sb.toString());
+  }
+
+  private void printCompactingStart() {
+    final StringBuilder sb = new StringBuilder();
+    sb.append("  ");
+    sb.append("Compacting[").append(lgWeight).append("] ");
+    sb.append("NomCapacity: ").append(getNomCapacity());
+    sb.append("\tSectionSize: ").append(sectionSize);
+    sb.append("\tNumSections: ").append(numSections);
+    sb.append("\tState(bin): ").append(Integer.toBinaryString(state));
+    println(sb.toString());
+  }
+
+  private void printCompactionDetail(final int compactionStart, final int secsToCompact,
+      final int promoteLen) { //Footnote 7
+    final StringBuilder sb = new StringBuilder();
+    final int count = buf.getItemCount();
+    sb.append("    ");
+    sb.append("SecsToCompact: ").append(secsToCompact);
+    sb.append("\tNoCompact: ").append(compactionStart);
+    sb.append("\tDoCompact: ").append(count - compactionStart).append(LS);
+    final int delete = count - compactionStart;
+    final String oddOrEven = (coin) ? "Odds" : "Evens";
+    sb.append("    ");
+    sb.append("Promote: ").append(promoteLen);
+    sb.append("\tDelete: ").append(delete);
+    sb.append("\tChoose: ").append(oddOrEven);
+    println(sb.toString());
+  }
+
+  private void printCompactionDone() {
+    println("    DONE: NumCompactions: " + numCompactions);
   }
 
   /**
@@ -221,6 +240,15 @@ public class RelativeCompactor {
   }
 
   /**
+   * Sets the current nominal capacity of this compactor.
+   * @return the current maximum capacity of this compactor.
+   */
+  int getNomCapacity() {
+    final int nCap = 2 * numSections * sectionSize;
+    return nCap;
+  }
+
+  /**
    * Gets the number of retained values in this compactor.
    * @return the number of retained values in this compactor.
    */
diff --git a/src/main/java/org/apache/datasketches/req/RelativeErrorQuantiles.java b/src/main/java/org/apache/datasketches/req/RelativeErrorQuantiles.java
index 8186964..9c7e426 100644
--- a/src/main/java/org/apache/datasketches/req/RelativeErrorQuantiles.java
+++ b/src/main/java/org/apache/datasketches/req/RelativeErrorQuantiles.java
@@ -59,7 +59,7 @@ public class RelativeErrorQuantiles {
   //constant probability
   final static int DEFAULT_K = 50;
 
-  private int k; //default
+  private int k;
   private boolean debug = false;
   List<RelativeCompactor> compactors = new ArrayList<>();
   //int levels; //number of compactors; was H
@@ -119,24 +119,17 @@ public class RelativeErrorQuantiles {
 
   void compress(final boolean lazy) {
     updateMaxNomSize();
-    if (debug) {
-      println("COMPRESS:       sKsize: " + size + "\t>=\t"
-          + "\tmaxNomSize: " + maxNomSize
-          + "\tN: " + totalN);
-    }
+    if (debug) { printStartCompress(); }
 
     if (size < maxNomSize) { return; }
     for (int h = 0; h < compactors.size(); h++) {
       final RelativeCompactor c = compactors.get(h);
-      final int re = c.getNumRetainedEntries();
-      final int nc = c.getNomCapacity();
+      final int retEnt = c.getNumRetainedEntries();
+      final int nomCap = c.getNomCapacity();
 
-      if (re >= nc) {
+      if (retEnt >= nomCap) {
         if ((h + 1) >= levels()) {
-          if (debug) {
-            println("  Must Add Compactor: len(c[" + h + "]): "
-                + re + "\t>=\tc[" + h + "].nomCapacity(): " + nc);
-          }
+          if (debug) { printAddCompactor(h, retEnt, nomCap); }
           grow(); //add a level
         }
         final float[] promoted = c.compact();
@@ -145,20 +138,47 @@ public class RelativeErrorQuantiles {
         if (lazy && (size < maxNomSize)) { break; }
       }
     }
-    if (debug) {
-      for (int h = 0; h < compactors.size(); h++) {
-        final RelativeCompactor c = compactors.get(h);
-        print(c.toHorizontalList(0));
-      }
-      println("COMPRESS: DONE: sKsize: " + size
-          + "\tMaxNomSize: " + maxNomSize + LS);
+    if (debug) { printAllHorizList(); }
+  }
+
+  private static void printAddCompactor(final int h, final int retEnt, final int nomCap) {
+    final StringBuilder sb = new StringBuilder();
+    sb.append("  ");
+    sb.append("Must Add Compactor: len(c[").append(h).append("]): ");
+    sb.append(retEnt).append(" >= c[").append(h).append("].nomCapacity(): ").append(nomCap);
+    println(sb.toString());
+  }
+
+  private void printStartCompress() {
+    final StringBuilder sb = new StringBuilder();
+    sb.append("COMPRESS: ");
+    sb.append("skSize: ").append(size).append(" >= ");
+    sb.append("MaxNomSize: ").append(maxNomSize);
+    sb.append("; N: ").append(totalN);
+    println(sb.toString());
+  }
+
+  private void printAllHorizList() {
+    for (int h = 0; h < compactors.size(); h++) {
+      final RelativeCompactor c = compactors.get(h);
+      print(c.toHorizontalList(0));
     }
+    println("COMPRESS: DONE: sKsize: " + size
+        + "\tMaxNomSize: " + maxNomSize + LS);
   }
 
   //  public double[] getCDF(final float[] splitPoints) {
   //    return getPmfOrCdf(splitPoints, true);
   //  }
 
+  /**
+   * Gets the total number of items offered to the sketch.
+   * @return the total number of items offered to the sketch.
+   */
+  public long getN() {
+    return totalN;
+  }
+
   @SuppressWarnings("static-method")
   private double[] getPmfOrCdf(final float[] splitPoints, final boolean isCdf) {
     return null;
@@ -243,7 +263,7 @@ public class RelativeErrorQuantiles {
     return (double)nnRank / totalN;
   }
 
-  Pair[] ranks() { //debug
+  Pair[] ranks() { //not yet used for debug
     return null;
   }
 
@@ -272,7 +292,7 @@ public class RelativeErrorQuantiles {
   }
 
   void update(final float item) {
-    if (!Float.isFinite(item)) { return; }
+    if (!Float.isFinite(item)) { return; } //TODO: We may want to throw here
 
     final RelativeCompactor c = compactors.get(0).append(item);
     size++;
diff --git a/src/test/java/org/apache/datasketches/req/RelativeErrorSketchTest.java b/src/test/java/org/apache/datasketches/req/RelativeErrorSketchTest.java
index 2c8afb4..e0233c7 100644
--- a/src/test/java/org/apache/datasketches/req/RelativeErrorSketchTest.java
+++ b/src/test/java/org/apache/datasketches/req/RelativeErrorSketchTest.java
@@ -19,6 +19,8 @@
 
 package org.apache.datasketches.req;
 
+import static org.apache.datasketches.QuantilesHelper.getEvenlySpacedRanks;
+
 import org.testng.annotations.Test;
 
 /**
@@ -30,20 +32,34 @@ public class RelativeErrorSketchTest {
 
   @Test
   public void test1() {
-    RelativeErrorQuantiles sk = new RelativeErrorQuantiles(4, true); //w debug
-    for (int i = 101; i-- > 1; ) {
-      sk.update(i);
+    RelativeErrorQuantiles sk = new RelativeErrorQuantiles(6, true); //w debug
+    int max = 200;
+    int min = 1;
+    boolean up = false;
+
+    if (up) {
+      for (int i = min; i <= max; i++) {
+        sk.update(i);
+      }
+    } else { //down
+      for (int i = max + 1; i-- > min; ) {
+        sk.update(i);
+      }
     }
     print(sk.getSummary(0));
 
-    for (float i = 10; i <= 100; i += 10) {
-      printRank(sk, i + .5f);
+    double[] ranks = getEvenlySpacedRanks(11);
+    println("Ranks Test:");
+    for (int i = 0; i < ranks.length; i++) {
+      printRank(sk, ((float)ranks[i] * (max - min)) + min);
     }
   }
 
   private static void printRank(RelativeErrorQuantiles sk, float v) {
     double r = sk.rank(v);
-    println("Normalized Rank: value: " + v + ", rank: " + r);
+    String rstr = String.format("%.2f", r);
+    String vstr = String.format("%.2f", v);
+    println("Value: " + vstr + ", Rank: " + rstr);
   }
 
   @Test


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org