You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by le...@apache.org on 2022/07/13 22:25:31 UTC

[datasketches-characterization] branch matchJava4.0.0 created (now 98d2834)

This is an automated email from the ASF dual-hosted git repository.

leerho pushed a change to branch matchJava4.0.0
in repository https://gitbox.apache.org/repos/asf/datasketches-characterization.git


      at 98d2834  These changes update the various profiles and methods in Characterization as a result of some recent changes in ds-java master.

This branch includes the following new commits:

     new 98d2834  These changes update the various profiles and methods in Characterization as a result of some recent changes in ds-java master.

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  Revisions
listed as "add" were already present in the repository and have only
been added to this reference.



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org


[datasketches-characterization] 01/01: These changes update the various profiles and methods in Characterization as a result of some recent changes in ds-java master.

Posted by le...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

leerho pushed a commit to branch matchJava4.0.0
in repository https://gitbox.apache.org/repos/asf/datasketches-characterization.git

commit 98d2834ab08867224a8a1d53b878b97649f324a1
Author: Lee Rhodes <le...@users.noreply.github.com>
AuthorDate: Wed Jul 13 15:25:26 2022 -0700

    These changes update the various profiles and methods in
    Characterization as a result of some recent changes in ds-java master.
---
 .../org/apache/datasketches/MonotonicPoints.java   |  4 +-
 .../characterization/AccuracyStats.java            | 43 ++++++++++++++++++--
 .../characterization/BoundsAccuracyStats.java      |  2 +-
 .../characterization/fdt/FdtAccuracyProfile.java   |  4 +-
 .../frequencies/BaseFrequenciesSpeedProfile.java   |  2 +-
 .../hash/BaseHashSpeedProfile.java                 |  2 +-
 .../hll/HllConfidenceIntervalInverseProfile.java   |  2 +-
 ...llDoublesSketchRankGaussianAccuracyProfile.java |  2 +-
 ...KllFloatsSketchRankGaussianAccuracyProfile.java |  2 +-
 .../kll/KllSketchSizeSpeedProfile.java             |  2 +-
 .../characterization/memory/BaseSpeedProfile.java  |  2 +-
 .../memory/BaseUtf8SpeedProfile.java               |  2 +-
 .../quantiles/BaseQuantilesAccuracyProfile.java    |  2 +-
 .../quantiles/BaseQuantilesSpeedProfile.java       |  2 +-
 .../tdigest/QuantilesAccuracyProfile.java          |  2 +-
 .../quantiles/tdigest/QuantilesSpeedProfile.java   |  2 +-
 .../req/ReqSketchAccuracyProfile.java              |  2 +-
 .../req/ReqSketchSizeSpeedProfile.java             |  2 +-
 ...ile.java => ThetaIntersectAccuracyProfile.java} | 13 +++---
 .../uniquecount/BaseAccuracyProfile.java           | 47 +++++++++++++++-------
 .../uniquecount/BaseBoundsAccuracyProfile.java     |  2 +-
 .../uniquecount/BaseSerDeProfile.java              |  2 +-
 .../uniquecount/BaseUpdateSpeedProfile.java        |  2 +-
 ...tionJob.conf => ThetaIntersectAccuracyJob.conf} | 33 ++++++++-------
 .../apache/datasketches/MonotonicPointsTest.java   |  2 +-
 25 files changed, 121 insertions(+), 61 deletions(-)

diff --git a/src/main/java/org/apache/datasketches/MonotonicPoints.java b/src/main/java/org/apache/datasketches/MonotonicPoints.java
index 31f3898..1319cac 100644
--- a/src/main/java/org/apache/datasketches/MonotonicPoints.java
+++ b/src/main/java/org/apache/datasketches/MonotonicPoints.java
@@ -150,8 +150,8 @@ public class MonotonicPoints {
    * @return the actual number of plotting points between lgStart and lgEnd.
    */
   public static final int countPoints(final int lgStart, final int lgEnd, final int ppo) {
-    int p = 1 << lgStart;
-    final int end = 1 << lgEnd;
+    long p = 1L << lgStart;
+    final long end = 1L << lgEnd;
     int count = 0;
     while (p <= end) {
       p = pwr2SeriesNext(ppo, p);
diff --git a/src/main/java/org/apache/datasketches/characterization/AccuracyStats.java b/src/main/java/org/apache/datasketches/characterization/AccuracyStats.java
index f9bd056..37942ea 100644
--- a/src/main/java/org/apache/datasketches/characterization/AccuracyStats.java
+++ b/src/main/java/org/apache/datasketches/characterization/AccuracyStats.java
@@ -39,16 +39,31 @@ public class AccuracyStats {
   public double sumRelErr = 0;
   public double sumSqErr = 0;
   public double rmsre = 0; //used later for plotting, set externally
-  public double trueValue; //set by constructor
+  public double trueValue; //set by constructor, used only for error analysis
+  public long uniques;     //set by constructor, used as a coordinate
   public int bytes = 0;
 
   /**
+   * Used for single sketch or union accuracy.
    * @param k the configuration value for the quantiles sketch. It must be a power of two.
    * @param trueValue the true value
    */
   public AccuracyStats(final int k, final long trueValue) {
     qsk = new DoublesSketchBuilder().setK(k).build(); //Quantiles
     this.trueValue = trueValue;
+    this.uniques = trueValue;
+  }
+
+  /**
+   * Used for intersection accuracy.
+   * @param k the configuration value for the quantiles sketch. It must be a power of two.
+   * @param trueValue the true value
+   * @param uniques number of uniques, used as a coordinate in intersection testing.
+   */
+  public AccuracyStats(final int k, final long trueValue, final long uniques) {
+    qsk = new DoublesSketchBuilder().setK(k).build(); //Quantiles
+    this.trueValue = trueValue;
+    this.uniques = uniques;
   }
 
   /**
@@ -76,7 +91,7 @@ public class AccuracyStats {
       final int lgMin, final int lgMax, final int ppo, final int lgQK) {
     final int qLen = MonotonicPoints.countPoints(lgMin, lgMax, ppo);
     final AccuracyStats[] qArr = new AccuracyStats[qLen];
-    int p = 1 << lgMin;
+    long p = 1L << lgMin;
     for (int i = 0; i < qLen; i++) {
       qArr[i] = new AccuracyStats(1 << lgQK, p);
       p = pwr2SeriesNext(ppo, p);
@@ -84,6 +99,28 @@ public class AccuracyStats {
     return qArr;
   }
 
+  /**
+   * Build the AccuracyStats Array for Intersection.
+   * Every element of the AccuracyStats array has 2^lgMin as its trueValue; only the uniques coordinate varies.
+   * @param lgMin log_base2 of the minimum number of uniques used
+   * @param lgMax log_base2 of the maximum number of uniques used
+   * @param ppo the number of points per octave
+   * @param lgQK the lgK for the Quantiles sketch
+   * @return an AccuracyStats array
+   */
+  public static final AccuracyStats[] buildLog2IntersectAccuracyStatsArray(
+      final int lgMin, final int lgMax, final int ppo, final int lgQK) {
+    final int qLen = MonotonicPoints.countPoints(lgMin, lgMax, ppo);
+    final AccuracyStats[] qArr = new AccuracyStats[qLen];
+    final long trueValue = 1L << lgMin;
+    long p = trueValue; //becomes the uniques coordinate
+    for (int i = 0; i < qLen; i++) {
+      qArr[i] = new AccuracyStats(1 << lgQK, trueValue, p);
+      p = pwr2SeriesNext(ppo, p);
+    }
+    return qArr;
+  }
+
   /**
    * Build the AccuracyStats Array
    * @param log10Min log_base2 of the minimum number of uniques used
@@ -99,7 +136,7 @@ public class AccuracyStats {
     long p = round(pow(10, log10Min));
     for (int i = 0; i < qLen; i++) {
       qArr[i] = new AccuracyStats(1 << lgQK, p);
-      p = (int) powerSeriesNextDouble(ppb, p, true, 10.0);
+      p = (long) powerSeriesNextDouble(ppb, p, true, 10.0);
     }
     return qArr;
   }
diff --git a/src/main/java/org/apache/datasketches/characterization/BoundsAccuracyStats.java b/src/main/java/org/apache/datasketches/characterization/BoundsAccuracyStats.java
index 4451fea..aedbb77 100644
--- a/src/main/java/org/apache/datasketches/characterization/BoundsAccuracyStats.java
+++ b/src/main/java/org/apache/datasketches/characterization/BoundsAccuracyStats.java
@@ -87,7 +87,7 @@ public class BoundsAccuracyStats {
     int p = 1 << lgMin;
     for (int i = 0; i < qLen; i++) {
       qArr[i] = new BoundsAccuracyStats(1 << lgQK, p);
-      p = pwr2SeriesNext(ppo, p);
+      p = (int)pwr2SeriesNext(ppo, p);
     }
     return qArr;
   }
diff --git a/src/main/java/org/apache/datasketches/characterization/fdt/FdtAccuracyProfile.java b/src/main/java/org/apache/datasketches/characterization/fdt/FdtAccuracyProfile.java
index 2531505..2eff30f 100644
--- a/src/main/java/org/apache/datasketches/characterization/fdt/FdtAccuracyProfile.java
+++ b/src/main/java/org/apache/datasketches/characterization/fdt/FdtAccuracyProfile.java
@@ -129,7 +129,7 @@ public class FdtAccuracyProfile implements JobProfile {
     slope = getSlope(p1, p2);
     xPoints = 0;
     int xG;
-    for (xG = minG; xG <= maxG; xG = pwr2SeriesNext(gPPO, xG)) {
+    for (xG = minG; xG <= maxG; xG = (int)pwr2SeriesNext(gPPO, xG)) {
       xPoints++;
     }
   }
@@ -139,7 +139,7 @@ public class FdtAccuracyProfile implements JobProfile {
     groupsGenerated = 0;
     sketchUpdates = 0;
     int xG, yU;
-    for (xG = minG; xG <= maxG; xG = pwr2SeriesNext(gPPO, xG)) { //select major group
+    for (xG = minG; xG <= maxG; xG = (int)pwr2SeriesNext(gPPO, xG)) { //select major group
       groupsGenerated += xG;
       yU = (int) Math.round(getY(p1, slope, xG)); //compute target # uniques
       for (int g = 1; g <= xG; g++) { //select the minor group
diff --git a/src/main/java/org/apache/datasketches/characterization/frequencies/BaseFrequenciesSpeedProfile.java b/src/main/java/org/apache/datasketches/characterization/frequencies/BaseFrequenciesSpeedProfile.java
index bda641b..370b44d 100644
--- a/src/main/java/org/apache/datasketches/characterization/frequencies/BaseFrequenciesSpeedProfile.java
+++ b/src/main/java/org/apache/datasketches/characterization/frequencies/BaseFrequenciesSpeedProfile.java
@@ -65,7 +65,7 @@ public abstract class BaseFrequenciesSpeedProfile implements JobProfile {
         doTrial();
       }
       job.println(getStats(streamLength, numTrials));
-      streamLength = pwr2SeriesNext(pointsPerOctave, streamLength);
+      streamLength = (int)pwr2SeriesNext(pointsPerOctave, streamLength);
     }
   }
 
diff --git a/src/main/java/org/apache/datasketches/characterization/hash/BaseHashSpeedProfile.java b/src/main/java/org/apache/datasketches/characterization/hash/BaseHashSpeedProfile.java
index d0a35bf..c592900 100644
--- a/src/main/java/org/apache/datasketches/characterization/hash/BaseHashSpeedProfile.java
+++ b/src/main/java/org/apache/datasketches/characterization/hash/BaseHashSpeedProfile.java
@@ -98,7 +98,7 @@ public abstract class BaseHashSpeedProfile implements JobProfile {
     final int minX = 1 << lgMinX;
     int lastX = 0;
     while (lastX < maxX) {
-      final int nextX = lastX == 0 ? minX : pwr2SeriesNext(xPPO, lastX);
+      final int nextX = lastX == 0 ? minX : (int)pwr2SeriesNext(xPPO, lastX);
       lastX = nextX;
       final int trials = getNumTrials(nextX);
       p.reset(nextX, trials);
diff --git a/src/main/java/org/apache/datasketches/characterization/hll/HllConfidenceIntervalInverseProfile.java b/src/main/java/org/apache/datasketches/characterization/hll/HllConfidenceIntervalInverseProfile.java
index 51991ba..30eea47 100644
--- a/src/main/java/org/apache/datasketches/characterization/hll/HllConfidenceIntervalInverseProfile.java
+++ b/src/main/java/org/apache/datasketches/characterization/hll/HllConfidenceIntervalInverseProfile.java
@@ -161,7 +161,7 @@ public class HllConfidenceIntervalInverseProfile implements JobProfile {
     //This will generate a table of data for each intermediate Trials point
     int lastT = 0;
     while (lastT < maxT) {
-      final int nextT = lastT == 0 ? minT : pwr2SeriesNext(tPPO, lastT);
+      final int nextT = lastT == 0 ? minT : (int)pwr2SeriesNext(tPPO, lastT);
       final int delta = nextT - lastT;
       for (int i = 0; i < delta; i++) {
         doTrial();
diff --git a/src/main/java/org/apache/datasketches/characterization/kll/KllDoublesSketchRankGaussianAccuracyProfile.java b/src/main/java/org/apache/datasketches/characterization/kll/KllDoublesSketchRankGaussianAccuracyProfile.java
index d6db0e7..5e8e660 100644
--- a/src/main/java/org/apache/datasketches/characterization/kll/KllDoublesSketchRankGaussianAccuracyProfile.java
+++ b/src/main/java/org/apache/datasketches/characterization/kll/KllDoublesSketchRankGaussianAccuracyProfile.java
@@ -155,7 +155,7 @@ public class KllDoublesSketchRankGaussianAccuracyProfile implements JobProfile {
       doStreamLength(streamLength);
       //go to next stream length
       if (useppo) {
-        streamLength = pwr2SeriesNext(ppo, streamLength);
+        streamLength = (int)pwr2SeriesNext(ppo, streamLength);
       } else {
         lgCurSL += lgDelta;
         streamLength = 1 << lgCurSL;
diff --git a/src/main/java/org/apache/datasketches/characterization/kll/KllFloatsSketchRankGaussianAccuracyProfile.java b/src/main/java/org/apache/datasketches/characterization/kll/KllFloatsSketchRankGaussianAccuracyProfile.java
index f553db7..0ca697b 100644
--- a/src/main/java/org/apache/datasketches/characterization/kll/KllFloatsSketchRankGaussianAccuracyProfile.java
+++ b/src/main/java/org/apache/datasketches/characterization/kll/KllFloatsSketchRankGaussianAccuracyProfile.java
@@ -161,7 +161,7 @@ public class KllFloatsSketchRankGaussianAccuracyProfile implements JobProfile {
       doStreamLength(streamLength);
       //go to next stream length
       if (useppo) {
-        streamLength = pwr2SeriesNext(ppo, streamLength);
+        streamLength = (int)pwr2SeriesNext(ppo, streamLength);
       } else {
         lgCurSL += lgDelta;
         streamLength = 1 << lgCurSL;
diff --git a/src/main/java/org/apache/datasketches/characterization/kll/KllSketchSizeSpeedProfile.java b/src/main/java/org/apache/datasketches/characterization/kll/KllSketchSizeSpeedProfile.java
index 2ea9095..6302ea5 100644
--- a/src/main/java/org/apache/datasketches/characterization/kll/KllSketchSizeSpeedProfile.java
+++ b/src/main/java/org/apache/datasketches/characterization/kll/KllSketchSizeSpeedProfile.java
@@ -135,7 +135,7 @@ public class KllSketchSizeSpeedProfile implements JobProfile {
     job.printf(sFmt, (Object[]) columnLabels); //Header
     int pp = 1;
     while (lastSL < maxSL) { //Trials for each plotPoint on X-axis, and one row on output
-      final int nextSL = lastSL == 0 ? minSL : pwr2SeriesNext(ppoSL, lastSL);
+      final int nextSL = lastSL == 0 ? minSL : (int)pwr2SeriesNext(ppoSL, lastSL);
       lastSL = nextSL;
       final int trials = getNumTrials(nextSL);
 
diff --git a/src/main/java/org/apache/datasketches/characterization/memory/BaseSpeedProfile.java b/src/main/java/org/apache/datasketches/characterization/memory/BaseSpeedProfile.java
index 0b05ebe..b7943e0 100644
--- a/src/main/java/org/apache/datasketches/characterization/memory/BaseSpeedProfile.java
+++ b/src/main/java/org/apache/datasketches/characterization/memory/BaseSpeedProfile.java
@@ -117,7 +117,7 @@ static class Point {
     final int minX = 1 << lgMinX;
     int lastX = 0;
     while (lastX < maxX) {
-      final int nextX = lastX == 0 ? minX : pwr2SeriesNext(xPPO, lastX);
+      final int nextX = lastX == 0 ? minX : (int)pwr2SeriesNext(xPPO, lastX);
       lastX = nextX;
       final int trials = getNumTrials(nextX);
       configure(nextX);
diff --git a/src/main/java/org/apache/datasketches/characterization/memory/BaseUtf8SpeedProfile.java b/src/main/java/org/apache/datasketches/characterization/memory/BaseUtf8SpeedProfile.java
index 919afb9..4f29913 100644
--- a/src/main/java/org/apache/datasketches/characterization/memory/BaseUtf8SpeedProfile.java
+++ b/src/main/java/org/apache/datasketches/characterization/memory/BaseUtf8SpeedProfile.java
@@ -154,7 +154,7 @@ public abstract class BaseUtf8SpeedProfile implements JobProfile {
     final int minX = 1 << lgMinX;
     int lastX = 0;
     while (lastX < maxX) { //do each plot point on the X-axis
-      final int nextX = lastX == 0 ? minX : pwr2SeriesNext(xPPO, lastX);
+      final int nextX = lastX == 0 ? minX : (int)pwr2SeriesNext(xPPO, lastX);
       lastX = nextX;
       final int trials = getNumTrials(nextX);
       //configure();
diff --git a/src/main/java/org/apache/datasketches/characterization/quantiles/BaseQuantilesAccuracyProfile.java b/src/main/java/org/apache/datasketches/characterization/quantiles/BaseQuantilesAccuracyProfile.java
index 9cb7d7d..bfffde9 100644
--- a/src/main/java/org/apache/datasketches/characterization/quantiles/BaseQuantilesAccuracyProfile.java
+++ b/src/main/java/org/apache/datasketches/characterization/quantiles/BaseQuantilesAccuracyProfile.java
@@ -107,7 +107,7 @@ public abstract class BaseQuantilesAccuracyProfile implements JobProfile {
       job.println(streamLength + "\t"
           + String.format("%.16f\t%.16f\t%.16f\t%.16f\t%.16f\t%.16f\t%.16f",
               qArr[0], qArr[1], qArr[2], qArr[3], qArr[4], qArr[5], qArr[6]));
-      streamLength = pwr2SeriesNext(ppo, streamLength);
+      streamLength = (int)pwr2SeriesNext(ppo, streamLength);
     }
     job.println("");
   }
diff --git a/src/main/java/org/apache/datasketches/characterization/quantiles/BaseQuantilesSpeedProfile.java b/src/main/java/org/apache/datasketches/characterization/quantiles/BaseQuantilesSpeedProfile.java
index 7762e2d..efbbd7c 100644
--- a/src/main/java/org/apache/datasketches/characterization/quantiles/BaseQuantilesSpeedProfile.java
+++ b/src/main/java/org/apache/datasketches/characterization/quantiles/BaseQuantilesSpeedProfile.java
@@ -69,7 +69,7 @@ public abstract class BaseQuantilesSpeedProfile implements JobProfile {
         doTrial();
       }
       job.println(getStats(streamLength, numTrials, numQueryValues));
-      streamLength = pwr2SeriesNext(pointsPerOctave, streamLength);
+      streamLength = (int)pwr2SeriesNext(pointsPerOctave, streamLength);
     }
   }
 
diff --git a/src/main/java/org/apache/datasketches/characterization/quantiles/tdigest/QuantilesAccuracyProfile.java b/src/main/java/org/apache/datasketches/characterization/quantiles/tdigest/QuantilesAccuracyProfile.java
index 248b1f4..2ca7ffc 100644
--- a/src/main/java/org/apache/datasketches/characterization/quantiles/tdigest/QuantilesAccuracyProfile.java
+++ b/src/main/java/org/apache/datasketches/characterization/quantiles/tdigest/QuantilesAccuracyProfile.java
@@ -66,7 +66,7 @@ public abstract class QuantilesAccuracyProfile implements JobProfile {
       }
       job.println(streamLength + "\t"
           + String.format("%.2f", rankErrorSketch.getQuantile((double) errorPct / 100) * 100));
-      streamLength = pwr2SeriesNext(ppo, streamLength);
+      streamLength = (int)pwr2SeriesNext(ppo, streamLength);
     }
   }
 
diff --git a/src/main/java/org/apache/datasketches/characterization/quantiles/tdigest/QuantilesSpeedProfile.java b/src/main/java/org/apache/datasketches/characterization/quantiles/tdigest/QuantilesSpeedProfile.java
index caba3bd..7ce7587 100644
--- a/src/main/java/org/apache/datasketches/characterization/quantiles/tdigest/QuantilesSpeedProfile.java
+++ b/src/main/java/org/apache/datasketches/characterization/quantiles/tdigest/QuantilesSpeedProfile.java
@@ -61,7 +61,7 @@ public abstract class QuantilesSpeedProfile implements JobProfile {
         doTrial();
       }
       job.println(getStats(streamLength, numTrials, numQueryValues));
-      streamLength = pwr2SeriesNext(pointsPerOctave, streamLength);
+      streamLength = (int)pwr2SeriesNext(pointsPerOctave, streamLength);
     }
   }
 
diff --git a/src/main/java/org/apache/datasketches/characterization/req/ReqSketchAccuracyProfile.java b/src/main/java/org/apache/datasketches/characterization/req/ReqSketchAccuracyProfile.java
index 1682f9d..8dd8c85 100644
--- a/src/main/java/org/apache/datasketches/characterization/req/ReqSketchAccuracyProfile.java
+++ b/src/main/java/org/apache/datasketches/characterization/req/ReqSketchAccuracyProfile.java
@@ -222,7 +222,7 @@ public class ReqSketchAccuracyProfile implements JobProfile {
 
       //go to next stream length
       if (useppo) {
-        streamLength = pwr2SeriesNext(ppo, streamLength);
+        streamLength = (int)pwr2SeriesNext(ppo, streamLength);
       } else {
         lgCurSL += lgDelta;
         streamLength = 1 << lgCurSL;
diff --git a/src/main/java/org/apache/datasketches/characterization/req/ReqSketchSizeSpeedProfile.java b/src/main/java/org/apache/datasketches/characterization/req/ReqSketchSizeSpeedProfile.java
index 4e52c36..5ac1031 100644
--- a/src/main/java/org/apache/datasketches/characterization/req/ReqSketchSizeSpeedProfile.java
+++ b/src/main/java/org/apache/datasketches/characterization/req/ReqSketchSizeSpeedProfile.java
@@ -128,7 +128,7 @@ public class ReqSketchSizeSpeedProfile implements JobProfile {
     job.printf(sFmt, (Object[]) columnLabels); //Header
     int pp = 1;
     while (lastSL < maxSL) { //Trials for each plotPoint on X-axis, and one row on output
-      final int nextSL = lastSL == 0 ? minSL : pwr2SeriesNext(ppoSL, lastSL);
+      final int nextSL = lastSL == 0 ? minSL : (int)pwr2SeriesNext(ppoSL, lastSL);
       lastSL = nextSL;
       final int trials = getNumTrials(nextSL);
 
diff --git a/src/main/java/org/apache/datasketches/characterization/theta/ThetaAccuracyIntersectionProfile.java b/src/main/java/org/apache/datasketches/characterization/theta/ThetaIntersectAccuracyProfile.java
similarity index 85%
rename from src/main/java/org/apache/datasketches/characterization/theta/ThetaAccuracyIntersectionProfile.java
rename to src/main/java/org/apache/datasketches/characterization/theta/ThetaIntersectAccuracyProfile.java
index a47a275..e7aa081 100644
--- a/src/main/java/org/apache/datasketches/characterization/theta/ThetaAccuracyIntersectionProfile.java
+++ b/src/main/java/org/apache/datasketches/characterization/theta/ThetaIntersectAccuracyProfile.java
@@ -28,7 +28,7 @@ import org.apache.datasketches.theta.SetOperationBuilder;
 import org.apache.datasketches.theta.UpdateSketch;
 import org.apache.datasketches.theta.UpdateSketchBuilder;
 
-public class ThetaAccuracyIntersectionProfile extends BaseAccuracyProfile {
+public class ThetaIntersectAccuracyProfile extends BaseAccuracyProfile {
   private int myLgK; //avoids temporary conflict with BaseAccuracyProfile
   private boolean rebuild;
   private UpdateSketch skSm;
@@ -37,12 +37,13 @@ public class ThetaAccuracyIntersectionProfile extends BaseAccuracyProfile {
 
   @Override
   public void configure() {
+    if (!intersectTest) { throw new IllegalArgumentException("Missing intersectTest parameter"); }
     //Theta Sketch Profile
     myLgK = Integer.parseInt(prop.mustGet("LgK"));
-    rebuild = Boolean.parseBoolean(prop.mustGet("Rebuild"));
+    rebuild = Boolean.parseBoolean(prop.mustGet("THETA_rebuild"));
     final Family family = Family.stringToFamily(prop.mustGet("THETA_famName"));
-    final ResizeFactor rf = ResizeFactor.getRF(Integer.parseInt(prop.mustGet("LgRF")));
-    final float p = Float.parseFloat(prop.mustGet("P"));
+    final ResizeFactor rf = ResizeFactor.getRF(Integer.parseInt(prop.mustGet("THETA_lgRF")));
+    final float p = Float.parseFloat(prop.mustGet("THETA_p"));
     //final boolean direct = Boolean.parseBoolean(prop.mustGet("Direct"));
     final UpdateSketchBuilder udBldr = new UpdateSketchBuilder()
       .setLogNominalEntries(myLgK)
@@ -61,10 +62,11 @@ public class ThetaAccuracyIntersectionProfile extends BaseAccuracyProfile {
     final int qArrLen = qArr.length;
     skSm.reset();
     skLg.reset();
+    //intersection.reset();
     long lastUniques = 0;
     for (int i = 0; i < qArrLen; i++) {
       final AccuracyStats q = qArr[i];
-      final long delta = (long)(q.trueValue - lastUniques);
+      final long delta = (q.uniques - lastUniques);
       for (long u = 0; u < delta; u++) {
         if (i == 0) { skSm.update(vIn); }
         skLg.update(vIn++);
@@ -75,6 +77,7 @@ public class ThetaAccuracyIntersectionProfile extends BaseAccuracyProfile {
         skLg.rebuild();
       }
       final double est = intersection.intersect(skLg, skSm).getEstimate();
+      //final double est = skLg.getEstimate();
       q.update(est);
     }
   }
diff --git a/src/main/java/org/apache/datasketches/characterization/uniquecount/BaseAccuracyProfile.java b/src/main/java/org/apache/datasketches/characterization/uniquecount/BaseAccuracyProfile.java
index 27a639a..64fc19d 100644
--- a/src/main/java/org/apache/datasketches/characterization/uniquecount/BaseAccuracyProfile.java
+++ b/src/main/java/org/apache/datasketches/characterization/uniquecount/BaseAccuracyProfile.java
@@ -46,6 +46,7 @@ public abstract class BaseAccuracyProfile implements JobProfile {
   public int lgK;
   boolean interData;
   boolean postPMFs;
+  public boolean intersectTest = false;
   public boolean getSize = false;
   public AccuracyStats[] qArr;
 
@@ -69,8 +70,13 @@ public abstract class BaseAccuracyProfile implements JobProfile {
     //Sketch Profile
     lgK = Integer.parseInt(prop.mustGet("LgK"));
 
-    qArr = AccuracyStats.buildLog2AccuracyStatsArray(lgMinU, lgMaxU, uPPO, lgQK);
-
+    final String iKey = prop.get("IntersectTest");
+    intersectTest = (iKey == null) ? false : Boolean.parseBoolean(iKey);
+    if (intersectTest) {
+      qArr = AccuracyStats.buildLog2IntersectAccuracyStatsArray(lgMinU, lgMaxU, uPPO, lgQK);
+    } else {
+      qArr = AccuracyStats.buildLog2AccuracyStatsArray(lgMinU, lgMaxU, uPPO, lgQK);
+    }
     final String getSizeStr = prop.get("Trials_bytes");
     getSize = getSizeStr == null ? false : Boolean.parseBoolean(getSizeStr);
     configure();
@@ -114,12 +120,12 @@ public abstract class BaseAccuracyProfile implements JobProfile {
   private void doTrials() {
     final int minT = 1 << lgMinT;
     final int maxT = 1 << lgMaxT;
-    final int maxU = 1 << lgMaxU;
+    final long maxU = 1L << lgMaxU;
 
     //This will generate a table of data for each intermediate Trials point
     int lastTpt = 0;
     while (lastTpt < maxT) {
-      final int nextT = lastTpt == 0 ? minT : pwr2SeriesNext(tPPO, lastTpt);
+      final int nextT = lastTpt == 0 ? minT : (int)pwr2SeriesNext(tPPO, lastTpt);
       final int delta = nextT - lastTpt;
       for (int i = 0; i < delta; i++) {
         doTrial();
@@ -165,24 +171,27 @@ public abstract class BaseAccuracyProfile implements JobProfile {
     }
   }
 
-  private static void process(final boolean getSize, final AccuracyStats[] qArr,
+  private void process(final boolean getSize, final AccuracyStats[] qArr,
       final int cumTrials, final StringBuilder sb) {
 
     final int points = qArr.length;
     sb.setLength(0);
     for (int pt = 0; pt < points; pt++) {
       final AccuracyStats q = qArr[pt];
+      final double largeUniques = q.uniques;
       final double trueUniques = q.trueValue;
       final double meanEst = q.sumEst / cumTrials;
       final double meanRelErr = q.sumRelErr / cumTrials;
       final double meanSqErr = q.sumSqErr / cumTrials; //intermediate value
-      final double normMeanSqErr = meanSqErr / (1.0 * trueUniques * trueUniques); //intermediate value
+      final double normMeanSqErr = meanSqErr / (trueUniques * trueUniques); //intermediate value
       final double rmsRelErr = Math.sqrt(normMeanSqErr); //a.k.a. Normalied RMS Error or NRMSE
       q.rmsre = rmsRelErr;
       final int bytes = q.bytes;
 
       //OUTPUT
-      //sb.setLength(0);
+      if (intersectTest) {
+        sb.append(largeUniques).append(TAB);
+      }
       sb.append(trueUniques).append(TAB);
 
       //Sketch meanEst, meanEstErr, norm RMS Err
@@ -210,16 +219,19 @@ public abstract class BaseAccuracyProfile implements JobProfile {
     }
   }
 
-  private static String getHeader() {
+  private String getHeader() {
     final StringBuilder sb = new StringBuilder();
-    sb.append("InU").append(TAB);        //col 1
+    if (intersectTest) {
+      sb.append("LargeU").append(TAB);
+    }
+    sb.append("TrueU").append(TAB);        //col 1
     //Estimates
-    sb.append("MeanEst").append(TAB);    //col 2
-    sb.append("MeanRelErr").append(TAB); //col 3
-    sb.append("RMS_RE").append(TAB);     //col 4
+    sb.append("MeanEst").append(TAB);
+    sb.append("MeanRelErr").append(TAB);
+    sb.append("RMS_RE").append(TAB);
 
     //Trials
-    sb.append("Trials").append(TAB);     //col 5
+    sb.append("Trials").append(TAB);
 
     //Quantiles
     sb.append("Min").append(TAB);
@@ -242,7 +254,7 @@ public abstract class BaseAccuracyProfile implements JobProfile {
    * Outputs the Probability Mass Function given the AccuracyStats.
    * @param q the given AccuracyStats
    */
-  private static String outputPMF(final AccuracyStats q) {
+  private String outputPMF(final AccuracyStats q) {
     final DoublesSketch qSk = q.qsk;
     final double[] splitPoints = qSk.getQuantiles(GAUSSIANS_4SD); //1:1
     final double[] reducedSp = reduceSplitPoints(splitPoints);
@@ -253,7 +265,12 @@ public abstract class BaseAccuracyProfile implements JobProfile {
     //output Histogram
     final String hdr = String.format("%10s%4s%12s", "Trials", "    ", "Est");
     final String fmt = "%10d%4s%12.2f";
-    sb.append("Histogram At " + q.trueValue).append(LS);
+    if (intersectTest) {
+      sb.append("Intersect Histogram At " + q.uniques).append(LS);
+    } else {
+      sb.append("Histogram At " + q.uniques).append(LS);
+    }
+
     sb.append(hdr).append(LS);
     for (int i = 0; i < reducedSp.length; i++) {
       final int hits = (int)(pmfArr[i + 1] * trials);
diff --git a/src/main/java/org/apache/datasketches/characterization/uniquecount/BaseBoundsAccuracyProfile.java b/src/main/java/org/apache/datasketches/characterization/uniquecount/BaseBoundsAccuracyProfile.java
index f301a9e..c777e7a 100644
--- a/src/main/java/org/apache/datasketches/characterization/uniquecount/BaseBoundsAccuracyProfile.java
+++ b/src/main/java/org/apache/datasketches/characterization/uniquecount/BaseBoundsAccuracyProfile.java
@@ -115,7 +115,7 @@ public abstract class BaseBoundsAccuracyProfile implements JobProfile {
     //This will generate a table of data up for each intermediate Trials point
     int lastT = 0;
     while (lastT < maxT) {
-      final int nextT = lastT == 0 ? minT : pwr2SeriesNext(tPPO, lastT);
+      final int nextT = lastT == 0 ? minT : (int)pwr2SeriesNext(tPPO, lastT);
       final int delta = nextT - lastT;
       for (int i = 0; i < delta; i++) {
         doTrial();
diff --git a/src/main/java/org/apache/datasketches/characterization/uniquecount/BaseSerDeProfile.java b/src/main/java/org/apache/datasketches/characterization/uniquecount/BaseSerDeProfile.java
index 6f3f1c7..5009470 100644
--- a/src/main/java/org/apache/datasketches/characterization/uniquecount/BaseSerDeProfile.java
+++ b/src/main/java/org/apache/datasketches/characterization/uniquecount/BaseSerDeProfile.java
@@ -103,7 +103,7 @@ public abstract class BaseSerDeProfile implements JobProfile {
     job.println(getHeader());
 
     while (lastU < maxU) { //for each U point on X-axis, OR one row on output
-      final int nextU = lastU == 0 ? minU : pwr2SeriesNext(uPPO, lastU);
+      final int nextU = lastU == 0 ? minU : (int)pwr2SeriesNext(uPPO, lastU);
       lastU = nextU;
       final int trials = getNumTrials(nextU);
 
diff --git a/src/main/java/org/apache/datasketches/characterization/uniquecount/BaseUpdateSpeedProfile.java b/src/main/java/org/apache/datasketches/characterization/uniquecount/BaseUpdateSpeedProfile.java
index 6ac98a8..b28ae6d 100644
--- a/src/main/java/org/apache/datasketches/characterization/uniquecount/BaseUpdateSpeedProfile.java
+++ b/src/main/java/org/apache/datasketches/characterization/uniquecount/BaseUpdateSpeedProfile.java
@@ -92,7 +92,7 @@ public abstract class BaseUpdateSpeedProfile implements JobProfile {
     final StringBuilder dataStr = new StringBuilder();
     job.println(getHeader());
     while (lastU < maxU) { //Trials for each U point on X-axis, and one row on output
-      final int nextU = lastU == 0 ? minU : pwr2SeriesNext(uPPO, lastU);
+      final int nextU = lastU == 0 ? minU : (int)pwr2SeriesNext(uPPO, lastU);
       lastU = nextU;
       final int trials = getNumTrials(nextU);
 
diff --git a/src/main/resources/theta/ThetaAccuracyIntersectionJob.conf b/src/main/resources/theta/ThetaIntersectAccuracyJob.conf
similarity index 57%
rename from src/main/resources/theta/ThetaAccuracyIntersectionJob.conf
rename to src/main/resources/theta/ThetaIntersectAccuracyJob.conf
index 88922c7..6ab74bf 100644
--- a/src/main/resources/theta/ThetaAccuracyIntersectionJob.conf
+++ b/src/main/resources/theta/ThetaIntersectAccuracyJob.conf
@@ -18,7 +18,7 @@
 # Used by Job
 
 ## Job Profile
-JobProfile=org.apache.datasketches.characterization.theta.ThetaAccuracyIntersectionProfile
+JobProfile=org.apache.datasketches.characterization.theta.ThetaIntersectAccuracyProfile
 
 ## Date-Time Profile
 TimeZone=PST
@@ -28,28 +28,31 @@ ReadableDateFormat=yyyy/MM/dd HH:mm:ss
 
 # Used by BaseAccuracyProfile
 
-## Uniques Profile
-Trials_lgMinU=20  #The starting # of uniques that is printed at the end.
-Trials_lgMaxU=24  #How high the # uniques go
-Trials_UPPO=1     #The horizontal x-resolution of trials points
+## For Intersection testing
+IntersectTest=true
 
-## Trials Profile
-Trials_lgMinT=6   #prints intermediate results starting w/ this lgMinT
-Trials_lgMaxT=6   #The max trials
-Trials_TPPO=1     #how often intermediate results are printed
+## Range of uniques executed in a single trial
+Trials_lgMinU=16  #The starting number of uniques 
+Trials_lgMaxU=30  #The ending number of uniques
+Trials_UPPO=1     #The number of rows per octave of uniques
 
-Trials_lgQK=12   #size of quantiles sketch
-Trials_interData=true
-Trials_postPMFs=false
+## Trials profile. 
+Trials_lgMinT=2   #The starting number of trials
+Trials_lgMaxT=9  #The ending number of trials
+Trials_TPPO=4     #how often intermediate results are printed per octave
 
-Trials_bytes=false
+Trials_lgQK=12         #size of quantiles sketch
+Trials_interData=true  # allows printing of intermediate results
+Trials_postPMFs=false  #optional but expensive
+
+Trials_bytes=false #optional but expensive
 
 # Used by ThetaAccuracyIntersectionVsIEProfile
 
 ## Theta Sketch Profile
 LgK=12
-THETA_famName=QUICKSELECT #QUICKSELECT Cannot use ALPHA until 0.10.4
-THETA_lgRF=0     #set the log resize factor to 0 (RF = 1)
+THETA_famName=QUICKSELECT #QUICKSELECT
+THETA_lgRF=0              #set the log resize factor to 0 (RF = 1)
 THETA_p=1.0
 THETA_direct=false
 THETA_rebuild=true
diff --git a/src/test/java/org/apache/datasketches/MonotonicPointsTest.java b/src/test/java/org/apache/datasketches/MonotonicPointsTest.java
index d6a45d2..365679b 100644
--- a/src/test/java/org/apache/datasketches/MonotonicPointsTest.java
+++ b/src/test/java/org/apache/datasketches/MonotonicPointsTest.java
@@ -67,7 +67,7 @@ public class MonotonicPointsTest {
     int q = start;
     while (q <= end) {
       println(q);
-      q = pwr2SeriesNext(4, q);
+      q = (int)pwr2SeriesNext(4, q);
     }
   }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org