You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by le...@apache.org on 2022/04/30 00:14:03 UTC

[datasketches-characterization] branch master updated: Update KllSketchAccuracyProfile and Base

This is an automated email from the ASF dual-hosted git repository.

leerho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/datasketches-characterization.git


The following commit(s) were added to refs/heads/master by this push:
     new 483f322  Update KllSketchAccuracyProfile and Base
483f322 is described below

commit 483f3220d4caad31d3312a6835b20f4a571e7b4f
Author: Lee Rhodes <le...@users.noreply.github.com>
AuthorDate: Fri Apr 29 17:13:56 2022 -0700

    Update KllSketchAccuracyProfile and Base
---
 .../kll/KllSketchAccuracyProfile.java              | 53 ++++++++--------------
 .../quantiles/BaseQuantilesAccuracyProfile.java    | 13 +++---
 2 files changed, 24 insertions(+), 42 deletions(-)

diff --git a/src/main/java/org/apache/datasketches/characterization/kll/KllSketchAccuracyProfile.java b/src/main/java/org/apache/datasketches/characterization/kll/KllSketchAccuracyProfile.java
index 28b4ae6..1d55893 100644
--- a/src/main/java/org/apache/datasketches/characterization/kll/KllSketchAccuracyProfile.java
+++ b/src/main/java/org/apache/datasketches/characterization/kll/KllSketchAccuracyProfile.java
@@ -99,41 +99,24 @@ public class KllSketchAccuracyProfile extends BaseQuantilesAccuracyProfile {
   //prepare input data that will be permuted and associated arrays
   public void prepareTrialSet(final int streamLength) {
     this.streamLength = streamLength;
-    final int[] sortedArr;
 
     if (useDouble) {
-      if (uniformRandom) {
-        inputDoubleValues = new double[streamLength];
-        sortedArr = fillUniformRandomDoubles(inputDoubleValues);
-        //Compute true ranks & compress
-        doubleQueryValues = new double[streamLength];
-        trueRanks = getTrueRanksDoubles(sortedArr, doubleQueryValues);
-        compressDoubleQueryValues();
-      }
-      else { //consecutive
-        inputDoubleValues = new double[streamLength];
-        sortedArr = fillContiguousDoubles(inputDoubleValues);
-        //Compute true ranks
-        doubleQueryValues = new double[streamLength];
-        trueRanks = getTrueRanksDoubles(sortedArr, doubleQueryValues);
-      }
+      inputDoubleValues = new double[streamLength];
+      doubleQueryValues = new double[streamLength];
+      final int[] sortedArr = uniformRandom
+          ? fillUniformRandomDoubles(inputDoubleValues)
+          : fillContiguousDoubles(inputDoubleValues);
+      trueRanks = getTrueRanksDoubles(sortedArr, doubleQueryValues);
+      if (uniformRandom) { compressDoubleQueryValues(); }
     }
     else { //useFloats
-      if (uniformRandom) {
-        inputFloatValues = new float[streamLength];
-        sortedArr = fillUniformRandomFloats(inputFloatValues);
-        //Compute true ranks
-        floatQueryValues = new float[streamLength];
-        trueRanks = getTrueRanksFloats(sortedArr, floatQueryValues);
-        compressFloatQueryValues();
-      }
-      else { //consecutive
-        inputFloatValues = new float[streamLength];
-        sortedArr = fillContiguousFloats(inputFloatValues);
-        //Compute true ranks
-        floatQueryValues = new float[streamLength];
-        trueRanks = getTrueRanksFloats(sortedArr, floatQueryValues);
-      }
+      inputFloatValues = new float[streamLength];
+      floatQueryValues = new float[streamLength];
+      final int[] sortedArr = uniformRandom
+          ? fillUniformRandomFloats(inputFloatValues)
+          : fillContiguousFloats(inputFloatValues);
+      trueRanks = getTrueRanksFloats(sortedArr, floatQueryValues);
+      if (uniformRandom) { compressFloatQueryValuesAndTrueRanks(); }
     }
   }
 
@@ -167,17 +150,17 @@ public class KllSketchAccuracyProfile extends BaseQuantilesAccuracyProfile {
       // query sketch and gather results
       worstNegRankError = 0;
       worstPosRankError = 0;
-      final int qLen = trueRanks.length;
+      final int queryLen = trueRanks.length;
       if (useBulk) {
         final double[] estRanks = dskUT.getCDF(doubleQueryValues);
-        for (int i = 0; i < qLen; i++) {
+        for (int i = 0; i < queryLen; i++) {
           final double trueRank = (double) trueRanks[i] / streamLength;
           final double deltaRankErr = estRanks[i] - trueRank;
           if (deltaRankErr < 0) { worstNegRankError = Math.min(worstNegRankError, deltaRankErr); }
           else { worstPosRankError = Math.max(worstPosRankError, deltaRankErr); }
         }
       } else {
-        for (int i = 0; i < qLen; i++) {
+        for (int i = 0; i < queryLen; i++) {
           final double trueRank = (double) trueRanks[i] / streamLength;
           final double deltaRankErr = dskUT.getRank(i) - trueRank;
           if (deltaRankErr < 0) { worstNegRankError = Math.min(worstNegRankError, deltaRankErr); }
@@ -359,7 +342,7 @@ public class KllSketchAccuracyProfile extends BaseQuantilesAccuracyProfile {
     return trueRanks;
   }
 
-  final void compressFloatQueryValues() {
+  final void compressFloatQueryValuesAndTrueRanks() {
     //find num duplicates
     final int n = trueRanks.length;
     int dups = 0;
diff --git a/src/main/java/org/apache/datasketches/characterization/quantiles/BaseQuantilesAccuracyProfile.java b/src/main/java/org/apache/datasketches/characterization/quantiles/BaseQuantilesAccuracyProfile.java
index d1fd59e..cf62ac0 100644
--- a/src/main/java/org/apache/datasketches/characterization/quantiles/BaseQuantilesAccuracyProfile.java
+++ b/src/main/java/org/apache/datasketches/characterization/quantiles/BaseQuantilesAccuracyProfile.java
@@ -59,7 +59,6 @@ import org.apache.datasketches.quantiles.UpdateDoublesSketch;
 public abstract class BaseQuantilesAccuracyProfile implements JobProfile {
 
   protected Job job;
-  private DoublesSketchBuilder builder;
   protected Properties props;
 
   //JobProfile
@@ -80,13 +79,13 @@ public abstract class BaseQuantilesAccuracyProfile implements JobProfile {
   private static final double[] G_QUANTILES_3SD = {M3SD, M2SD, M1SD, MED, P1SD, P2SD, P3SD};
 
   private void doTrials() {
-    final int lgMin = Integer.parseInt(job.getProperties().mustGet("lgMin"));
-    final int lgMax = Integer.parseInt(job.getProperties().mustGet("lgMax"));
-    final int ppo = Integer.parseInt(job.getProperties().mustGet("PPO"));
-    final int numTrials = Integer.parseInt(job.getProperties().mustGet("trials"));
-    final int errorSketchLgK = Integer.parseInt(job.getProperties().mustGet("errLgK"));
+    final int lgMin = Integer.parseInt(props.mustGet("lgMin"));
+    final int lgMax = Integer.parseInt(props.mustGet("lgMax"));
+    final int ppo = Integer.parseInt(props.mustGet("PPO"));
+    final int numTrials = Integer.parseInt(props.mustGet("trials"));
+    final int errorSketchLgK = Integer.parseInt(props.mustGet("errLgK"));
 
-    builder = DoublesSketch.builder().setK(1 << errorSketchLgK);
+    final DoublesSketchBuilder builder = DoublesSketch.builder().setK(1 << errorSketchLgK);
 
     configure();
     job.println("Epsilon:\t" + getEpsilon());


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org