You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by pa...@apache.org on 2021/01/11 11:07:26 UTC

[datasketches-characterization] branch ReqExperiment updated: two metrics for measuring accuracy times size

This is an automated email from the ASF dual-hosted git repository.

pavelvesely pushed a commit to branch ReqExperiment
in repository https://gitbox.apache.org/repos/asf/datasketches-characterization.git


The following commit(s) were added to refs/heads/ReqExperiment by this push:
     new 8919a2e  two metrics for measuring accuracy times size
8919a2e is described below

commit 8919a2e06865fe8bd7a6e754f439d521420355a1
Author: Pavel Vesely <ve...@iuuk.mff.cuni.cz>
AuthorDate: Mon Jan 11 12:06:30 2021 +0100

    two metrics for measuring accuracy times size
---
 .../resources/quantiles/ReqSketchAccuracyJob.conf  | 11 +++++---
 .../quantiles/ReqSketchAccuracyProfile.java        | 32 ++++++++++++++++++++--
 2 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/src/main/resources/quantiles/ReqSketchAccuracyJob.conf b/src/main/resources/quantiles/ReqSketchAccuracyJob.conf
index 72a2b54..17a12dd 100644
--- a/src/main/resources/quantiles/ReqSketchAccuracyJob.conf
+++ b/src/main/resources/quantiles/ReqSketchAccuracyJob.conf
@@ -22,13 +22,13 @@ Pattern=Sorted # Sorted, Reversed, Zoomin, Zoomout, Random, Sqrt, FlipFlop
 Offset=1 #0 for min value of 0; 1 for min value of 1
 
 ## Stream lengths
-LgMin=24    # The starting stream length
-LgMax=24    # How high the stream length goes
+LgMin=26    # The starting stream length
+LgMax=26    # How high the stream length goes
 LgDelta=2   # If > 0, this is the lg Increment
 PPO=1       # The horizontal x-resolution of trials points
 
 # Trials config (indep of sketch)
-LgTrials=15 # lgTrials at every stream length
+LgTrials=12 # lgTrials at every stream length
 ErrQSkLgK=12   # the rank error distribution sketch LgK
 ErrHllSkLgK=12 # the rank error HLL sketch Lgk
 Shuffle=true # If true, shuffle before each trial
@@ -41,7 +41,7 @@ StdDev=1 # std deviation used when plotting LB, UB
 RankRange=1.0 # range of rank to plot. E.g., given 0.3: if LRA => 0 to 0.3; if HRA => 0.7 to 1.0 
 
 # Specific sketch config
-K=24 # sketch size and accuracy parameter
+K=12 # sketch size and accuracy parameter
 
 HRA=true # if true use high-rank accuracy, otherwise low-rank accuracy
 Compatible=false
@@ -59,6 +59,9 @@ TimeZoneOffset=0 #-25200000 # offset in millisec: PST (UTC-8) = -28_800_000  PDT
 FileNameDateFormat=yyyyMMdd'_'HHmmssz
 ReadableDateFormat=yyyy/MM/dd HH:mm:ss 
 
+# FOR SPECIAL METRICS CAPTURING ACCURACY PER BYTE
+MetricsRankRange = 0.3
+
 # TEMPORARY
 INIT_NUMBER_OF_SECTIONS = 3
 NOM_CAPACITY_MULTIPLIER = 2
diff --git a/src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchAccuracyProfile.java b/src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchAccuracyProfile.java
index dfc9f15..4742c7e 100644
--- a/src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchAccuracyProfile.java
+++ b/src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchAccuracyProfile.java
@@ -69,6 +69,7 @@ public class ReqSketchAccuracyProfile implements JobProfile {
   private double exponent;
   private int sd;
   private double rankRange;
+  private double metricsRankRange;
 
   //Target sketch configuration & error analysis
   private int K;
@@ -159,6 +160,8 @@ public class ReqSketchAccuracyProfile implements JobProfile {
     hra = Boolean.parseBoolean(prop.mustGet("HRA"));
     ltEq = Boolean.parseBoolean(prop.mustGet("LtEq"));
     
+
+    metricsRankRange = Double.parseDouble(prop.mustGet("MetricsRankRange"));
     
     INIT_NUMBER_OF_SECTIONS = Integer.parseInt(prop.mustGet("INIT_NUMBER_OF_SECTIONS"));
     NOM_CAPACITY_MULTIPLIER = Float.parseFloat(prop.mustGet("NOM_CAPACITY_MULTIPLIER"));
@@ -274,7 +277,13 @@ public class ReqSketchAccuracyProfile implements JobProfile {
 
       //sumAllocCounts = sk.
     }
-
+    
+    // running sums and counts for the special metrics capturing accuracy per byte
+    double sumRelStdDev = 0;
+    int numRelStdDev = 0;
+    double sumAddStdDev = 0;
+    int numAddStdDev = 0;
+    
     //at this point each of the errQSkArr sketches has a distribution of error from numTrials
     for (int pp = 0 ; pp < numPlotPoints; pp++) {
       final double v = sortedPPValues[pp];
@@ -291,10 +300,29 @@ public class ReqSketchAccuracyProfile implements JobProfile {
       job.printfData(fFmt, relPP, v, tr,
           errQ[0], errQ[1], errQ[2], errQ[3], errQ[4], errQ[5], errQ[6],
           rlb, rub, uErrCnt);
+
+      if (relPP > 0 && relPP < 1
+    	      && ((hra && relPP < metricsRankRange) || (!hra && relPP >= 1 - metricsRankRange))) {
+    	  sumAddStdDev += errQ[4];
+    	  numAddStdDev++;
+      }
+      if (relPP > 0 && relPP < 1
+    		  && ((!hra && relPP < metricsRankRange) || (hra && relPP >= 1 - metricsRankRange))) {
+        sumRelStdDev += errQ[4] / (hra ? 1 - relPP : relPP);
+    	  numRelStdDev++;
+      }
       errQSkArr[pp].reset(); //reset the errQSkArr for next streamLength
       errHllSkArr[pp].reset(); //reset the errHllSkArr for next streamLength
     }
-    job.println(LS + "Serialization Bytes: " + sk.getSerializationBytes());
+    int serBytes = sk.getSerializationBytes();
+
+    // special metrics for capturing accuracy per byte
+    // (each average is NaN if no plot point fell inside its rank range, i.e. its count is 0)
+    double avgRelStdDevTimesSize = serBytes * sumRelStdDev / numRelStdDev;
+    double avgAddStdDevTimesSize = serBytes * sumAddStdDev / numAddStdDev;
+    job.println(LS + "Avg. relative std. dev. times size: " + avgRelStdDevTimesSize);
+    job.println(     "Avg. additive std. dev. times size: " + avgAddStdDevTimesSize);
+
+    job.println(LS + "Serialization Bytes: " + serBytes);
     job.println(sk.viewCompactorDetail("%5.0f", false));
   }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org