You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by pa...@apache.org on 2021/01/11 11:07:26 UTC
[datasketches-characterization] branch ReqExperiment updated: two metrics for measuring accuracy times size
This is an automated email from the ASF dual-hosted git repository.
pavelvesely pushed a commit to branch ReqExperiment
in repository https://gitbox.apache.org/repos/asf/datasketches-characterization.git
The following commit(s) were added to refs/heads/ReqExperiment by this push:
new 8919a2e two metrics for measuring accuracy times size
8919a2e is described below
commit 8919a2e06865fe8bd7a6e754f439d521420355a1
Author: Pavel Vesely <ve...@iuuk.mff.cuni.cz>
AuthorDate: Mon Jan 11 12:06:30 2021 +0100
two metrics for measuring accuracy times size
---
.../resources/quantiles/ReqSketchAccuracyJob.conf | 11 +++++---
.../quantiles/ReqSketchAccuracyProfile.java | 32 ++++++++++++++++++++--
2 files changed, 37 insertions(+), 6 deletions(-)
diff --git a/src/main/resources/quantiles/ReqSketchAccuracyJob.conf b/src/main/resources/quantiles/ReqSketchAccuracyJob.conf
index 72a2b54..17a12dd 100644
--- a/src/main/resources/quantiles/ReqSketchAccuracyJob.conf
+++ b/src/main/resources/quantiles/ReqSketchAccuracyJob.conf
@@ -22,13 +22,13 @@ Pattern=Sorted # Sorted, Reversed, Zoomin, Zoomout, Random, Sqrt, FlipFlop
Offset=1 #0 for min value of 0; 1 for min value of 1
## Stream lengths
-LgMin=24 # The starting stream length
-LgMax=24 # How high the stream length goes
+LgMin=26 # The starting stream length
+LgMax=26 # How high the stream length goes
LgDelta=2 # If > 0, this is the lg Increment
PPO=1 # The horizontal x-resolution of trials points
# Trials config (indep of sketch)
-LgTrials=15 # lgTrials at every stream length
+LgTrials=12 # lgTrials at every stream length
ErrQSkLgK=12 # the rank error distribution sketch LgK
ErrHllSkLgK=12 # the rank error HLL sketch Lgk
Shuffle=true # If true, shuffle before each trial
@@ -41,7 +41,7 @@ StdDev=1 # std deviation used when plotting LB, UB
RankRange=1.0 # range of rank to plot. E.g., given 0.3: if LRA => 0 to 0.3; if HRA => 0.7 to 1.0
# Specific sketch config
-K=24 # sketch size and accuracy parameter
+K=12 # sketch size and accuracy parameter
HRA=true # if true use high-rank accuracy, otherwise low-rank accuracy
Compatible=false
@@ -59,6 +59,9 @@ TimeZoneOffset=0 #-25200000 # offset in millisec: PST (UTC-8) = -28_800_000 PDT
FileNameDateFormat=yyyyMMdd'_'HHmmssz
ReadableDateFormat=yyyy/MM/dd HH:mm:ss
+# FOR SPECIAL METRICS CAPTURING ACCURACY PER BYTE
+MetricsRankRange = 0.3
+
# TEMPORARY
INIT_NUMBER_OF_SECTIONS = 3
NOM_CAPACITY_MULTIPLIER = 2
diff --git a/src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchAccuracyProfile.java b/src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchAccuracyProfile.java
index dfc9f15..4742c7e 100644
--- a/src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchAccuracyProfile.java
+++ b/src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchAccuracyProfile.java
@@ -69,6 +69,7 @@ public class ReqSketchAccuracyProfile implements JobProfile {
private double exponent;
private int sd;
private double rankRange;
+ private double metricsRankRange;
//Target sketch configuration & error analysis
private int K;
@@ -159,6 +160,8 @@ public class ReqSketchAccuracyProfile implements JobProfile {
hra = Boolean.parseBoolean(prop.mustGet("HRA"));
ltEq = Boolean.parseBoolean(prop.mustGet("LtEq"));
+
+ metricsRankRange = Double.parseDouble(prop.mustGet("MetricsRankRange"));
INIT_NUMBER_OF_SECTIONS = Integer.parseInt(prop.mustGet("INIT_NUMBER_OF_SECTIONS"));
NOM_CAPACITY_MULTIPLIER = Float.parseFloat(prop.mustGet("NOM_CAPACITY_MULTIPLIER"));
@@ -274,7 +277,13 @@ public class ReqSketchAccuracyProfile implements JobProfile {
//sumAllocCounts = sk.
}
-
+
+ // for special metrics for capturing accuracy per byte
+ double sumRelStdDev = 0;
+ int numRelStdDev = 0;
+ double sumAddStdDev = 0;
+ int numAddStdDev = 0;
+
//at this point each of the errQSkArr sketches has a distribution of error from numTrials
for (int pp = 0 ; pp < numPlotPoints; pp++) {
final double v = sortedPPValues[pp];
@@ -291,10 +300,29 @@ public class ReqSketchAccuracyProfile implements JobProfile {
job.printfData(fFmt, relPP, v, tr,
errQ[0], errQ[1], errQ[2], errQ[3], errQ[4], errQ[5], errQ[6],
rlb, rub, uErrCnt);
+
+ if (relPP > 0 && relPP < 1
+ && ((hra && relPP < metricsRankRange) || (!hra && relPP >= 1 - metricsRankRange))) {
+ sumAddStdDev += errQ[4];
+ numAddStdDev++;
+ }
+ if (relPP > 0 && relPP < 1
+ && ((!hra && relPP < metricsRankRange) || (hra && relPP >= 1 - metricsRankRange))) {
+ sumRelStdDev += errQ[4] / (hra ? 1 - relPP : relPP);
+ numRelStdDev++;
+ }
errQSkArr[pp].reset(); //reset the errQSkArr for next streamLength
errHllSkArr[pp].reset(); //reset the errHllSkArr for next streamLength
}
- job.println(LS + "Serialization Bytes: " + sk.getSerializationBytes());
+ int serBytes = sk.getSerializationBytes();
+
+ // special metrics for capturing accuracy per byte
+ double avgRelStdDevTimesSize = serBytes * sumRelStdDev / numRelStdDev;
+ double avgAddStdDevTimesSize = serBytes * sumAddStdDev / numAddStdDev;
+ job.println(LS + "Avg. relative std. dev. times size: " + avgRelStdDevTimesSize);
+ job.println( "Avg. additive std. dev. times size: " + avgAddStdDevTimesSize);
+
+ job.println(LS + "Serialization Bytes: " + serBytes);
job.println(sk.viewCompactorDetail("%5.0f", false));
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org