You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by pa...@apache.org on 2021/01/05 12:18:05 UTC
[datasketches-characterization] 01/01: added size & speed test for long streams; allowing constants to be changed (for experimental purposes only)
This is an automated email from the ASF dual-hosted git repository.
pavelvesely pushed a commit to branch ReqExperiment
in repository https://gitbox.apache.org/repos/asf/datasketches-characterization.git
commit fd8d773dfb40c6e38fea94073f335909e2934d85
Author: Pavel Vesely <ve...@iuuk.mff.cuni.cz>
AuthorDate: Tue Jan 5 13:14:59 2021 +0100
added size & speed test for long streams; allowing constants to be changed (for experimental purposes only)
---
.../resources/quantiles/ReqSketchAccuracyJob.conf | 28 +++++++------
...peedJob.conf => ReqSketchLongSizeSpeedJob.conf} | 29 ++++++++------
.../resources/quantiles/ReqSketchSizeSpeedJob.conf | 26 +++++++-----
.../quantiles/ReqSketchAccuracyProfile.java | 21 ++++++++--
...ile.java => ReqSketchLongSizeSpeedProfile.java} | 46 ++++++++++++++--------
.../quantiles/ReqSketchSizeSpeedProfile.java | 21 ++++++++--
6 files changed, 115 insertions(+), 56 deletions(-)
diff --git a/src/main/resources/quantiles/ReqSketchAccuracyJob.conf b/src/main/resources/quantiles/ReqSketchAccuracyJob.conf
index f6609a8..72a2b54 100644
--- a/src/main/resources/quantiles/ReqSketchAccuracyJob.conf
+++ b/src/main/resources/quantiles/ReqSketchAccuracyJob.conf
@@ -22,28 +22,28 @@ Pattern=Sorted # Sorted, Reversed, Zoomin, Zoomout, Random, Sqrt, FlipFlop
Offset=1 #0 for min value of 0; 1 for min value of 1
## Stream lengths
-LgMin=20 # The starting stream length
-LgMax=20 # How high the stream length goes
-LgDelta=3 # If > 0, this is the lg Increment
-PPO=8 # The horizontal x-resolution of trials points
+LgMin=24 # The starting stream length
+LgMax=24 # How high the stream length goes
+LgDelta=2 # If > 0, this is the lg Increment
+PPO=1 # The horizontal x-resolution of trials points
# Trials config (indep of sketch)
-LgTrials=12 # lgTrials at every stream length
+LgTrials=15 # lgTrials at every stream length
ErrQSkLgK=12 # the rank error distribution sketch LgK
ErrHllSkLgK=12 # the rank error HLL sketch Lgk
-Shuffle=false # If true, shuffle before each trial
+Shuffle=true # If true, shuffle before each trial
# Plotting
NumPlotPoints=100 # number of plot points along the x-axis
-EvenlySpaced=true # if true the x-axis points will be evenly spaced ranks in [0,1], otherwise exponential in [0,1]
+EvenlySpaced=false # if true the x-axis points will be evenly spaced ranks in [0,1], otherwise exponential in [0,1]
Exponent=2.0 # the steepness of the exponential x-axis density gradient curve, must be >= 1.0
StdDev=1 # std deviation used when plotting LB, UB
RankRange=1.0 # range of rank to plot. E.g., given 0.3: if LRA => 0 to 0.3; if HRA => 0.7 to 1.0
# Specific sketch config
-K=50 # sketch size and accuracy parameter
+K=24 # sketch size and accuracy parameter
-HRA=false # if true use high-rank accuracy, otherwise low-rank accuracy
+HRA=true # if true use high-rank accuracy, otherwise low-rank accuracy
Compatible=false
# For LRA, LE,GT have the converged point at rank 1.0
# For HRA, LT,GE have the converged point at rank 0.0
@@ -54,7 +54,13 @@ LtEq=true
# ReqDebugFmt=%5.0f
# Date-Time Profile
-TimeZone=PDT
-TimeZoneOffset=-25200000 # offset in millisec: PST (UTC-8) = -28_800_000 PDT (UTC-7) = -25_200_000
+TimeZone=UTC
+TimeZoneOffset=0 #-25200000 # offset in millisec: PST (UTC-8) = -28_800_000 PDT (UTC-7) = -25_200_000
FileNameDateFormat=yyyyMMdd'_'HHmmssz
ReadableDateFormat=yyyy/MM/dd HH:mm:ss
+
+# TEMPORARY
+INIT_NUMBER_OF_SECTIONS = 3
+NOM_CAPACITY_MULTIPLIER = 2
+MIN_K = 4
+LAZY_COMPRESSION = false
diff --git a/src/main/resources/quantiles/ReqSketchSizeSpeedJob.conf b/src/main/resources/quantiles/ReqSketchLongSizeSpeedJob.conf
similarity index 64%
copy from src/main/resources/quantiles/ReqSketchSizeSpeedJob.conf
copy to src/main/resources/quantiles/ReqSketchLongSizeSpeedJob.conf
index 5b52f2f..ca14099 100644
--- a/src/main/resources/quantiles/ReqSketchSizeSpeedJob.conf
+++ b/src/main/resources/quantiles/ReqSketchLongSizeSpeedJob.conf
@@ -15,31 +15,36 @@
# specific language governing permissions and limitations
# under the License.
-JobProfile=org.apache.datasketches.characterization.quantiles.ReqSketchSizeSpeedProfile
+JobProfile=org.apache.datasketches.characterization.quantiles.ReqSketchLongSizeSpeedProfile
# Trials config (indep of sketch)
-LgMinT=4
+LgMinT=0
LgMaxT=12
## Stream lengths
-LgMinSL=1 # The starting stream length
-LgMaxSL=20 # How high the stream length goes
-PpoSL=2 # The horizontal x-resolution of trials points per octave
+LgMinSL=10 # The starting stream length
+LgMaxSL=36 # How high the stream length goes
LgMinBpSL=10 # The SL breakpoint where the slope starts
LgMaxBpSL=20 # The SL breakpoint where the slope stops
# Specific sketch config
-ReqK=12 # ReqSketch size and accuracy
-HRA=false # if true use high-rank accuracy, otherwise low-rank accuracy
-#For HRA, LT,GE have the converged point at rank 0.0
-#For LRA, LE,GT have the converged point at rank 1.0
+ReqK=24 # ReqSketch size and accuracy
+HRA=true # if true use high-rank accuracy, otherwise low-rank accuracy
+#For HRA, LT,GE??? have the converged point at rank 0.0
+#For LRA, LE,GT??? have the converged point at rank 1.0
Criterion=LE # LT, LE, GT, GE. Must be all caps.
#ReqDebugLevel=2 # or 0, 1, 2. disable by commenting it out. Use only when LgTrials=0
#ReqDebugFmt=%5.0f
# Date-Time Profile
-TimeZone=PDT
-TimeZoneOffset=-25200000 # offset in millisec: PST (UTC-8) = -28_800_000 PDT (UTC-7) = -25_200_000
+TimeZone=UTC
+TimeZoneOffset=0 #-25200000 # offset in millisec: PST (UTC-8) = -28_800_000 PDT (UTC-7) = -25_200_000
FileNameDateFormat=yyyyMMdd'_'HHmmssz
-ReadableDateFormat=yyyy/MM/dd HH:mm:ss
\ No newline at end of file
+ReadableDateFormat=yyyy/MM/dd HH:mm:ss
+
+# TEMPORARY
+INIT_NUMBER_OF_SECTIONS = 333 # default: 3
+NOM_CAPACITY_MULTIPLIER = 2 # default: 2
+MIN_K = 4 # default: 4
+LAZY_COMPRESSION = false # default: true
diff --git a/src/main/resources/quantiles/ReqSketchSizeSpeedJob.conf b/src/main/resources/quantiles/ReqSketchSizeSpeedJob.conf
index 5b52f2f..c0f6ccc 100644
--- a/src/main/resources/quantiles/ReqSketchSizeSpeedJob.conf
+++ b/src/main/resources/quantiles/ReqSketchSizeSpeedJob.conf
@@ -18,28 +18,34 @@
JobProfile=org.apache.datasketches.characterization.quantiles.ReqSketchSizeSpeedProfile
# Trials config (indep of sketch)
-LgMinT=4
+LgMinT=0
LgMaxT=12
## Stream lengths
-LgMinSL=1 # The starting stream length
-LgMaxSL=20 # How high the stream length goes
-PpoSL=2 # The horizontal x-resolution of trials points per octave
+LgMinSL=4 # The starting stream length
+LgMaxSL=30 # How high the stream length goes
+PpoSL=1 # The horizontal x-resolution of trials points per octave
LgMinBpSL=10 # The SL breakpoint where the slope starts
LgMaxBpSL=20 # The SL breakpoint where the slope stops
# Specific sketch config
-ReqK=12 # ReqSketch size and accuracy
+ReqK=24 # ReqSketch size and accuracy
HRA=false # if true use high-rank accuracy, otherwise low-rank accuracy
-#For HRA, LT,GE have the converged point at rank 0.0
-#For LRA, LE,GT have the converged point at rank 1.0
+#For HRA, LT,GE??? have the converged point at rank 0.0
+#For LRA, LE,GT??? have the converged point at rank 1.0
Criterion=LE # LT, LE, GT, GE. Must be all caps.
#ReqDebugLevel=2 # or 0, 1, 2. disable by commenting it out. Use only when LgTrials=0
#ReqDebugFmt=%5.0f
# Date-Time Profile
-TimeZone=PDT
-TimeZoneOffset=-25200000 # offset in millisec: PST (UTC-8) = -28_800_000 PDT (UTC-7) = -25_200_000
+TimeZone=UTC
+TimeZoneOffset=0 #-25200000 # offset in millisec: PST (UTC-8) = -28_800_000 PDT (UTC-7) = -25_200_000
FileNameDateFormat=yyyyMMdd'_'HHmmssz
-ReadableDateFormat=yyyy/MM/dd HH:mm:ss
\ No newline at end of file
+ReadableDateFormat=yyyy/MM/dd HH:mm:ss
+
+# TEMPORARY
+INIT_NUMBER_OF_SECTIONS = 3
+NOM_CAPACITY_MULTIPLIER = 2
+MIN_K = 4
+LAZY_COMPRESSION = true
\ No newline at end of file
diff --git a/src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchAccuracyProfile.java b/src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchAccuracyProfile.java
index bbdfd14..dad7540 100644
--- a/src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchAccuracyProfile.java
+++ b/src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchAccuracyProfile.java
@@ -76,6 +76,13 @@ public class ReqSketchAccuracyProfile implements JobProfile {
private boolean ltEq;
private org.apache.datasketches.req.ReqDebugImpl reqDebugImpl = null;
+
+ // TEMPORARY
+ int INIT_NUMBER_OF_SECTIONS;
+ float NOM_CAPACITY_MULTIPLIER;
+ int MIN_K;
+ boolean LAZY_COMPRESSION;
+
//DERIVED globals
private ReqSketch sk;
@@ -151,6 +158,12 @@ public class ReqSketchAccuracyProfile implements JobProfile {
K = Integer.parseInt(prop.mustGet("K"));
hra = Boolean.parseBoolean(prop.mustGet("HRA"));
ltEq = Boolean.parseBoolean(prop.mustGet("LtEq"));
+
+
+ INIT_NUMBER_OF_SECTIONS = Integer.parseInt(prop.mustGet("INIT_NUMBER_OF_SECTIONS"));
+ NOM_CAPACITY_MULTIPLIER = Float.parseFloat(prop.mustGet("NOM_CAPACITY_MULTIPLIER"));
+ MIN_K = Integer.parseInt(prop.mustGet("MIN_K"));
+ LAZY_COMPRESSION = Boolean.parseBoolean(prop.mustGet("LAZY_COMPRESSION"));
//criterion = InequalitySearch.valueOf(prop.mustGet("Criterion"));
String reqDebugLevel = prop.get("ReqDebugLevel");
String reqDebugFmt = prop.get("ReqDebugFmt");
@@ -177,10 +190,11 @@ public class ReqSketchAccuracyProfile implements JobProfile {
}
void configureSketch() {
- final ReqSketchBuilder bldr = ReqSketch.builder();
+ /*final ReqSketchBuilder bldr = ReqSketch.builder();
bldr.setK(K).setHighRankAccuracy(hra);
- if (reqDebugImpl != null) { bldr.setReqDebug(reqDebugImpl); }
- sk = bldr.build();
+ if (reqDebugImpl != null) { bldr.setReqDebug(reqDebugImpl); }*/
+ sk = new ReqSketch(K, hra, null, (byte)INIT_NUMBER_OF_SECTIONS, MIN_K, NOM_CAPACITY_MULTIPLIER, LAZY_COMPRESSION);
+ //sk = bldr.build();
sk.setLessThanOrEqual(ltEq);
}
@@ -216,6 +230,7 @@ public class ReqSketchAccuracyProfile implements JobProfile {
void doStreamLength(final int streamLength) {
job.println(LS + "Stream Length: " + streamLength );
+ job.println(LS + "param k: " + K );
job.printfData(sFmt, (Object[])columnLabels);
//build the stream
stream = streamMaker.makeStream(streamLength, pattern, offset);
diff --git a/src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchSizeSpeedProfile.java b/src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchLongSizeSpeedProfile.java
similarity index 79%
copy from src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchSizeSpeedProfile.java
copy to src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchLongSizeSpeedProfile.java
index 5bc4d42..73c0b95 100644
--- a/src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchSizeSpeedProfile.java
+++ b/src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchLongSizeSpeedProfile.java
@@ -23,6 +23,7 @@ import static java.lang.Math.log;
import static java.lang.Math.pow;
import static org.apache.datasketches.Util.pwr2LawNext;
+import org.apache.datasketches.Criteria;
import org.apache.datasketches.Job;
import org.apache.datasketches.JobProfile;
import org.apache.datasketches.Properties;
@@ -31,8 +32,9 @@ import org.apache.datasketches.req.ReqSketchBuilder;
/**
* @author Lee Rhodes
+ * @author Pavel Vesely
*/
-public class ReqSketchSizeSpeedProfile implements JobProfile {
+public class ReqSketchLongSizeSpeedProfile implements JobProfile {
private Job job;
private Properties prop;
@@ -44,14 +46,19 @@ public class ReqSketchSizeSpeedProfile implements JobProfile {
//For computing the different stream lengths
private int lgMinSL;
private int lgMaxSL;
- private int ppoSL;
-
+
private double slope;
+ // TEMPORARY
+ int INIT_NUMBER_OF_SECTIONS;
+ float NOM_CAPACITY_MULTIPLIER;
+ int MIN_K;
+ boolean LAZY_COMPRESSION;
+
//Target sketch configuration & error analysis
private int reqK;
private boolean hra; //high rank accuracy
- private boolean ltEq;
+ private Criteria criterion;
//DERIVED & GLOBALS
private ReqSketch reqSk;
@@ -70,12 +77,16 @@ public class ReqSketchSizeSpeedProfile implements JobProfile {
//stream length
lgMinSL = Integer.parseInt(prop.mustGet("LgMinSL"));
lgMaxSL = Integer.parseInt(prop.mustGet("LgMaxSL"));
- ppoSL = Integer.parseInt(prop.mustGet("PpoSL"));
//Target sketch config
reqK = Integer.parseInt(prop.mustGet("ReqK"));
hra = Boolean.parseBoolean(prop.mustGet("HRA"));
- ltEq = Boolean.parseBoolean(prop.mustGet("LtEq"));
+ criterion = Criteria.valueOf(prop.mustGet("Criterion"));
+
+ INIT_NUMBER_OF_SECTIONS = Integer.parseInt(prop.mustGet("INIT_NUMBER_OF_SECTIONS"));
+ NOM_CAPACITY_MULTIPLIER = Float.parseFloat(prop.mustGet("NOM_CAPACITY_MULTIPLIER"));
+ MIN_K = Integer.parseInt(prop.mustGet("MIN_K"));
+ LAZY_COMPRESSION = Boolean.parseBoolean(prop.mustGet("LAZY_COMPRESSION"));
}
void configureCommon() {
@@ -83,10 +94,11 @@ public class ReqSketchSizeSpeedProfile implements JobProfile {
}
void configureSketch() {
- final ReqSketchBuilder bldr = ReqSketch.builder();
- bldr.setK(reqK).setHighRankAccuracy(hra);
- reqSk = bldr.build();
- reqSk.setLessThanOrEqual(ltEq);
+ //final ReqSketchBuilder bldr = ReqSketch.builder();
+ //bldr.setK(reqK).setHighRankAccuracy(hra);
+ //reqSk = bldr.build();
+ reqSk = new ReqSketch(reqK, hra, null, (byte)INIT_NUMBER_OF_SECTIONS, MIN_K, NOM_CAPACITY_MULTIPLIER, LAZY_COMPRESSION);
+ //reqSk.setCriterion(criterion);
}
//JobProfile interface
@@ -112,13 +124,13 @@ public class ReqSketchSizeSpeedProfile implements JobProfile {
* and outputs a row per axis plot point.
*/
private void doTrials() {
- final int maxSL = 1 << lgMaxSL;
- final int minSL = 1 << lgMinSL;
- int lastSL = 0;
+ final long maxSL = 1L << lgMaxSL;
+ final long minSL = 1L << lgMinSL;
+ long lastSL = 0;
job.printf(sFmt, (Object[]) columnLabels); //Header
int pp = 1;
while (lastSL < maxSL) { //Trials for each plotPoint on X-axis, and one row on output
- final int nextSL = lastSL == 0 ? minSL : pwr2LawNext(ppoSL, lastSL);
+ final long nextSL = lastSL == 0 ? minSL : 2 * lastSL;
lastSL = nextSL;
final int trials = getNumTrials(nextSL);
@@ -138,11 +150,11 @@ public class ReqSketchSizeSpeedProfile implements JobProfile {
* @param streamLen the streamLength for this trial
* @return the average update time per item for this trial
*/
- private double doTrial(final int streamLen) {
+ private double doTrial(final long streamLen) {
reqSk.reset();
final long startUpdateTime_nS = System.nanoTime();
- for (int i = 0; i < streamLen; i++) {
+ for (long i = 0; i < streamLen; i++) {
reqSk.update(i);
}
final long updateTime_nS = System.nanoTime() - startUpdateTime_nS;
@@ -158,7 +170,7 @@ public class ReqSketchSizeSpeedProfile implements JobProfile {
* @return the number of trials for a given current stream length for a
* trial set.
*/
- private int getNumTrials(final int curSL) {
+ private int getNumTrials(final long curSL) {
final int minBpSL = 1 << lgMinBpSL;
final int maxBpSL = 1 << lgMaxBpSL;
final int maxT = 1 << lgMaxT;
diff --git a/src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchSizeSpeedProfile.java b/src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchSizeSpeedProfile.java
index 5bc4d42..3341356 100644
--- a/src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchSizeSpeedProfile.java
+++ b/src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchSizeSpeedProfile.java
@@ -53,6 +53,13 @@ public class ReqSketchSizeSpeedProfile implements JobProfile {
private boolean hra; //high rank accuracy
private boolean ltEq;
+
+ // TEMPORARY
+ int INIT_NUMBER_OF_SECTIONS;
+ float NOM_CAPACITY_MULTIPLIER;
+ int MIN_K;
+ boolean LAZY_COMPRESSION;
+
//DERIVED & GLOBALS
private ReqSketch reqSk;
//private KllFloatsSketch kllSk;
@@ -76,6 +83,12 @@ public class ReqSketchSizeSpeedProfile implements JobProfile {
reqK = Integer.parseInt(prop.mustGet("ReqK"));
hra = Boolean.parseBoolean(prop.mustGet("HRA"));
ltEq = Boolean.parseBoolean(prop.mustGet("LtEq"));
+
+
+ INIT_NUMBER_OF_SECTIONS = Integer.parseInt(prop.mustGet("INIT_NUMBER_OF_SECTIONS"));
+ NOM_CAPACITY_MULTIPLIER = Float.parseFloat(prop.mustGet("NOM_CAPACITY_MULTIPLIER"));
+ MIN_K = Integer.parseInt(prop.mustGet("MIN_K"));
+ LAZY_COMPRESSION = Boolean.parseBoolean(prop.mustGet("LAZY_COMPRESSION"));
}
void configureCommon() {
@@ -83,10 +96,12 @@ public class ReqSketchSizeSpeedProfile implements JobProfile {
}
void configureSketch() {
- final ReqSketchBuilder bldr = ReqSketch.builder();
- bldr.setK(reqK).setHighRankAccuracy(hra);
- reqSk = bldr.build();
+ /*final ReqSketchBuilder bldr = ReqSketch.builder();
+ bldr.setK(reqK).setHighRankAccuracy(hra);*/
+ //reqSk = bldr.build();
+ reqSk = new ReqSketch(reqK, hra, null, (byte)INIT_NUMBER_OF_SECTIONS, MIN_K, NOM_CAPACITY_MULTIPLIER, LAZY_COMPRESSION);
reqSk.setLessThanOrEqual(ltEq);
+
}
//JobProfile interface
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org