You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by pa...@apache.org on 2021/01/05 12:18:05 UTC

[datasketches-characterization] 01/01: added size&speed test for long streams; allowing to change constants (for experimental purposes only)

This is an automated email from the ASF dual-hosted git repository.

pavelvesely pushed a commit to branch ReqExperiment
in repository https://gitbox.apache.org/repos/asf/datasketches-characterization.git

commit fd8d773dfb40c6e38fea94073f335909e2934d85
Author: Pavel Vesely <ve...@iuuk.mff.cuni.cz>
AuthorDate: Tue Jan 5 13:14:59 2021 +0100

    added size&speed test for long streams; allowing to change constants (for experimental purposes only)
---
 .../resources/quantiles/ReqSketchAccuracyJob.conf  | 28 +++++++------
 ...peedJob.conf => ReqSketchLongSizeSpeedJob.conf} | 29 ++++++++------
 .../resources/quantiles/ReqSketchSizeSpeedJob.conf | 26 +++++++-----
 .../quantiles/ReqSketchAccuracyProfile.java        | 21 ++++++++--
 ...ile.java => ReqSketchLongSizeSpeedProfile.java} | 46 ++++++++++++++--------
 .../quantiles/ReqSketchSizeSpeedProfile.java       | 21 ++++++++--
 6 files changed, 115 insertions(+), 56 deletions(-)

diff --git a/src/main/resources/quantiles/ReqSketchAccuracyJob.conf b/src/main/resources/quantiles/ReqSketchAccuracyJob.conf
index f6609a8..72a2b54 100644
--- a/src/main/resources/quantiles/ReqSketchAccuracyJob.conf
+++ b/src/main/resources/quantiles/ReqSketchAccuracyJob.conf
@@ -22,28 +22,28 @@ Pattern=Sorted # Sorted, Reversed, Zoomin, Zoomout, Random, Sqrt, FlipFlop
 Offset=1 #0 for min value of 0; 1 for min value of 1
 
 ## Stream lengths
-LgMin=20    # The starting stream length
-LgMax=20    # How high the stream length goes
-LgDelta=3   # If > 0, this is the lg Increment
-PPO=8       # The horizontal x-resolution of trials points
+LgMin=24    # The starting stream length
+LgMax=24    # How high the stream length goes
+LgDelta=2   # If > 0, this is the lg Increment
+PPO=1       # The horizontal x-resolution of trials points
 
 # Trials config (indep of sketch)
-LgTrials=12 # lgTrials at every stream length
+LgTrials=15 # lgTrials at every stream length
 ErrQSkLgK=12   # the rank error distribution sketch LgK
 ErrHllSkLgK=12 # the rank error HLL sketch Lgk
-Shuffle=false # If true, shuffle before each trial
+Shuffle=true # If true, shuffle before each trial
 
 # Plotting
 NumPlotPoints=100 # number of plot points along the x-axis
-EvenlySpaced=true # if true the x-axis points will be evenly spaced ranks in [0,1], otherwise exponential in [0,1]
+EvenlySpaced=false # if true the x-axis points will be evenly spaced ranks in [0,1], otherwise exponential in [0,1]
 Exponent=2.0 # the steepness of the exponential x-axis density gradient curve, must be >= 1.0
 StdDev=1 # std deviation used when plotting LB, UB
 RankRange=1.0 # range of rank to plot. E.g., given 0.3: if LRA => 0 to 0.3; if HRA => 0.7 to 1.0 
 
 # Specific sketch config
-K=50 # sketch size and accuracy parameter
+K=24 # sketch size and accuracy parameter
 
-HRA=false # if true use high-rank accuracy, otherwise low-rank accuracy
+HRA=true # if true use high-rank accuracy, otherwise low-rank accuracy
 Compatible=false
 # For LRA, LE,GT have the converged point at rank 1.0
 # For HRA, LT,GE have the converged point at rank 0.0
@@ -54,7 +54,13 @@ LtEq=true
 # ReqDebugFmt=%5.0f
 
 # Date-Time Profile
-TimeZone=PDT
-TimeZoneOffset=-25200000 # offset in millisec: PST (UTC-8) = -28_800_000  PDT (UTC-7) = -25_200_000
+TimeZone=UTC
+TimeZoneOffset=0 #-25200000 # offset in millisec: PST (UTC-8) = -28_800_000  PDT (UTC-7) = -25_200_000
 FileNameDateFormat=yyyyMMdd'_'HHmmssz
 ReadableDateFormat=yyyy/MM/dd HH:mm:ss 
+
+# TEMPORARY
+INIT_NUMBER_OF_SECTIONS = 3
+NOM_CAPACITY_MULTIPLIER = 2
+MIN_K = 4
+LAZY_COMPRESSION = false
diff --git a/src/main/resources/quantiles/ReqSketchSizeSpeedJob.conf b/src/main/resources/quantiles/ReqSketchLongSizeSpeedJob.conf
similarity index 64%
copy from src/main/resources/quantiles/ReqSketchSizeSpeedJob.conf
copy to src/main/resources/quantiles/ReqSketchLongSizeSpeedJob.conf
index 5b52f2f..ca14099 100644
--- a/src/main/resources/quantiles/ReqSketchSizeSpeedJob.conf
+++ b/src/main/resources/quantiles/ReqSketchLongSizeSpeedJob.conf
@@ -15,31 +15,36 @@
 # specific language governing permissions and limitations
 # under the License.
 
-JobProfile=org.apache.datasketches.characterization.quantiles.ReqSketchSizeSpeedProfile
+JobProfile=org.apache.datasketches.characterization.quantiles.ReqSketchLongSizeSpeedProfile
 
 # Trials config (indep of sketch)
-LgMinT=4
+LgMinT=0
 LgMaxT=12
 
 ## Stream lengths
-LgMinSL=1    # The starting stream length
-LgMaxSL=20    # How high the stream length goes
-PpoSL=2       # The horizontal x-resolution of trials points per octave
+LgMinSL=10    # The starting stream length
+LgMaxSL=36    # How high the stream length goes
 LgMinBpSL=10   # The SL breakpoint where the slope starts
 LgMaxBpSL=20   # The SL breakpoint where the slope stops
 
 # Specific sketch config
-ReqK=12 # ReqSketch size and accuracy 
-HRA=false # if true use high-rank accuracy, otherwise low-rank accuracy
-#For HRA, LT,GE have the converged point at rank 0.0
-#For LRA, LE,GT have the converged point at rank 1.0
+ReqK=24 # ReqSketch size and accuracy 
+HRA=true # if true use high-rank accuracy, otherwise low-rank accuracy
+#For HRA, LT,GE??? have the converged point at rank 0.0
+#For LRA, LE,GT??? have the converged point at rank 1.0
 Criterion=LE # LT, LE, GT, GE. Must be all caps.
 
 #ReqDebugLevel=2 # or 0, 1, 2. disable by commenting it out. Use only when LgTrials=0
 #ReqDebugFmt=%5.0f
 
 # Date-Time Profile
-TimeZone=PDT
-TimeZoneOffset=-25200000 # offset in millisec: PST (UTC-8) = -28_800_000  PDT (UTC-7) = -25_200_000
+TimeZone=UTC
+TimeZoneOffset=0 #-25200000 # offset in millisec: PST (UTC-8) = -28_800_000  PDT (UTC-7) = -25_200_000
 FileNameDateFormat=yyyyMMdd'_'HHmmssz
-ReadableDateFormat=yyyy/MM/dd HH:mm:ss 
\ No newline at end of file
+ReadableDateFormat=yyyy/MM/dd HH:mm:ss 
+
+# TEMPORARY
+INIT_NUMBER_OF_SECTIONS = 333       # default: 3; NOTE: the profile casts this value to byte, so 333 wraps to 77
+NOM_CAPACITY_MULTIPLIER = 2       # default: 2
+MIN_K = 4                         # default: 4
+LAZY_COMPRESSION = false          # default: true
diff --git a/src/main/resources/quantiles/ReqSketchSizeSpeedJob.conf b/src/main/resources/quantiles/ReqSketchSizeSpeedJob.conf
index 5b52f2f..c0f6ccc 100644
--- a/src/main/resources/quantiles/ReqSketchSizeSpeedJob.conf
+++ b/src/main/resources/quantiles/ReqSketchSizeSpeedJob.conf
@@ -18,28 +18,34 @@
 JobProfile=org.apache.datasketches.characterization.quantiles.ReqSketchSizeSpeedProfile
 
 # Trials config (indep of sketch)
-LgMinT=4
+LgMinT=0
 LgMaxT=12
 
 ## Stream lengths
-LgMinSL=1    # The starting stream length
-LgMaxSL=20    # How high the stream length goes
-PpoSL=2       # The horizontal x-resolution of trials points per octave
+LgMinSL=4    # The starting stream length
+LgMaxSL=30    # How high the stream length goes
+PpoSL=1       # The horizontal x-resolution of trials points per octave
 LgMinBpSL=10   # The SL breakpoint where the slope starts
 LgMaxBpSL=20   # The SL breakpoint where the slope stops
 
 # Specific sketch config
-ReqK=12 # ReqSketch size and accuracy 
+ReqK=24 # ReqSketch size and accuracy 
 HRA=false # if true use high-rank accuracy, otherwise low-rank accuracy
-#For HRA, LT,GE have the converged point at rank 0.0
-#For LRA, LE,GT have the converged point at rank 1.0
+#For HRA, LT,GE??? have the converged point at rank 0.0
+#For LRA, LE,GT??? have the converged point at rank 1.0
 Criterion=LE # LT, LE, GT, GE. Must be all caps.
 
 #ReqDebugLevel=2 # or 0, 1, 2. disable by commenting it out. Use only when LgTrials=0
 #ReqDebugFmt=%5.0f
 
 # Date-Time Profile
-TimeZone=PDT
-TimeZoneOffset=-25200000 # offset in millisec: PST (UTC-8) = -28_800_000  PDT (UTC-7) = -25_200_000
+TimeZone=UTC
+TimeZoneOffset=0 #-25200000 # offset in millisec: PST (UTC-8) = -28_800_000  PDT (UTC-7) = -25_200_000
 FileNameDateFormat=yyyyMMdd'_'HHmmssz
-ReadableDateFormat=yyyy/MM/dd HH:mm:ss 
\ No newline at end of file
+ReadableDateFormat=yyyy/MM/dd HH:mm:ss 
+
+# TEMPORARY
+INIT_NUMBER_OF_SECTIONS = 3
+NOM_CAPACITY_MULTIPLIER = 2
+MIN_K = 4
+LAZY_COMPRESSION = true
\ No newline at end of file
diff --git a/src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchAccuracyProfile.java b/src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchAccuracyProfile.java
index bbdfd14..dad7540 100644
--- a/src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchAccuracyProfile.java
+++ b/src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchAccuracyProfile.java
@@ -76,6 +76,13 @@ public class ReqSketchAccuracyProfile implements JobProfile {
   private boolean ltEq;
   private org.apache.datasketches.req.ReqDebugImpl reqDebugImpl = null;
 
+  
+  // TEMPORARY
+  int INIT_NUMBER_OF_SECTIONS;
+  float NOM_CAPACITY_MULTIPLIER;
+  int MIN_K;
+  boolean LAZY_COMPRESSION;
+
   //DERIVED globals
   private ReqSketch sk;
 
@@ -151,6 +158,12 @@ public class ReqSketchAccuracyProfile implements JobProfile {
     K = Integer.parseInt(prop.mustGet("K"));
     hra = Boolean.parseBoolean(prop.mustGet("HRA"));
     ltEq = Boolean.parseBoolean(prop.mustGet("LtEq"));
+    
+    
+    INIT_NUMBER_OF_SECTIONS = Integer.parseInt(prop.mustGet("INIT_NUMBER_OF_SECTIONS"));
+    NOM_CAPACITY_MULTIPLIER = Float.parseFloat(prop.mustGet("NOM_CAPACITY_MULTIPLIER"));
+    MIN_K = Integer.parseInt(prop.mustGet("MIN_K"));
+    LAZY_COMPRESSION = Boolean.parseBoolean(prop.mustGet("LAZY_COMPRESSION"));
     //criterion = InequalitySearch.valueOf(prop.mustGet("Criterion"));
     String reqDebugLevel = prop.get("ReqDebugLevel");
     String reqDebugFmt = prop.get("ReqDebugFmt");
@@ -177,10 +190,11 @@ public class ReqSketchAccuracyProfile implements JobProfile {
   }
 
   void configureSketch() {
-    final ReqSketchBuilder bldr = ReqSketch.builder();
+    /*final ReqSketchBuilder bldr = ReqSketch.builder();
     bldr.setK(K).setHighRankAccuracy(hra);
-    if (reqDebugImpl != null) { bldr.setReqDebug(reqDebugImpl); }
-    sk = bldr.build();
+    if (reqDebugImpl != null) { bldr.setReqDebug(reqDebugImpl); }*/
+    sk = new ReqSketch(K, hra, null, (byte)INIT_NUMBER_OF_SECTIONS, MIN_K, NOM_CAPACITY_MULTIPLIER, LAZY_COMPRESSION);
+    //sk = bldr.build();
     sk.setLessThanOrEqual(ltEq);
   }
 
@@ -216,6 +230,7 @@ public class ReqSketchAccuracyProfile implements JobProfile {
 
   void doStreamLength(final int streamLength) {
     job.println(LS + "Stream Length: " + streamLength );
+    job.println(LS + "param k: " + K );
     job.printfData(sFmt, (Object[])columnLabels);
     //build the stream
     stream = streamMaker.makeStream(streamLength, pattern, offset);
diff --git a/src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchSizeSpeedProfile.java b/src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchLongSizeSpeedProfile.java
similarity index 79%
copy from src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchSizeSpeedProfile.java
copy to src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchLongSizeSpeedProfile.java
index 5bc4d42..73c0b95 100644
--- a/src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchSizeSpeedProfile.java
+++ b/src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchLongSizeSpeedProfile.java
@@ -23,6 +23,7 @@ import static java.lang.Math.log;
 import static java.lang.Math.pow;
 import static org.apache.datasketches.Util.pwr2LawNext;
 
+import org.apache.datasketches.Criteria;
 import org.apache.datasketches.Job;
 import org.apache.datasketches.JobProfile;
 import org.apache.datasketches.Properties;
@@ -31,8 +32,9 @@ import org.apache.datasketches.req.ReqSketchBuilder;
 
 /**
  * @author Lee Rhodes
+ * @author Pavel Vesely
  */
-public class ReqSketchSizeSpeedProfile implements JobProfile {
+public class ReqSketchLongSizeSpeedProfile implements JobProfile {
   private Job job;
   private Properties prop;
 
@@ -44,14 +46,19 @@ public class ReqSketchSizeSpeedProfile implements JobProfile {
   //For computing the different stream lengths
   private int lgMinSL;
   private int lgMaxSL;
-  private int ppoSL;
-
+  
   private double slope;
 
+  // TEMPORARY
+  int INIT_NUMBER_OF_SECTIONS;
+  float NOM_CAPACITY_MULTIPLIER;
+  int MIN_K;
+  boolean LAZY_COMPRESSION;
+
   //Target sketch configuration & error analysis
   private int reqK;
   private boolean hra; //high rank accuracy
-  private boolean ltEq;
+  private Criteria criterion;
 
   //DERIVED & GLOBALS
   private ReqSketch reqSk;
@@ -70,12 +77,16 @@ public class ReqSketchSizeSpeedProfile implements JobProfile {
     //stream length
     lgMinSL = Integer.parseInt(prop.mustGet("LgMinSL"));
     lgMaxSL = Integer.parseInt(prop.mustGet("LgMaxSL"));
-    ppoSL = Integer.parseInt(prop.mustGet("PpoSL"));
 
     //Target sketch config
     reqK = Integer.parseInt(prop.mustGet("ReqK"));
     hra = Boolean.parseBoolean(prop.mustGet("HRA"));
-    ltEq = Boolean.parseBoolean(prop.mustGet("LtEq"));
+    criterion = Criteria.valueOf(prop.mustGet("Criterion"));
+    
+    INIT_NUMBER_OF_SECTIONS = Integer.parseInt(prop.mustGet("INIT_NUMBER_OF_SECTIONS"));
+    NOM_CAPACITY_MULTIPLIER = Float.parseFloat(prop.mustGet("NOM_CAPACITY_MULTIPLIER"));
+    MIN_K = Integer.parseInt(prop.mustGet("MIN_K"));
+    LAZY_COMPRESSION = Boolean.parseBoolean(prop.mustGet("LAZY_COMPRESSION"));
   }
 
   void configureCommon() {
@@ -83,10 +94,11 @@ public class ReqSketchSizeSpeedProfile implements JobProfile {
   }
 
   void configureSketch() {
-    final ReqSketchBuilder bldr = ReqSketch.builder();
-    bldr.setK(reqK).setHighRankAccuracy(hra);
-    reqSk = bldr.build();
-    reqSk.setLessThanOrEqual(ltEq);
+    //final ReqSketchBuilder bldr = ReqSketch.builder();
+    //bldr.setK(reqK).setHighRankAccuracy(hra);
+    //reqSk = bldr.build();
+    reqSk = new ReqSketch(reqK, hra, null, (byte)INIT_NUMBER_OF_SECTIONS, MIN_K, NOM_CAPACITY_MULTIPLIER, LAZY_COMPRESSION);
+    //reqSk.setCriterion(criterion);
   }
 
 //JobProfile interface
@@ -112,13 +124,13 @@ public class ReqSketchSizeSpeedProfile implements JobProfile {
    * and outputs a row per axis plot point.
    */
   private void doTrials() {
-    final int maxSL = 1 << lgMaxSL;
-    final int minSL = 1 << lgMinSL;
-    int lastSL = 0;
+    final long maxSL = 1L << lgMaxSL;
+    final long minSL = 1L << lgMinSL;
+    long lastSL = 0;
     job.printf(sFmt, (Object[]) columnLabels); //Header
     int pp = 1;
     while (lastSL < maxSL) { //Trials for each plotPoint on X-axis, and one row on output
-      final int nextSL = lastSL == 0 ? minSL : pwr2LawNext(ppoSL, lastSL);
+      final long nextSL = lastSL == 0 ? minSL : 2 * lastSL;
       lastSL = nextSL;
       final int trials = getNumTrials(nextSL);
 
@@ -138,11 +150,11 @@ public class ReqSketchSizeSpeedProfile implements JobProfile {
    * @param streamLen the streamLength for this trial
    * @return the average update time per item for this trial
    */
-  private double doTrial(final int streamLen) {
+  private double doTrial(final long streamLen) {
     reqSk.reset();
     final long startUpdateTime_nS = System.nanoTime();
 
-    for (int i = 0; i < streamLen; i++) {
+    for (long i = 0; i < streamLen; i++) {
       reqSk.update(i);
     }
     final long updateTime_nS = System.nanoTime() - startUpdateTime_nS;
@@ -158,7 +170,7 @@ public class ReqSketchSizeSpeedProfile implements JobProfile {
    * @return the number of trials for a given current stream length for a
    * trial set.
    */
-  private int getNumTrials(final int curSL) {
+  private int getNumTrials(final long curSL) {
     final int minBpSL = 1 << lgMinBpSL;
     final int maxBpSL = 1 << lgMaxBpSL;
     final int maxT = 1 << lgMaxT;
diff --git a/src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchSizeSpeedProfile.java b/src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchSizeSpeedProfile.java
index 5bc4d42..3341356 100644
--- a/src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchSizeSpeedProfile.java
+++ b/src/test/java/org/apache/datasketches/characterization/quantiles/ReqSketchSizeSpeedProfile.java
@@ -53,6 +53,13 @@ public class ReqSketchSizeSpeedProfile implements JobProfile {
   private boolean hra; //high rank accuracy
   private boolean ltEq;
 
+  
+  // TEMPORARY
+  int INIT_NUMBER_OF_SECTIONS;
+  float NOM_CAPACITY_MULTIPLIER;
+  int MIN_K;
+  boolean LAZY_COMPRESSION;
+
   //DERIVED & GLOBALS
   private ReqSketch reqSk;
   //private KllFloatsSketch kllSk;
@@ -76,6 +83,12 @@ public class ReqSketchSizeSpeedProfile implements JobProfile {
     reqK = Integer.parseInt(prop.mustGet("ReqK"));
     hra = Boolean.parseBoolean(prop.mustGet("HRA"));
     ltEq = Boolean.parseBoolean(prop.mustGet("LtEq"));
+    
+    
+    INIT_NUMBER_OF_SECTIONS = Integer.parseInt(prop.mustGet("INIT_NUMBER_OF_SECTIONS"));
+    NOM_CAPACITY_MULTIPLIER = Float.parseFloat(prop.mustGet("NOM_CAPACITY_MULTIPLIER"));
+    MIN_K = Integer.parseInt(prop.mustGet("MIN_K"));
+    LAZY_COMPRESSION = Boolean.parseBoolean(prop.mustGet("LAZY_COMPRESSION"));
   }
 
   void configureCommon() {
@@ -83,10 +96,12 @@ public class ReqSketchSizeSpeedProfile implements JobProfile {
   }
 
   void configureSketch() {
-    final ReqSketchBuilder bldr = ReqSketch.builder();
-    bldr.setK(reqK).setHighRankAccuracy(hra);
-    reqSk = bldr.build();
+    /*final ReqSketchBuilder bldr = ReqSketch.builder();
+    bldr.setK(reqK).setHighRankAccuracy(hra);*/
+    //reqSk = bldr.build();
+    reqSk = new ReqSketch(reqK, hra, null, (byte)INIT_NUMBER_OF_SECTIONS, MIN_K, NOM_CAPACITY_MULTIPLIER, LAZY_COMPRESSION);
     reqSk.setLessThanOrEqual(ltEq);
+    
   }
 
 //JobProfile interface


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org