You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by le...@apache.org on 2019/11/23 00:23:25 UTC
[incubator-datasketches-characterization] branch ZetaTesting
updated: Zeta Testing
This is an automated email from the ASF dual-hosted git repository.
leerho pushed a commit to branch ZetaTesting
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-characterization.git
The following commit(s) were added to refs/heads/ZetaTesting by this push:
new 7aac7b3 Zeta Testing
7aac7b3 is described below
commit 7aac7b3b65a69a08083b196d4ac3aad3effec8f4
Author: Lee Rhodes <le...@users.noreply.github.com>
AuthorDate: Thu Nov 21 17:12:37 2019 -0800
Zeta Testing
---
.../characterization/hll/HllMergeSpeedProfile.java | 13 +-
.../hll/HllMergeSpeedProfile2.java | 27 ++++
.../hll/ZetaHllMergeAccuracyProfile.java | 101 ++++++++++++++
.../uniquecount/BaseMergeSpeedProfile2.java | 152 +++++++++++++++++++++
src/main/resources/hll/HllMergeSpeedJob.conf | 6 +-
...llMergeSpeedJob.conf => HllMergeSpeedJob2.conf} | 9 +-
.../{druidhll => druid}/DruidHllAccuracyJob.conf | 0
.../DruidHllMergeAccuracyJob.conf | 0
.../ZetaHllAccuracyJob.conf | 0
.../ZetaHllMergeAccuracyJob.conf} | 20 ++-
.../ZetaHllMergeSpeedJob.conf | 4 +-
.../{zetasketchHll => zeta}/ZetaHllSerDeJob.conf | 0
.../{zetasketchHll => zeta}/ZetaHllSpeedJob.conf | 0
tools/SketchesCheckstyle.xml | 22 +--
14 files changed, 320 insertions(+), 34 deletions(-)
diff --git a/src/main/java/org/apache/datasketches/characterization/hll/HllMergeSpeedProfile.java b/src/main/java/org/apache/datasketches/characterization/hll/HllMergeSpeedProfile.java
index 05fbaf1..6b077d4 100644
--- a/src/main/java/org/apache/datasketches/characterization/hll/HllMergeSpeedProfile.java
+++ b/src/main/java/org/apache/datasketches/characterization/hll/HllMergeSpeedProfile.java
@@ -35,6 +35,7 @@ public class HllMergeSpeedProfile extends BaseMergeSpeedProfile {
private boolean direct;
private TgtHllType tgtHllType;
private Union union = new Union(21);
+ private HllSketch source;
@Override
public void configure() {
@@ -51,6 +52,12 @@ public class HllMergeSpeedProfile extends BaseMergeSpeedProfile {
@Override
public void resetMerge(final int lgK) {
union = new Union(lgK);
+ source = newSketch(lgK);
+ int U = 2 << lgK;
+ for (int i = 0; i < U; i++) {
+ union.update(++vIn);
+ source.update(vIn);
+ }
}
private HllSketch newSketch(final int lgK) {
@@ -74,12 +81,10 @@ public class HllMergeSpeedProfile extends BaseMergeSpeedProfile {
long serTime_nS = 0;
long deserTime_nS = 0;
long mergeTime_nS = 0;
- final HllSketch source = newSketch(lgK);
+ //final HllSketch source = newSketch(lgK);
//final long vStartUnion = vIn;
-
//final long vStart = vIn;
- source.reset();
- for (int u = 0; u < U; u++) { source.update(++vIn); }
+ //for (int u = 0; u < U; u++) { source.update(++vIn); }
//final long trueU = vIn - vStart;
//checkEstimate(trueU, source.getEstimate(), lgK, "Source");
HllSketch source2 = null;
diff --git a/src/main/java/org/apache/datasketches/characterization/hll/HllMergeSpeedProfile2.java b/src/main/java/org/apache/datasketches/characterization/hll/HllMergeSpeedProfile2.java
new file mode 100644
index 0000000..ba17c7c
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/characterization/hll/HllMergeSpeedProfile2.java
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.characterization.hll;
+
+/**
+ * Placeholder class: as committed here it declares no fields or methods and neither extends
+ * nor implements any profile type.
+ *
+ * @author Lee Rhodes
+ */
+public class HllMergeSpeedProfile2 {
+
+}
diff --git a/src/main/java/org/apache/datasketches/characterization/hll/ZetaHllMergeAccuracyProfile.java b/src/main/java/org/apache/datasketches/characterization/hll/ZetaHllMergeAccuracyProfile.java
new file mode 100644
index 0000000..56cc3a7
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/characterization/hll/ZetaHllMergeAccuracyProfile.java
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.characterization.hll;
+
+import java.util.Random;
+
+import org.apache.datasketches.Job;
+import org.apache.datasketches.JobProfile;
+import com.google.zetasketch.HyperLogLogPlusPlus;
+
+/**
+ * Measures the accuracy of merging many zetasketch HyperLogLog++ sketches into a single target.
+ *
+ * <p>The configuration (<i>lgK</i>, <i>numTrials</i>, <i>numSketches</i>,
+ * <i>distinctKeysPerSketch</i>) is read from the job properties in {@link #start(Job)}, which
+ * then runs the merge trials and prints the summary statistics through the job.</p>
+ *
+ * @author Lee Rhodes
+ */
+public class ZetaHllMergeAccuracyProfile implements JobProfile {
+  //Shared builder; newSketch() re-applies both precisions before every build.
+  private HyperLogLogPlusPlus.Builder hllBuilder = new HyperLogLogPlusPlus.Builder();
+  private HyperLogLogPlusPlus<Long> target;
+
+  private static final Random random = new Random();
+
+  private Job job;
+  private int lgK;
+  private int numTrials;
+  private int numSketches;
+  private int distinctKeysPerSketch;
+
+  /**
+   * Reads the configuration from the job properties and immediately runs the merge trials.
+   * @param job the job that supplies the properties and through which results are printed
+   */
+  @Override
+  public void start(Job job) {
+    this.job = job;
+    lgK = Integer.parseInt(job.getProperties().mustGet("lgK"));
+    numTrials = Integer.parseInt(job.getProperties().mustGet("numTrials"));
+    numSketches = Integer.parseInt(job.getProperties().mustGet("numSketches"));
+    distinctKeysPerSketch = Integer.parseInt(job.getProperties().mustGet("distinctKeysPerSketch"));
+    runMergeTrials();
+  }
+
+  /**
+   * Builds an empty sketch for long values.
+   * @param lgK the normal precision; the sparse precision is set to min(lgK + 5, 25)
+   * @return a new, empty sketch
+   */
+  private HyperLogLogPlusPlus<Long> newSketch(final int lgK) {
+    final int lgSP = Math.min(lgK + 5, 25);
+    hllBuilder.normalPrecision(lgK);
+    hllBuilder.sparsePrecision(lgSP);
+    return hllBuilder.buildForLongs();
+  }
+
+  @Override
+  public void shutdown() { }
+
+  @Override
+  public void cleanup() { }
+
+  @Override
+  public void println(Object obj) {
+    job.println(obj);
+  }
+
+  /**
+   * Runs <i>numTrials</i> trials. Each trial merges <i>numSketches</i> freshly built sketches,
+   * each loaded with <i>distinctKeysPerSketch</i> keys, into a new target sketch and records the
+   * target's estimate. Prints the true count, the mean estimate, the mean relative error and the
+   * root-mean-square deviation from the true count relative to the true count.
+   */
+  private void runMergeTrials() {
+    //Keys are consecutive longs from a random start and are never reused, so every key added
+    //during the run is distinct.
+    long key = random.nextLong();
+
+    //Widen before multiplying: an int * int product silently overflows above 2^31 - 1.
+    final double trueCount = (double) numSketches * distinctKeysPerSketch;
+    double sumEstimates = 0;
+    double sumOfSquaredDeviationsFromTrueCount = 0;
+
+    for (int t = 0; t < numTrials; t++) {
+      target = newSketch(lgK);
+
+      for (int s = 0; s < numSketches; s++) {
+        final HyperLogLogPlusPlus<Long> sketch = newSketch(lgK);
+        for (int k = 0; k < distinctKeysPerSketch; k++) {
+          sketch.add(key++);
+        }
+        target.merge(sketch);
+      }
+      final double estimatedCount = target.result();
+      final double deviation = estimatedCount - trueCount;
+      sumEstimates += estimatedCount;
+      sumOfSquaredDeviationsFromTrueCount += deviation * deviation;
+    }
+    final double meanEstimate = sumEstimates / numTrials;
+    final double meanRelativeError = (meanEstimate / trueCount) - 1;
+    final double relativeStandardError
+        = Math.sqrt(sumOfSquaredDeviationsFromTrueCount / numTrials) / trueCount;
+    println("True count: " + trueCount);
+    println("Mean estimate: " + meanEstimate);
+    println("Mean Relative Error: " + meanRelativeError);
+    println("Relative Standard Error: " + relativeStandardError);
+  }
+}
diff --git a/src/main/java/org/apache/datasketches/characterization/uniquecount/BaseMergeSpeedProfile2.java b/src/main/java/org/apache/datasketches/characterization/uniquecount/BaseMergeSpeedProfile2.java
new file mode 100644
index 0000000..56cad40
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/characterization/uniquecount/BaseMergeSpeedProfile2.java
@@ -0,0 +1,152 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.characterization.uniquecount;
+
+import org.apache.datasketches.Job;
+import org.apache.datasketches.Properties;
+import org.apache.datasketches.JobProfile;
+
+/**
+ * Base class for merge-speed profiles. For each lgK from <i>MinLgK</i> to <i>MaxLgK</i> it calls
+ * {@link #resetMerge(int)} once, runs a number of trials through
+ * {@link #doTrial(Stats, int, int)}, and prints one tab-separated row of per-trial average times.
+ *
+ * @author Lee Rhodes
+ */
+public abstract class BaseMergeSpeedProfile2 implements JobProfile {
+  Job job;
+  public Properties prop;
+  public long vIn = 0;
+  int minLgT;
+  int maxLgT; //parsed from "MaxLgT" but not otherwise used within this class
+  int minLgK;
+  int maxLgK;
+  public int lgDeltaU;
+  public boolean serDe;
+
+  //Timings of the most recent trial; after each lgK pass it holds the per-trial averages.
+  public Stats stats = new Stats();
+
+  //JobProfile
+  /**
+   * Reads the profile properties, then runs configure(), the trials, shutdown() and cleanup()
+   * in that order.
+   * @param job the job that supplies the properties and through which results are printed
+   */
+  @Override
+  public void start(final Job job) {
+    this.job = job;
+    prop = job.getProperties();
+    minLgT = Integer.parseInt(prop.mustGet("MinLgT"));
+    maxLgT = Integer.parseInt(prop.mustGet("MaxLgT"));
+    minLgK = Integer.parseInt(prop.mustGet("MinLgK"));
+    maxLgK = Integer.parseInt(prop.mustGet("MaxLgK"));
+    lgDeltaU = Integer.parseInt(prop.mustGet("LgDeltaU"));
+    serDe = Boolean.parseBoolean(prop.mustGet("SerDe"));
+    configure();
+    doTrials();
+    shutdown();
+    cleanup();
+  }
+
+  @Override
+  public void shutdown() {}
+
+  @Override
+  public void cleanup() {}
+
+  @Override
+  public void println(final Object obj) {
+    job.println(obj);
+  }
+  //end JobProfile
+
+  /**
+   * Configure the sketch
+   */
+  public abstract void configure();
+
+  /**
+   * Perform a single trial
+   * @param stats receives the times measured by this trial; all four fields are read back
+   * after the call
+   * @param lgK sketch size
+   * @param lgDeltaU delta size determining U: +1 = 2K, +2 = 4K; -1 = K/2, -2 = K/4, etc.
+   */
+  public abstract void doTrial(Stats stats, int lgK, int lgDeltaU);
+
+  /**
+   * Called once for each lgK, before any trial at that lgK is run.
+   * @param lgK sketch size for the trials that follow
+   */
+  public abstract void resetMerge(int lgK);
+
+  /**
+   * Prints the header, then one data row per lgK. The number of trials at a given lgK is
+   * 2^((maxLgK - lgK) + minLgT), so it halves with every increment of lgK.
+   */
+  private void doTrials() {
+    final StringBuilder dataStr = new StringBuilder();
+    println(getHeader());
+    //Use the public field rather than a local that hides it; start every run from zeroed stats.
+    stats = new Stats();
+
+    for (int lgK = minLgK; lgK <= maxLgK; lgK++) {
+      final int lgT = (maxLgK - lgK) + minLgT;
+      final int trials = 1 << lgT;
+      double sumSerializeTime_nS = 0;
+      double sumDeserializeTime_nS = 0;
+      double sumMergeTime_nS = 0;
+      double sumTotalTime_nS = 0;
+      resetMerge(lgK);
+      for (int t = 0; t < trials; t++) {
+        doTrial(stats, lgK, lgDeltaU);
+        sumSerializeTime_nS += stats.serializeTime_nS;
+        sumDeserializeTime_nS += stats.deserializeTime_nS;
+        sumMergeTime_nS += stats.mergeTime_nS;
+        sumTotalTime_nS += stats.totalTime_nS;
+      }
+      //Per sketch per trial
+      stats.serializeTime_nS = sumSerializeTime_nS / trials;
+      stats.deserializeTime_nS = sumDeserializeTime_nS / trials;
+      stats.mergeTime_nS = sumMergeTime_nS / trials;
+      stats.totalTime_nS = sumTotalTime_nS / trials;
+      process(stats, lgK, lgT, dataStr);
+      println(dataStr.toString());
+    }
+  }
+
+  //NOTE(review): TAB is not declared or imported in this file as shown; presumably it is
+  //inherited from JobProfile or needs a static import - confirm.
+
+  /**
+   * Formats one output row into dataStr, replacing its previous content. The columns match
+   * getHeader(); the last column is the total time divided by 2^lgK.
+   * @param stats the per-trial average times to print
+   * @param lgK sketch size of this row
+   * @param lgT log-base2 of the number of trials run for this row
+   * @param dataStr the reusable row buffer
+   */
+  private static void process(final Stats stats,
+      final int lgK, final int lgT, final StringBuilder dataStr) {
+
+    //OUTPUT
+    dataStr.setLength(0);
+    dataStr.append(lgK).append(TAB);
+    dataStr.append(lgT).append(TAB);
+    dataStr.append(stats.serializeTime_nS).append(TAB);
+    dataStr.append(stats.deserializeTime_nS).append(TAB);
+    dataStr.append(stats.mergeTime_nS).append(TAB);
+    dataStr.append(stats.totalTime_nS).append(TAB);
+    //long shift so the divisor cannot wrap for large lgK
+    final double slotTime_nS = stats.totalTime_nS / (1L << lgK);
+    dataStr.append(slotTime_nS);
+  }
+
+  /**
+   * Returns the tab-separated column header for the rows built by process().
+   * @return the header row
+   */
+  private static String getHeader() {
+    final StringBuilder sb = new StringBuilder();
+    sb.append("LgK").append(TAB);
+    sb.append("LgT").append(TAB);
+    sb.append("Ser_nS").append(TAB);
+    sb.append("DeSer_nS").append(TAB);
+    sb.append("Merge_nS").append(TAB);
+    sb.append("Total_nS").append(TAB);
+    sb.append("PerSlot_nS");
+    return sb.toString();
+  }
+
+  /**
+   * Mutable holder for the four times exchanged between doTrial() and doTrials().
+   */
+  public static class Stats {
+    public double serializeTime_nS;
+    public double deserializeTime_nS;
+    public double mergeTime_nS;
+    public double totalTime_nS;
+  }
+
+}
diff --git a/src/main/resources/hll/HllMergeSpeedJob.conf b/src/main/resources/hll/HllMergeSpeedJob.conf
index 7f173c2..f86c083 100644
--- a/src/main/resources/hll/HllMergeSpeedJob.conf
+++ b/src/main/resources/hll/HllMergeSpeedJob.conf
@@ -28,13 +28,13 @@ MinLgK=10
MaxLgK=21
# Trials Profile
-MinLgT=6 #Min Log Trails
-MaxLgT=6 #Max Log Trials
+MinLgT=11 #Min Log Trials
+MaxLgT=11 #Max Log Trials
#Job Profile
JobProfile=org.apache.datasketches.characterization.hll.HllMergeSpeedProfile
SerDe=false
-LgDeltaU=1
+LgDeltaU=2
HLL_tgtHllType=HLL8
HLL_direct=false #only for Theta, HLL. See javadocs.
HLL_useComposite=false #HllAccuracyProfie
diff --git a/src/main/resources/hll/HllMergeSpeedJob.conf b/src/main/resources/hll/HllMergeSpeedJob2.conf
similarity index 93%
copy from src/main/resources/hll/HllMergeSpeedJob.conf
copy to src/main/resources/hll/HllMergeSpeedJob2.conf
index 7f173c2..24b27d4 100644
--- a/src/main/resources/hll/HllMergeSpeedJob.conf
+++ b/src/main/resources/hll/HllMergeSpeedJob2.conf
@@ -23,9 +23,10 @@ TimeZoneOffset=-28800000 # offset in millisec
FileNameDateFormat=yyyyMMdd'_'HHmmssz
ReadableDateFormat=yyyy/MM/dd HH:mm:ss z
-#X-axis LgK Profile
-MinLgK=10
-MaxLgK=21
+#X-axis LgU Profile
+MinLgU=0
+MaxLgU=20
+UPPO=4
# Trials Profile
MinLgT=6 #Min Log Trails
@@ -33,6 +34,8 @@ MaxLgT=6 #Max Log Trials
#Job Profile
JobProfile=org.apache.datasketches.characterization.hll.HllMergeSpeedProfile
+LgK=12
+
SerDe=false
LgDeltaU=1
HLL_tgtHllType=HLL8
diff --git a/src/main/resources/hll/druidhll/DruidHllAccuracyJob.conf b/src/main/resources/hll/druid/DruidHllAccuracyJob.conf
similarity index 100%
rename from src/main/resources/hll/druidhll/DruidHllAccuracyJob.conf
rename to src/main/resources/hll/druid/DruidHllAccuracyJob.conf
diff --git a/src/main/resources/hll/druidhll/DruidHllMergeAccuracyJob.conf b/src/main/resources/hll/druid/DruidHllMergeAccuracyJob.conf
similarity index 100%
rename from src/main/resources/hll/druidhll/DruidHllMergeAccuracyJob.conf
rename to src/main/resources/hll/druid/DruidHllMergeAccuracyJob.conf
diff --git a/src/main/resources/hll/zetasketchHll/ZetaHllAccuracyJob.conf b/src/main/resources/hll/zeta/ZetaHllAccuracyJob.conf
similarity index 100%
rename from src/main/resources/hll/zetasketchHll/ZetaHllAccuracyJob.conf
rename to src/main/resources/hll/zeta/ZetaHllAccuracyJob.conf
diff --git a/src/main/resources/hll/zetasketchHll/ZetaHllMergeSpeedJob.conf b/src/main/resources/hll/zeta/ZetaHllMergeAccuracyJob.conf
similarity index 87%
copy from src/main/resources/hll/zetasketchHll/ZetaHllMergeSpeedJob.conf
copy to src/main/resources/hll/zeta/ZetaHllMergeAccuracyJob.conf
index ef979ce..30f1ff8 100644
--- a/src/main/resources/hll/zetasketchHll/ZetaHllMergeSpeedJob.conf
+++ b/src/main/resources/hll/zeta/ZetaHllMergeAccuracyJob.conf
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
-# Job
+#Job
# Date-Time Profile
TimeZone=PST
@@ -23,17 +23,15 @@ TimeZoneOffset=-28800000 # offset in millisec
FileNameDateFormat=yyyyMMdd'_'HHmmssz
ReadableDateFormat=yyyy/MM/dd HH:mm:ss z
-#X-axis LgK Profile
-MinLgK=10
-MaxLgK=21
-
# Trials Profile
-MinLgT=6 #Min Log Trails
-MaxLgT=6 #Max Log Trials
+numTrials=100
+numSketches=8192
+distinctKeysPerSketch=32768
-#Job Profile
-JobProfile=org.apache.datasketches.characterization.hll.ZetaHllMergeSpeedProfile
+# Job Profile
+JobProfile=org.apache.datasketches.characterization.hll.ZetaHllMergeAccuracyProfile
ZetaType=LONG #one of LONG,INTEGER,STRING,BYTES
-SerDe=true
-LgDeltaU=1
+lgK=11
+
+
diff --git a/src/main/resources/hll/zetasketchHll/ZetaHllMergeSpeedJob.conf b/src/main/resources/hll/zeta/ZetaHllMergeSpeedJob.conf
similarity index 98%
rename from src/main/resources/hll/zetasketchHll/ZetaHllMergeSpeedJob.conf
rename to src/main/resources/hll/zeta/ZetaHllMergeSpeedJob.conf
index ef979ce..fc76c54 100644
--- a/src/main/resources/hll/zetasketchHll/ZetaHllMergeSpeedJob.conf
+++ b/src/main/resources/hll/zeta/ZetaHllMergeSpeedJob.conf
@@ -34,6 +34,6 @@ MaxLgT=6 #Max Log Trials
#Job Profile
JobProfile=org.apache.datasketches.characterization.hll.ZetaHllMergeSpeedProfile
ZetaType=LONG #one of LONG,INTEGER,STRING,BYTES
-SerDe=true
-LgDeltaU=1
+SerDe=false
+LgDeltaU=2
diff --git a/src/main/resources/hll/zetasketchHll/ZetaHllSerDeJob.conf b/src/main/resources/hll/zeta/ZetaHllSerDeJob.conf
similarity index 100%
rename from src/main/resources/hll/zetasketchHll/ZetaHllSerDeJob.conf
rename to src/main/resources/hll/zeta/ZetaHllSerDeJob.conf
diff --git a/src/main/resources/hll/zetasketchHll/ZetaHllSpeedJob.conf b/src/main/resources/hll/zeta/ZetaHllSpeedJob.conf
similarity index 100%
rename from src/main/resources/hll/zetasketchHll/ZetaHllSpeedJob.conf
rename to src/main/resources/hll/zeta/ZetaHllSpeedJob.conf
diff --git a/tools/SketchesCheckstyle.xml b/tools/SketchesCheckstyle.xml
index 7b1a7a3..b7659fd 100644
--- a/tools/SketchesCheckstyle.xml
+++ b/tools/SketchesCheckstyle.xml
@@ -23,7 +23,7 @@ under the License.
-->
<!--
- SketchesCheckstyle.xml for datasketches-java
+ SketchesCheckstyle.xml
Checkstyle is very configurable. Be sure to read the documentation at
http://checkstyle.sourceforge.net (or in your downloaded distribution). Note: Does not work with https.
@@ -48,6 +48,14 @@ under the License.
<property name="lineSeparator" value="lf"/>
</module>
+ <!-- Size Violations -->
+ <module name="LineLength">
+ <property name="severity" value="warning"/>
+ <property name="max" value="110"/>
+ <property name="ignorePattern" value="^package.*|^import.*|a href|href|http://|https://|ftp://"/>
+ <!-- <metadata name="net.sf.eclipsecs.core.lastEnabledSeverity" value="inherit"/> -->
+ </module>
+
<module name="TreeWalker">
<!-- Annotations -->
@@ -121,7 +129,7 @@ under the License.
<module name="MultipleVariableDeclarations">
<property name="severity" value="ignore"/>
- <!--<metadata name="net.sf.eclipsecs.core.lastEnabledSeverity" value="inherit"/> -->
+ <metadata name="net.sf.eclipsecs.core.lastEnabledSeverity" value="inherit"/>
</module>
<module name="NoFinalizer">
@@ -185,7 +193,7 @@ under the License.
<property name="allowMissingParamTags" value="false"/>
<property name="allowMissingThrowsTags" value="true"/>
<property name="allowMissingReturnTag" value="false"/>
- <property name="minLineCount" value="2"/>
+ <!-- <property name="minLineCount" value="2"/> -->
<property name="allowedAnnotations" value="Override, Test"/>
<property name="allowThrowsTagsForSubclasses" value="true"/>
</module>
@@ -333,14 +341,6 @@ under the License.
<property name="ignoreComments" value="true"/>
</module>
- <!-- Size Violations -->
- <module name="LineLength">
- <property name="severity" value="warning"/>
- <property name="max" value="110"/>
- <property name="ignorePattern" value="^package.*|^import.*|a href|href|http://|https://|ftp://"/>
- <!-- <metadata name="net.sf.eclipsecs.core.lastEnabledSeverity" value="inherit"/> -->
- </module>
-
<!-- Whitespace -->
<module name="EmptyLineSeparator">
<property name="allowNoEmptyLineBetweenFields" value="true"/>
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org