You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by le...@apache.org on 2019/11/11 22:57:18 UTC

[incubator-datasketches-characterization] branch ZetaTesting created (now b19a490)

This is an automated email from the ASF dual-hosted git repository.

leerho pushed a change to branch ZetaTesting
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-characterization.git.


      at b19a490  Characterization tests for ZetaSketch,

This branch includes the following new commits:

     new b19a490  Characterization tests for ZetaSketch,

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org


[incubator-datasketches-characterization] 01/01: Characterization tests for ZetaSketch,

Posted by le...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

leerho pushed a commit to branch ZetaTesting
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-characterization.git

commit b19a49067cb71100f3a87762447a4bbe5b5879e3
Author: Lee Rhodes <le...@users.noreply.github.com>
AuthorDate: Mon Nov 11 14:56:34 2019 -0800

    Characterization tests for ZetaSketch,
    
    New BaseMergeSpeedProfile for standardized merge testing.
---
 pom.xml                                            |   8 ++
 .../characterization/hll/HllMergeSpeedProfile.java | 143 +++++++++++++++++++
 .../hll/ZetaHllAccuracyProfile.java                |  84 +++++++++++
 .../hll/ZetaHllMergeSpeedProfile.java              |  89 ++++++++++++
 .../characterization/hll/ZetaHllSerDeProfile.java  | 110 +++++++++++++++
 .../hll/ZetaHllUpdateSpeedProfile.java             |  80 +++++++++++
 .../uniquecount/BaseMergeSpeedProfile.java         | 154 +++++++++++++++++++++
 .../uniquecount/BaseSerDeProfile.java              |   2 +
 .../hll/zetasketchHll/ZetaHllAccuracyJob.conf      |  46 ++++++
 .../hll/zetasketchHll/ZetaHllMergeSpeedJob.conf    |  39 ++++++
 .../hll/zetasketchHll/ZetaHllSerDeJob.conf         |  43 ++++++
 .../hll/zetasketchHll/ZetaHllSpeedJob.conf         |  43 ++++++
 12 files changed, 841 insertions(+)

diff --git a/pom.xml b/pom.xml
index 7812611..0666b2e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -99,6 +99,7 @@ under the License.
     <druid-momentsketch.version>0.16.0-incubating</druid-momentsketch.version>
     <druid-histogram.version>0.16.0-incubating</druid-histogram.version>
     <druid-hll.version>0.16.0-incubating</druid-hll.version>
+    <zetasketch.version>0.1.0</zetasketch.version>
     <!-- END:UNIQUE FOR THIS JAVA COMPONENT -->
 
     <!-- Test -->
@@ -197,6 +198,13 @@ under the License.
       <artifactId>druid-histogram</artifactId>
       <version>${druid-histogram.version}</version>
     </dependency>
+    
+    <!-- ZetaSketch -->
+    <dependency>
+      <groupId>com.google.zetasketch</groupId>
+      <artifactId>zetasketch</artifactId>
+      <version>${zetasketch.version}</version>
+    </dependency>
 
     <!-- Dependency on Test code -->
     <dependency>
diff --git a/src/main/java/org/apache/datasketches/characterization/hll/HllMergeSpeedProfile.java b/src/main/java/org/apache/datasketches/characterization/hll/HllMergeSpeedProfile.java
new file mode 100644
index 0000000..b16349c
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/characterization/hll/HllMergeSpeedProfile.java
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.characterization.hll;
+
+import org.apache.datasketches.characterization.uniquecount.BaseMergeSpeedProfile;
+import org.apache.datasketches.hll.HllSketch;
+import org.apache.datasketches.hll.TgtHllType;
+import org.apache.datasketches.hll.Union;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.memory.WritableMemory;
+
+/**
+ * @author Lee Rhodes
+ */
+public class HllMergeSpeedProfile extends BaseMergeSpeedProfile {
+  private boolean compact;
+  private boolean wrap;
+  private boolean direct;
+  private TgtHllType tgtHllType;
+  private Union union = new Union(21);
+
+  @Override
+  public void configure() {
+    direct = Boolean.parseBoolean(prop.mustGet("HLL_direct"));
+    compact = Boolean.parseBoolean(prop.mustGet("HLL_compact"));
+    wrap = Boolean.parseBoolean(prop.mustGet("HLL_wrap"));
+
+    final String type = prop.mustGet("HLL_tgtHllType");
+    if (type.equalsIgnoreCase("HLL4")) { tgtHllType = TgtHllType.HLL_4; }
+    else if (type.equalsIgnoreCase("HLL6")) { tgtHllType = TgtHllType.HLL_6; }
+    else { tgtHllType = TgtHllType.HLL_8; }
+  }
+
+  @Override
+  public void resetMerge(final int lgK) {
+    union = new Union(lgK);
+  }
+
+  private HllSketch newSketch(final int lgK) {
+    WritableMemory wmem = null;
+    final HllSketch sk;
+    if (direct) {
+      final int bytes = HllSketch.getMaxUpdatableSerializationBytes(lgK, tgtHllType);
+      wmem = WritableMemory.allocate(bytes);
+      sk = new HllSketch(lgK, tgtHllType, wmem);
+    } else {
+      sk = new HllSketch(lgK, tgtHllType);
+    }
+    return sk;
+  }
+
+
+  /**
+   * Fills a new source sketch with 2^(lgK + lgDeltaU) unique values, optionally round-trips
+   * it through serialization, merges it into the union and stores the timings in stats.
+   * The total is deserialize + merge time, the same definition ZetaHllMergeSpeedProfile uses.
+   */
+  @Override
+  public void doTrial(final Stats stats, final int lgK, final int lgDeltaU) {
+    final int U = 1 << (lgK + lgDeltaU);
+    long start;
+    long serTime_nS = 0;
+    long deserTime_nS = 0;
+    long mergeTime_nS = 0;
+    final HllSketch source = newSketch(lgK);
+    final long vStartUnion = vIn;
+    final long vStart = vIn;
+    source.reset();
+    for (int u = 0; u < U; u++) { source.update(++vIn); }
+    final long trueU = vIn - vStart;
+    //checkEstimate(trueU, source.getEstimate(), lgK, "Source");
+    HllSketch source2 = null;
+    final byte[] byteArr;
+    if (serDe) {
+      //Serialize
+      if (compact) {
+        start = System.nanoTime();
+        byteArr = source.toCompactByteArray();
+        serTime_nS += System.nanoTime() - start;
+      } else {
+        start = System.nanoTime();
+        byteArr = source.toUpdatableByteArray();
+        serTime_nS += System.nanoTime() - start;
+      }
+      //Deserialize
+      if (wrap) {
+        start = System.nanoTime();
+        final Memory mem = Memory.wrap(byteArr);
+        source2 = HllSketch.wrap(mem);
+        deserTime_nS += System.nanoTime() - start;
+      } else { //heapify
+        start = System.nanoTime();
+        final Memory mem = Memory.wrap(byteArr);
+        source2 = HllSketch.heapify(mem);
+        deserTime_nS += System.nanoTime() - start;
+      }
+      //checkEstimate(trueU, source2.getEstimate(), lgK, "SerDe");
+      //Merge
+      start = System.nanoTime();
+      union.update(source2);
+      mergeTime_nS += System.nanoTime() - start;
+    } else {
+      //Merge
+      start = System.nanoTime();
+      union.update(source);
+      mergeTime_nS += System.nanoTime() - start;
+    }
+
+    stats.serializeTime_nS = serTime_nS;
+    stats.deserializeTime_nS = deserTime_nS;
+    stats.mergeTime_nS = mergeTime_nS;
+    stats.totalTime_nS = deserTime_nS + mergeTime_nS;
+    //final double vUnionActual = vIn - vStartUnion;
+    //checkEstimate(vUnionActual, union.getEstimate(), lgK, "Union");
+  }
+
+  void checkEstimate(final double actual, final double est, final int lgK, final String note) {
+    final double k = 1L << lgK;
+    final double bound = 3.0 / Math.sqrt(k);
+    final double err = Math.abs((est / actual) - 1.0);
+    if (err > bound) {
+      System.out.printf("ERROR: %12.3f %12.3f %20s\n", err, bound, note);
+    }
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/characterization/hll/ZetaHllAccuracyProfile.java b/src/main/java/org/apache/datasketches/characterization/hll/ZetaHllAccuracyProfile.java
new file mode 100644
index 0000000..0e06ae4
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/characterization/hll/ZetaHllAccuracyProfile.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.characterization.hll;
+
+import org.apache.datasketches.AccuracyStats;
+import org.apache.datasketches.characterization.uniquecount.BaseAccuracyProfile;
+
+import com.google.zetasketch.HyperLogLogPlusPlus;
+
+/**
+ * @author Lee Rhodes
+ */
+public class ZetaHllAccuracyProfile extends BaseAccuracyProfile {
+  private enum ZetaType { LONG, INTEGER, STRING, BYTES }
+
+  private HyperLogLogPlusPlus<?> sketch;
+  private HyperLogLogPlusPlus.Builder hllBuilder;
+  private int lgSP;
+  private String zetaType;
+  private ZetaType zType;
+
+  /**
+   * Reads the LgSP and ZetaType properties, configures the sketch builder
+   * and builds the first sketch.
+   * @throws IllegalArgumentException if ZetaType is not exactly one of
+   *     LONG, INTEGER, STRING or BYTES
+   */
+  @Override
+  public void configure() {
+    lgSP = Integer.parseInt(prop.mustGet("LgSP"));
+    zetaType = prop.mustGet("ZetaType");
+    hllBuilder = new HyperLogLogPlusPlus.Builder();
+    hllBuilder.normalPrecision(lgK);
+    hllBuilder.sparsePrecision(lgSP);
+    // Fail fast on an unrecognized type: the old if/else chain left zType null,
+    // which only surfaced later as a NullPointerException in reset().
+    zType = ZetaType.valueOf(zetaType);
+    reset();
+  }
+
+  private void reset() {
+    switch (zType) {
+      case LONG:    sketch = hllBuilder.buildForLongs(); break;
+      case INTEGER: sketch = hllBuilder.buildForIntegers(); break;
+      case STRING:  sketch = hllBuilder.buildForStrings(); break;
+      case BYTES:   sketch = hllBuilder.buildForBytes(); break;
+    }
+  }
+
+  @Override
+  public void doTrial() {
+    final int qArrLen = qArr.length;
+    reset();
+    int lastUniques = 0;
+    for (int i = 0; i < qArrLen; i++) {
+      final AccuracyStats q = qArr[i];
+      final double delta = q.trueValue - lastUniques;
+      for (int u = 0; u < delta; u++) {
+        sketch.add(++vIn);
+      }
+      lastUniques += delta;
+      final double est = sketch.result();
+      q.update(est);
+    }
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/characterization/hll/ZetaHllMergeSpeedProfile.java b/src/main/java/org/apache/datasketches/characterization/hll/ZetaHllMergeSpeedProfile.java
new file mode 100644
index 0000000..4dc4eff
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/characterization/hll/ZetaHllMergeSpeedProfile.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.characterization.hll;
+
+import org.apache.datasketches.characterization.uniquecount.BaseMergeSpeedProfile;
+
+import com.google.zetasketch.HyperLogLogPlusPlus;
+
+/**
+ * @author Lee Rhodes
+ */
+public class ZetaHllMergeSpeedProfile extends BaseMergeSpeedProfile {
+  private HyperLogLogPlusPlus.Builder hllBuilder;
+  private HyperLogLogPlusPlus<Long> target;
+
+  @Override
+  public void configure() {
+    hllBuilder = new HyperLogLogPlusPlus.Builder();
+  }
+
+  @Override
+  public void resetMerge(final int lgK) {
+    target = newSketch(lgK);
+  }
+
+  private HyperLogLogPlusPlus<Long> newSketch(final int lgK) {
+    final int lgSP = Math.min(lgK + 5, 25);
+    hllBuilder.normalPrecision(lgK);
+    hllBuilder.sparsePrecision(lgSP);
+    return hllBuilder.buildForLongs();
+  }
+
+  @SuppressWarnings("unchecked")
+  @Override
+  public void doTrial(final Stats stats, final int lgK, final int lgDeltaU) {
+    final int U = 1 << (lgK + lgDeltaU);
+    long start;
+    long serTime_nS = 0;
+    long deserTime_nS = 0;
+    long mergeTime_nS = 0;
+    final HyperLogLogPlusPlus<Long> source = newSketch(lgK);
+    for (int u = 0; u < U; u++) { source.add(++vIn); }
+    final HyperLogLogPlusPlus<Long> source2;
+
+    if (serDe) {
+      //Serialize
+      start = System.nanoTime();
+      final byte[] byteArr = source.serializeToByteArray();
+      serTime_nS += System.nanoTime() - start;
+      //Deserialize
+      start = System.nanoTime();
+      source2 = (HyperLogLogPlusPlus<Long>) HyperLogLogPlusPlus.forProto(byteArr);
+      deserTime_nS += System.nanoTime() - start;
+      //Merge
+      start = System.nanoTime();
+      target.merge(source2);
+      mergeTime_nS += System.nanoTime() - start;
+
+    } else {
+      //Merge
+      start = System.nanoTime();
+      target.merge(source);
+      mergeTime_nS += System.nanoTime() - start;
+    }
+
+    stats.serializeTime_nS = serTime_nS;
+    stats.deserializeTime_nS = deserTime_nS;
+    stats.mergeTime_nS = mergeTime_nS;
+    stats.totalTime_nS = deserTime_nS + mergeTime_nS;
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/characterization/hll/ZetaHllSerDeProfile.java b/src/main/java/org/apache/datasketches/characterization/hll/ZetaHllSerDeProfile.java
new file mode 100644
index 0000000..27f9b63
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/characterization/hll/ZetaHllSerDeProfile.java
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.characterization.hll;
+
+import org.apache.datasketches.characterization.uniquecount.BaseSerDeProfile;
+
+import com.google.zetasketch.HyperLogLogPlusPlus;
+
+/**
+ * @author Lee Rhodes
+ */
+public class ZetaHllSerDeProfile extends BaseSerDeProfile {
+  private enum ZetaType { LONG, INTEGER, STRING, BYTES }
+
+  private HyperLogLogPlusPlus<?> sketch1;
+  private HyperLogLogPlusPlus<?> sketch2;
+  private HyperLogLogPlusPlus.Builder hllBuilder;
+
+  private int lgSP;
+  private String zetaType;
+  private ZetaType zType;
+
+  /**
+   * Reads the LgSP and ZetaType properties, configures the sketch builder
+   * and builds the first pair of sketches.
+   * @throws IllegalArgumentException if ZetaType is not exactly one of
+   *     LONG, INTEGER, STRING or BYTES
+   */
+  @Override
+  public void configure() {
+    lgSP = Integer.parseInt(prop.mustGet("LgSP"));
+    zetaType = prop.mustGet("ZetaType");
+    hllBuilder = new HyperLogLogPlusPlus.Builder();
+    hllBuilder.normalPrecision(lgK);
+    hllBuilder.sparsePrecision(lgSP);
+    // Fail fast on an unrecognized type: the old if/else chain left zType null,
+    // which only surfaced later as a NullPointerException in reset().
+    zType = ZetaType.valueOf(zetaType);
+    reset();
+  }
+
+  private void reset() {
+    switch (zType) {
+      case LONG:
+        sketch1 = hllBuilder.buildForLongs();
+        sketch2 = hllBuilder.buildForLongs();
+        break;
+      case INTEGER:
+        sketch1 = hllBuilder.buildForIntegers();
+        sketch2 = hllBuilder.buildForIntegers();
+        break;
+      case STRING:
+        sketch1 = hllBuilder.buildForStrings();
+        sketch2 = hllBuilder.buildForStrings();
+        break;
+      case BYTES:
+        sketch1 = hllBuilder.buildForBytes();
+        sketch2 = hllBuilder.buildForBytes();
+        break;
+    }
+  }
+
+  @Override
+  public void doTrial(final Stats stats, final int uPerTrial) {
+    reset();
+    //Serialize
+    for (int u = uPerTrial; u-- > 0;) {
+      sketch1.add(++vIn);
+    }
+    final double est1 = sketch1.result();
+
+    final byte[] byteArr;
+    final long startSerTime_nS, stopSerTime_nS;
+
+    startSerTime_nS = System.nanoTime();
+    byteArr = sketch1.serializeToByteArray();
+    stopSerTime_nS = System.nanoTime();
+
+    //Deserialize
+    final long startDeserTime_nS, stopDeserTime_nS;
+    startDeserTime_nS = System.nanoTime();
+    sketch2 = HyperLogLogPlusPlus.forProto(byteArr);
+    stopDeserTime_nS = System.nanoTime();
+
+    final double est2 = sketch2.result();
+    assert est1 == est2;
+
+    stats.serializeTime_nS = stopSerTime_nS - startSerTime_nS;
+    stats.deserializeTime_nS = stopDeserTime_nS - startDeserTime_nS;
+    stats.size_bytes = byteArr.length;
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/characterization/hll/ZetaHllUpdateSpeedProfile.java b/src/main/java/org/apache/datasketches/characterization/hll/ZetaHllUpdateSpeedProfile.java
new file mode 100644
index 0000000..14b730c
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/characterization/hll/ZetaHllUpdateSpeedProfile.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.characterization.hll;
+
+import org.apache.datasketches.characterization.uniquecount.BaseUpdateSpeedProfile;
+
+import com.google.zetasketch.HyperLogLogPlusPlus;
+
+/**
+ * @author Lee Rhodes
+ */
+public class ZetaHllUpdateSpeedProfile extends BaseUpdateSpeedProfile {
+  private enum ZetaType { LONG, INTEGER, STRING, BYTES }
+
+  private HyperLogLogPlusPlus<?> sketch;
+  private HyperLogLogPlusPlus.Builder hllBuilder;
+  private int lgK;
+  private int lgSP;
+  private String zetaType;
+  private ZetaType zType;
+
+  /**
+   * Reads the LgK, LgSP and ZetaType properties, configures the sketch builder
+   * and builds the first sketch.
+   * @throws IllegalArgumentException if ZetaType is not exactly one of
+   *     LONG, INTEGER, STRING or BYTES
+   */
+  @Override
+  public void configure() {
+    lgK = Integer.parseInt(prop.mustGet("LgK"));
+    lgSP = Integer.parseInt(prop.mustGet("LgSP"));
+    zetaType = prop.mustGet("ZetaType");
+    hllBuilder = new HyperLogLogPlusPlus.Builder();
+    hllBuilder.normalPrecision(lgK);
+    hllBuilder.sparsePrecision(lgSP);
+    // Fail fast on an unrecognized type: the old if/else chain left zType null,
+    // which only surfaced later as a NullPointerException in reset().
+    zType = ZetaType.valueOf(zetaType);
+    reset();
+  }
+
+  private void reset() {
+    switch (zType) {
+      case LONG:    sketch = hllBuilder.buildForLongs(); break;
+      case INTEGER: sketch = hllBuilder.buildForIntegers(); break;
+      case STRING:  sketch = hllBuilder.buildForStrings(); break;
+      case BYTES:   sketch = hllBuilder.buildForBytes(); break;
+    }
+  }
+
+  @Override
+  public double doTrial(final int uPerTrial) {
+    reset();
+    final long startUpdateTime_nS = System.nanoTime();
+
+    for (int u = uPerTrial; u-- > 0;) {
+      sketch.add(++vIn);
+    }
+    final long updateTime_nS = System.nanoTime() - startUpdateTime_nS;
+    return (double) updateTime_nS / uPerTrial;
+  }
+
+}
diff --git a/src/main/java/org/apache/datasketches/characterization/uniquecount/BaseMergeSpeedProfile.java b/src/main/java/org/apache/datasketches/characterization/uniquecount/BaseMergeSpeedProfile.java
new file mode 100644
index 0000000..7d22f6b
--- /dev/null
+++ b/src/main/java/org/apache/datasketches/characterization/uniquecount/BaseMergeSpeedProfile.java
@@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.datasketches.characterization.uniquecount;
+
+//import static java.lang.Math.log;
+//import static java.lang.Math.pow;
+
+import org.apache.datasketches.Job;
+import org.apache.datasketches.JobProfile;
+import org.apache.datasketches.Properties;
+
+/**
+ * @author Lee Rhodes
+ */
+//@SuppressWarnings("javadoc")
+public abstract class BaseMergeSpeedProfile implements JobProfile  {
+  Job job;
+  public Properties prop;
+  public long vIn = 0;
+  int minLgT;
+  int maxLgT;
+  int minLgK;
+  int maxLgK;
+  public int lgDeltaU;
+  public boolean serDe;
+
+  public Stats stats = new Stats();
+
+  //JobProfile
+  @Override
+  public void start(final Job job) {
+    this.job = job;
+    prop = job.getProperties();
+    minLgT = Integer.parseInt(prop.mustGet("MinLgT"));
+    maxLgT = Integer.parseInt(prop.mustGet("MaxLgT"));
+    minLgK = Integer.parseInt(prop.mustGet("MinLgK"));
+    maxLgK = Integer.parseInt(prop.mustGet("MaxLgK"));
+    lgDeltaU = Integer.parseInt(prop.mustGet("LgDeltaU"));
+    serDe = Boolean.parseBoolean(prop.mustGet("SerDe"));
+    configure();
+    doTrials();
+    shutdown();
+    cleanup();
+  }
+
+  @Override
+  public void shutdown() {}
+
+  @Override
+  public void cleanup() {}
+
+  @Override
+  public void println(final Object obj) {
+    job.println(obj);
+  }
+  //end JobProfile
+
+  /**
+   * Configure the sketch
+   */
+  public abstract void configure();
+
+  /**
+   * Perform a single trial
+   * @param stats receives the serialize, deserialize, merge and total times of the trial
+   * @param lgK sketch size
+   * @param lgDeltaU delta size determining U: +1 = 2K, +2 = 4K; -1 = K/2, -2 = K/4, etc.
+   */
+  public abstract void doTrial(Stats stats, int lgK, int lgDeltaU);
+
+  public abstract void resetMerge(int lgK);
+
+  private void doTrials() {
+    final StringBuilder dataStr = new StringBuilder();
+    println(getHeader());
+    final Stats stats = new Stats();
+    int lgK;
+
+    for (lgK = minLgK; lgK <= maxLgK; lgK++) {
+      final int lgT = (maxLgK - lgK) + minLgT;
+      final int trials = 1 << lgT;
+      double sumSerializeTime_nS = 0;
+      double sumDeserialzeTime_nS = 0;
+      double sumMergeTime_nS = 0;
+      double sumTotalTime_nS = 0;
+      resetMerge(lgK);
+      for (int t = 0; t < trials; t++) {
+        doTrial(stats, lgK, lgDeltaU);
+        sumSerializeTime_nS += stats.serializeTime_nS;
+        sumDeserialzeTime_nS += stats.deserializeTime_nS;
+        sumMergeTime_nS += stats.mergeTime_nS;
+        sumTotalTime_nS += stats.totalTime_nS;
+      }
+      //Per sketch per trial
+      stats.serializeTime_nS = sumSerializeTime_nS / trials;
+      stats.deserializeTime_nS = sumDeserialzeTime_nS / trials;
+      stats.mergeTime_nS = sumMergeTime_nS / trials;
+      stats.totalTime_nS = sumTotalTime_nS / trials;
+      process(stats, lgK, lgT, dataStr);
+      println(dataStr.toString());
+    }
+  }
+
+  private static void process(final Stats stats,
+      final int lgK, final int lgT, final StringBuilder dataStr) {
+
+    //OUTPUT
+    dataStr.setLength(0);
+    dataStr.append(lgK).append(TAB);
+    dataStr.append(lgT).append(TAB);
+    dataStr.append(stats.serializeTime_nS).append(TAB);
+    dataStr.append(stats.deserializeTime_nS).append(TAB);
+    dataStr.append(stats.mergeTime_nS).append(TAB);
+    dataStr.append(stats.totalTime_nS).append(TAB);
+    final double slotTime_nS = stats.totalTime_nS / (1 << lgK);
+    dataStr.append(slotTime_nS);
+  }
+
+  private static String getHeader() {
+    final StringBuilder sb = new StringBuilder();
+    sb.append("LgK").append(TAB);
+    sb.append("LgT").append(TAB);
+    sb.append("Ser_nS").append(TAB);
+    sb.append("DeSer_nS").append(TAB);
+    sb.append("Merge_nS").append(TAB);
+    sb.append("Total_nS").append(TAB);
+    sb.append("PerSlot_nS");
+    return sb.toString();
+  }
+
+  public static class Stats {
+    public double serializeTime_nS;
+    public double deserializeTime_nS;
+    public double mergeTime_nS;
+    public double totalTime_nS;
+  }
+}
diff --git a/src/main/java/org/apache/datasketches/characterization/uniquecount/BaseSerDeProfile.java b/src/main/java/org/apache/datasketches/characterization/uniquecount/BaseSerDeProfile.java
index a97fdaf..fa77183 100644
--- a/src/main/java/org/apache/datasketches/characterization/uniquecount/BaseSerDeProfile.java
+++ b/src/main/java/org/apache/datasketches/characterization/uniquecount/BaseSerDeProfile.java
@@ -61,6 +61,8 @@ public abstract class BaseSerDeProfile implements JobProfile {
     lgK = Integer.parseInt(prop.mustGet("LgK"));
     configure();
     doTrials();
+    shutdown();
+    cleanup();
   }
 
   @Override
diff --git a/src/main/resources/hll/zetasketchHll/ZetaHllAccuracyJob.conf b/src/main/resources/hll/zetasketchHll/ZetaHllAccuracyJob.conf
new file mode 100644
index 0000000..b26ef1c
--- /dev/null
+++ b/src/main/resources/hll/zetasketchHll/ZetaHllAccuracyJob.conf
@@ -0,0 +1,46 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Job
+
+#Uniques Profile
+Trials_lgMinU=0  #The starting # of uniques that is printed at the end.
+Trials_lgMaxU=23 #How high the # uniques go
+Trials_UPPO=16   #The horizontal x-resolution of trials points
+Trials_string=false #Used in Druid HLL profile
+
+# Trials Profile
+Trials_lgMinT=8  #prints intermediate results starting w/ this lgMinT
+Trials_lgMaxT=16 #The max trials
+Trials_TPPO=1    #how often intermediate results are printed
+
+Trials_lgQK=12   #size of quantiles sketch for analysis
+Trials_interData=true
+Trials_postPMFs=false
+Trials_bytes=false
+
+# Date-Time Profile
+TimeZone=PST
+TimeZoneOffset=-28800000 # offset in millisec
+FileNameDateFormat=yyyyMMdd'_'HHmmssz
+ReadableDateFormat=yyyy/MM/dd HH:mm:ss z
+
+#Job Profile
+JobProfile=org.apache.datasketches.characterization.hll.ZetaHllAccuracyProfile
+LgK=12 #Normal precision, Min 10, Max 24
+LgSP=17 #Sparse precision = Normal Precision + 0 to 5, Max 25
+ZetaType=LONG #one of LONG,INTEGER,STRING,BYTES
diff --git a/src/main/resources/hll/zetasketchHll/ZetaHllMergeSpeedJob.conf b/src/main/resources/hll/zetasketchHll/ZetaHllMergeSpeedJob.conf
new file mode 100644
index 0000000..ef979ce
--- /dev/null
+++ b/src/main/resources/hll/zetasketchHll/ZetaHllMergeSpeedJob.conf
@@ -0,0 +1,39 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Job
+
+# Date-Time Profile
+TimeZone=PST
+TimeZoneOffset=-28800000 # offset in millisec
+FileNameDateFormat=yyyyMMdd'_'HHmmssz
+ReadableDateFormat=yyyy/MM/dd HH:mm:ss z
+
+#X-axis LgK Profile
+MinLgK=10
+MaxLgK=21
+
+# Trials Profile
+MinLgT=6 #Min Log Trials
+MaxLgT=6 #Max Log Trials
+
+#Job Profile
+JobProfile=org.apache.datasketches.characterization.hll.ZetaHllMergeSpeedProfile
+ZetaType=LONG #one of LONG,INTEGER,STRING,BYTES
+SerDe=true
+LgDeltaU=1
+
diff --git a/src/main/resources/hll/zetasketchHll/ZetaHllSerDeJob.conf b/src/main/resources/hll/zetasketchHll/ZetaHllSerDeJob.conf
new file mode 100644
index 0000000..1d36642
--- /dev/null
+++ b/src/main/resources/hll/zetasketchHll/ZetaHllSerDeJob.conf
@@ -0,0 +1,43 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Job
+
+#Uniques Profile
+Trials_lgMinU=0  #The starting # of uniques that is printed at the end.
+Trials_lgMaxU=24 #How high the # uniques go
+Trials_UPPO=2    #The horizontal x-resolution of trial points, Points Per Octave
+
+# Trials Profile
+Trials_lgMaxT=20  #Max trials at start (low counts)
+Trials_lgMinT=7  #Min trials at tail (high counts) 
+
+#Trials Speed related
+Trials_lgMinBpU=4   #start the downward slope of trials at this LgU
+Trials_lgMaxBpU=20  #stop the downward slope of trials at this LgU
+
+# Date-Time Profile
+TimeZone=PST
+TimeZoneOffset=-28800000 # offset in millisec
+FileNameDateFormat=yyyyMMdd'_'HHmmssz
+ReadableDateFormat=yyyy/MM/dd HH:mm:ss z
+
+#Job Profile
+JobProfile=org.apache.datasketches.characterization.hll.ZetaHllSerDeProfile
+LgK=20 #Normal precision, Min 10, Max 24
+LgSP=25 #Sparse precision = Normal Precision + 0 to 5, Max 25
+ZetaType=LONG #one of LONG,INTEGER,STRING,BYTES
\ No newline at end of file
diff --git a/src/main/resources/hll/zetasketchHll/ZetaHllSpeedJob.conf b/src/main/resources/hll/zetasketchHll/ZetaHllSpeedJob.conf
new file mode 100644
index 0000000..9b32341
--- /dev/null
+++ b/src/main/resources/hll/zetasketchHll/ZetaHllSpeedJob.conf
@@ -0,0 +1,43 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Job
+
+#Uniques Profile
+Trials_lgMinU=0  #The starting # of uniques that is printed at the end.
+Trials_lgMaxU=23 #How high the # uniques go
+Trials_UPPO=16   #The horizontal x-resolution of trials points
+
+# Trials Profile
+Trials_lgMaxT=24 #Max trials at start (low counts)
+Trials_lgMinT=4  #Min trials at tail (high counts) 
+
+#Trials Speed related
+Trials_lgMinBpU=4   #start the downward slope of trials at this LgU
+Trials_lgMaxBpU=20  #stop the downward slope of trials at this LgU
+
+# Date-Time Profile
+TimeZone=PST
+TimeZoneOffset=-28800000 # offset in millisec
+FileNameDateFormat=yyyyMMdd'_'HHmmssz
+ReadableDateFormat=yyyy/MM/dd HH:mm:ss z
+
+#Job Profile
+JobProfile=org.apache.datasketches.characterization.hll.ZetaHllUpdateSpeedProfile
+LgK=12 #Normal precision, Min 10, Max 24
+LgSP=17 #Sparse precision = Normal Precision + 0 to 5, Max 25
+ZetaType=LONG #one of LONG,INTEGER,STRING,BYTES


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org