You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2019/11/12 20:57:06 UTC

[incubator-datasketches-characterization] branch cpp_migration created (now 4c45695)

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a change to branch cpp_migration
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-characterization.git.


      at 4c45695  partial migration of C++ code

This branch includes the following new commits:

     new 4c45695  partial migration of C++ code

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org


[incubator-datasketches-characterization] 01/01: partial migration of C++ code

Posted by al...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch cpp_migration
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-characterization.git

commit 4c45695f88a7d44b9e219ae5ecebe88c9c8122aa
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Tue Nov 12 12:56:50 2019 -0800

    partial migration of C++ code
---
 cpp/src/cpc_sketch_timing_profile.cpp   | 115 ++++++++++++++++++++++++++
 cpp/src/cpc_sketch_timing_profile.hpp   |  34 ++++++++
 cpp/src/job_profile.cpp                 |  94 ++++++++++++++++++++++
 cpp/src/job_profile.hpp                 |  47 +++++++++++
 cpp/src/kll_accuracy_profile.cpp        |  61 ++++++++++++++
 cpp/src/kll_accuracy_profile.hpp        |  35 ++++++++
 cpp/src/kll_merge_accuracy_profile.cpp  |  63 +++++++++++++++
 cpp/src/kll_merge_accuracy_profile.hpp  |  34 ++++++++
 cpp/src/kll_sketch_accuracy_profile.cpp |  47 +++++++++++
 cpp/src/kll_sketch_accuracy_profile.hpp |  34 ++++++++
 cpp/src/kll_sketch_timing_profile.cpp   | 138 ++++++++++++++++++++++++++++++++
 cpp/src/kll_sketch_timing_profile.hpp   |  34 ++++++++
 cpp/src/main.cpp                        |  45 +++++++++++
 13 files changed, 781 insertions(+)

diff --git a/cpp/src/cpc_sketch_timing_profile.cpp b/cpp/src/cpc_sketch_timing_profile.cpp
new file mode 100644
index 0000000..303bba5
--- /dev/null
+++ b/cpp/src/cpc_sketch_timing_profile.cpp
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <iostream>
+#include <algorithm>
+#include <random>
+#include <chrono>
+#include <sstream>
+
+#include <cpc_sketch.hpp>
+
+#include "cpc_sketch_timing_profile.hpp"
+
+namespace datasketches {
+
+/*
+ * Times the basic operations of the CPC sketch over a range of stream lengths.
+ * For each stream length a number of sketches (trials) is built, updated with values
+ * from a running counter, serialized into one stream and deserialized back.
+ * One tab-separated row of averages per stream length is printed to standard output.
+ */
+void cpc_sketch_timing_profile::run() const {
+  using timer = std::chrono::high_resolution_clock;
+  using nanos = std::chrono::nanoseconds;
+
+  const size_t lg_min_stream_len(0);
+  const size_t lg_max_stream_len(23);
+  const size_t ppo(16);
+
+  const size_t lg_max_trials(17);
+  const size_t lg_min_trials(10);
+
+  const int lg_k(10);
+
+  // shift size_t values so that the bounds are computed and compared as unsigned
+  const size_t max_stream_length(static_cast<size_t>(1) << lg_max_stream_len);
+  const size_t max_trials(static_cast<size_t>(1) << lg_max_trials);
+
+  // some arbitrary starting value
+  uint64_t counter(35538947);
+
+  const uint64_t golden64(0x9e3779b97f4a7c13ULL);  // the golden ratio
+
+  std::cout << "Stream\tTrials\tBuild\tUpdate\tSer\tDeser\tSize\tCoupons" << std::endl;
+
+  // 2^17 pointers are too large to be safely placed on the stack, so the slots live on the heap
+  const std::unique_ptr<std::unique_ptr<cpc_sketch>[]> sketches(new std::unique_ptr<cpc_sketch>[max_trials]);
+
+  size_t stream_length(static_cast<size_t>(1) << lg_min_stream_len);
+  while (stream_length <= max_stream_length) {
+
+    nanos build_time_ns(0);
+    nanos update_time_ns(0);
+    nanos serialize_time_ns(0);
+    nanos deserialize_time_ns(0);
+    size_t size_bytes(0);
+
+    const size_t requested_trials = get_num_trials(stream_length, lg_min_stream_len, lg_max_stream_len, lg_min_trials, lg_max_trials);
+    // never index past the allocated slots
+    const size_t num_trials = (requested_trials > max_trials) ? max_trials : requested_trials;
+
+    const auto start_build(timer::now());
+    for (size_t t = 0; t < num_trials; t++) {
+      sketches[t].reset(new cpc_sketch(lg_k));
+    }
+    const auto finish_build(timer::now());
+    build_time_ns += std::chrono::duration_cast<nanos>(finish_build - start_build);
+
+    const auto start_update(timer::now());
+    for (size_t t = 0; t < num_trials; t++) {
+      for (size_t j = 0; j < stream_length; j++) {
+        sketches[t]->update(counter);
+        counter += golden64;
+      }
+    }
+    const auto finish_update(timer::now());
+    update_time_ns += std::chrono::duration_cast<nanos>(finish_update - start_update);
+
+    // all sketches of this step are written to and read back from the same stream
+    std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
+    const auto start_serialize(timer::now());
+    for (size_t t = 0; t < num_trials; t++) {
+      sketches[t]->serialize(s);
+    }
+    const auto finish_serialize(timer::now());
+    serialize_time_ns += std::chrono::duration_cast<nanos>(finish_serialize - start_serialize);
+
+    const auto start_deserialize(timer::now());
+    for (size_t t = 0; t < num_trials; t++) {
+      auto deserialized_sketch = cpc_sketch::deserialize(s);
+    }
+    const auto finish_deserialize(timer::now());
+    deserialize_time_ns += std::chrono::duration_cast<nanos>(finish_deserialize - start_deserialize);
+
+    // the write position is the total number of bytes serialized in this step
+    size_bytes += s.tellp();
+
+    double total_c(0);
+    for (size_t t = 0; t < num_trials; t++) {
+      total_c += static_cast<double>(sketches[t]->get_num_coupons());
+    }
+
+    std::cout << stream_length << "\t"
+        << num_trials << "\t"
+        << static_cast<double>(build_time_ns.count()) / num_trials << "\t"
+        << static_cast<double>(update_time_ns.count()) / num_trials / stream_length << "\t"
+        << static_cast<double>(serialize_time_ns.count()) / num_trials << "\t"
+        << static_cast<double>(deserialize_time_ns.count()) / num_trials << "\t"
+        << static_cast<double>(size_bytes) / num_trials << "\t"
+        << total_c / num_trials
+        << std::endl;
+    stream_length = pwr_2_law_next(ppo, stream_length);
+  }
+
+}
+
+}
diff --git a/cpp/src/cpc_sketch_timing_profile.hpp b/cpp/src/cpc_sketch_timing_profile.hpp
new file mode 100644
index 0000000..6ccbe9e
--- /dev/null
+++ b/cpp/src/cpc_sketch_timing_profile.hpp
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef CPC_SKETCH_TIMING_PROFILE_HPP_
+#define CPC_SKETCH_TIMING_PROFILE_HPP_
+
+#include "job_profile.hpp"
+
+namespace datasketches {
+
+// Job that prints timing measurements of the CPC sketch.
+class cpc_sketch_timing_profile: public job_profile {
+public:
+  // override makes the compiler verify that this matches job_profile::run()
+  void run() const override;
+};
+
+}
+
+#endif
diff --git a/cpp/src/job_profile.cpp b/cpp/src/job_profile.cpp
new file mode 100644
index 0000000..3e46b55
--- /dev/null
+++ b/cpp/src/job_profile.cpp
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <cmath>
+#include <stdexcept>
+
+#include "job_profile.hpp"
+
+namespace datasketches {
+
+// storage for the static name -> profile registry declared in job_profile
+std::unordered_map<std::string, std::unique_ptr<job_profile>> job_profile::registry;
+
+/*
+ * Stores the given profile in the registry under the given name, taking ownership of it.
+ * A profile stored earlier under the same name is replaced.
+ */
+void job_profile::add(const char* name, std::unique_ptr<job_profile> profile) {
+  std::unique_ptr<job_profile>& slot = registry[name];
+  slot = std::move(profile);
+}
+
+/*
+ * Looks up a registered profile by name.
+ * returns a reference to the profile owned by the registry.
+ * throws std::invalid_argument if nothing is registered under the given name.
+ */
+const job_profile& job_profile::instance(const char* name) {
+  const auto entry = registry.find(name);
+  if (entry != registry.end()) {
+    return *entry->second;
+  }
+  throw std::invalid_argument("profile not found");
+}
+
+/*
+ * Computes the next larger integer point in the power series
+ * point = 2^(i / ppo) given the current point in the series.
+ * For illustration, this can be used in a loop as follows:
+ *
+ *     const size_t max_p = 1024;
+ *     const size_t min_p = 1;
+ *     const size_t ppo = 2;
+ *
+ *     for (size_t p = min_p; p <= max_p; p = job_profile::pwr_2_law_next(ppo, p)) {
+ *       std::cout << p << " ";
+ *     }
+ *     // generates the following series:
+ *     // 1 2 3 4 6 8 11 16 23 32 45 64 91 128 181 256 362 512 724 1024
+ *
+ * param ppo Points-Per-Octave, or the number of points per integer powers of 2 in the series. Must be >= 1.
+ * param cur_point the current point of the series. Must be >= 1.
+ * returns the next point in the power series.
+ * throws std::invalid_argument if ppo is zero.
+ */
+size_t job_profile::pwr_2_law_next(size_t ppo, size_t cur_point) {
+  // a zero ppo would divide by zero below and convert infinity to an integer
+  if (ppo == 0) throw std::invalid_argument("ppo must be positive");
+  const size_t cur((cur_point < 1) ? 1 : cur_point);
+  // current generating index
+  size_t gi(static_cast<size_t>(std::round(std::log2(static_cast<double>(cur)) * ppo)));
+  size_t next;
+  // rounding maps several generating indices to the same integer in the low range, so skip the repeats
+  do {
+    ++gi;
+    next = static_cast<size_t>(std::round(std::pow(2.0, static_cast<double>(gi) / ppo)));
+  } while (next <= cur_point);
+  return next;
+}
+
+/*
+ * Counts the actual number of plotting points between lg_start and lg_end assuming the given PPO.
+ * This is not a simple linear function due to points that may be skipped in the low range.
+ * The count covers the points of the series that starts at 2^lg_start and do not exceed 2^lg_end.
+ * param lg_start Log2 of the starting value
+ * param lg_end Log2 of the ending value
+ * param ppo the number of logarithmically evenly spaced points per octave.
+ * returns the actual number of plotting points between lg_start and lg_end.
+ */
+size_t job_profile::count_points(size_t lg_start, size_t lg_end, size_t ppo) {
+  // shift a size_t rather than an int so that exponents of 31 and above do not overflow
+  const size_t one(1);
+  const size_t end(one << lg_end);
+  size_t count(0);
+  for (size_t p(one << lg_start); p <= end; p = pwr_2_law_next(ppo, p)) {
+    count++;
+  }
+  return count;
+}
+
+/*
+ * Computes the number of trials for a given x by linear interpolation in log2 space:
+ * 2^lg_max_trials at x = 2^lg_min_x going to 2^lg_min_trials at x = 2^lg_max_x.
+ * The exponent is clamped to the range given by lg_min_trials and lg_max_trials,
+ * so the result never exceeds the larger of the two trial counts.
+ * param x the point to compute the number of trials for (e.g. stream length)
+ * param lg_min_x Log2 of the smallest x
+ * param lg_max_x Log2 of the largest x
+ * param lg_min_trials Log2 of the number of trials at the largest x
+ * param lg_max_trials Log2 of the number of trials at the smallest x
+ * returns the number of trials. If the x range is empty or x is zero, returns 2^lg_max_trials.
+ */
+size_t job_profile::get_num_trials(size_t x, size_t lg_min_x, size_t lg_max_x, size_t lg_min_trials, size_t lg_max_trials) {
+  // an empty x range would divide by zero, and log2(0) is not finite
+  if (lg_min_x == lg_max_x || x == 0) return static_cast<size_t>(1) << lg_max_trials;
+
+  // convert before subtracting so that the unsigned arguments cannot wrap around
+  const double trials_span(static_cast<double>(lg_max_trials) - static_cast<double>(lg_min_trials));
+  const double x_span(static_cast<double>(lg_min_x) - static_cast<double>(lg_max_x));
+  const double slope(trials_span / x_span);
+
+  // the line is anchored at lg_min_x; for lg_min_x = 0 this is the same as slope * log2(x) + lg_max_trials
+  double lg_trials((slope * (std::log2(static_cast<double>(x)) - static_cast<double>(lg_min_x))) + lg_max_trials);
+
+  // keep the exponent in range for x outside of [2^lg_min_x, 2^lg_max_x] and against rounding at the ends
+  const double lowest(lg_min_trials < lg_max_trials ? lg_min_trials : lg_max_trials);
+  const double highest(lg_min_trials < lg_max_trials ? lg_max_trials : lg_min_trials);
+  if (lg_trials < lowest) lg_trials = lowest;
+  if (lg_trials > highest) lg_trials = highest;
+
+  return static_cast<size_t>(std::pow(2.0, lg_trials));
+}
+
+}
diff --git a/cpp/src/job_profile.hpp b/cpp/src/job_profile.hpp
new file mode 100644
index 0000000..6512757
--- /dev/null
+++ b/cpp/src/job_profile.hpp
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef JOB_PROFILE_HPP_
+#define JOB_PROFILE_HPP_
+
+#include <string>
+#include <unordered_map>
+
+namespace datasketches {
+
+/*
+ * Base class of the characterization jobs.
+ * Also keeps a static registry of jobs by name and provides static helpers
+ * for stepping through power-of-2 series and choosing the number of trials.
+ */
+class job_profile {
+public:
+  virtual ~job_profile() = default;
+
+  // runs the job
+  virtual void run() const = 0;
+
+  // registry: stores a profile under a name and finds it again
+  static void add(const char* name, std::unique_ptr<job_profile> profile);
+  static const job_profile& instance(const char* name);
+
+  // helpers shared by the jobs
+  static size_t pwr_2_law_next(size_t ppo, size_t cur_point);
+  static size_t count_points(size_t lg_start, size_t lg_end, size_t ppo);
+  static size_t get_num_trials(size_t x, size_t lg_min_x, size_t lg_max_x, size_t lg_min_trials, size_t lg_max_trials);
+
+private:
+  // the registry owns the profiles
+  static std::unordered_map<std::string, std::unique_ptr<job_profile>> registry;
+};
+
+}
+
+#endif
diff --git a/cpp/src/kll_accuracy_profile.cpp b/cpp/src/kll_accuracy_profile.cpp
new file mode 100644
index 0000000..10c57b1
--- /dev/null
+++ b/cpp/src/kll_accuracy_profile.cpp
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <iostream>
+#include <algorithm>
+#include <cmath>
+
+#include "kll_accuracy_profile.hpp"
+
+namespace datasketches {
+
+/*
+ * Runs the accuracy characterization over a series of stream lengths.
+ * For each stream length the values 0 .. stream_length-1 are prepared, run_trial() is called
+ * num_trials times, and the rank error at the error_pct percentile of the trials
+ * is printed (in percent) next to the stream length, tab-separated, to standard output.
+ */
+void kll_accuracy_profile::run() const {
+  const unsigned lg_min(0);
+  const unsigned lg_max(23);
+  const unsigned ppo(16);
+  const unsigned num_trials(100);
+  const unsigned error_pct(99);
+
+  double rank_errors[num_trials];
+
+  const unsigned max_len(1u << lg_max);
+  float* values = new float[max_len];
+
+  try {
+    const unsigned num_steps = count_points(lg_min, lg_max, ppo);
+    unsigned stream_length(1u << lg_min);
+    for (unsigned step = 0; step < num_steps; step++) {
+      // refill for every step since the values are passed to the trials as a mutable array
+      for (unsigned v = 0; v < stream_length; v++) values[v] = static_cast<float>(v);
+
+      for (unsigned t = 0; t < num_trials; t++) {
+        rank_errors[t] = run_trial(values, stream_length);
+      }
+
+      std::sort(rank_errors, rank_errors + num_trials);
+      const unsigned error_pct_index = num_trials * error_pct / 100;
+      const double rank_error = rank_errors[error_pct_index];
+
+      std::cout << stream_length << "\t" << rank_error * 100 << std::endl;
+
+      stream_length = pwr_2_law_next(ppo, stream_length);
+    }
+  } catch (...) {
+    // do not leak the buffer if a trial throws
+    delete [] values;
+    throw;
+  }
+  delete [] values;
+}
+
+}
diff --git a/cpp/src/kll_accuracy_profile.hpp b/cpp/src/kll_accuracy_profile.hpp
new file mode 100644
index 0000000..77c0021
--- /dev/null
+++ b/cpp/src/kll_accuracy_profile.hpp
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef KLL_ACCURACY_PROFILE_HPP_
+#define KLL_ACCURACY_PROFILE_HPP_
+
+#include "job_profile.hpp"
+
+namespace datasketches {
+
+// Common driver of the KLL accuracy jobs: run() calls run_trial() of the derived class.
+class kll_accuracy_profile: public job_profile {
+public:
+  // override makes the compiler verify that this matches job_profile::run()
+  void run() const override;
+
+  // performs one trial on the given values and returns its rank error
+  virtual double run_trial(float* values, unsigned stream_length) const = 0;
+};
+
+}
+
+#endif
diff --git a/cpp/src/kll_merge_accuracy_profile.cpp b/cpp/src/kll_merge_accuracy_profile.cpp
new file mode 100644
index 0000000..30090b6
--- /dev/null
+++ b/cpp/src/kll_merge_accuracy_profile.cpp
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <algorithm>
+#include <random>
+#include <chrono>
+
+#include <kll_sketch.hpp>
+
+#include "kll_merge_accuracy_profile.hpp"
+
+namespace datasketches {
+
+/*
+ * One trial: shuffles the values, distributes them over several sketches, merges the sketches
+ * and returns the largest absolute difference between the true and the estimated rank.
+ * Assumes that values is a permutation of 0 .. stream_length-1, as prepared by kll_accuracy_profile::run().
+ * param values the input values, shuffled in place
+ * param stream_length the number of values
+ * returns the maximum rank error over all values
+ */
+double kll_merge_accuracy_profile::run_trial(float* values, unsigned stream_length) const {
+  const unsigned seed = std::chrono::system_clock::now().time_since_epoch().count();
+  std::shuffle(values, values + stream_length, std::default_random_engine(seed));
+
+  const unsigned num_sketches(8);
+  std::unique_ptr<kll_sketch<float>> sketches[num_sketches];
+  for (auto& slot: sketches) {
+    slot.reset(new kll_sketch<float>());
+  }
+
+  // deal the values out to the sketches in a round-robin fashion
+  for (size_t i = 0; i < stream_length; i++) {
+    sketches[i % num_sketches]->update(values[i]);
+  }
+
+  // merge into a sketch constructed with a larger parameter first, then into a default one
+  kll_sketch<float> sketch_tmp(32*200);
+  for (unsigned i = 0; i < num_sketches; i++) sketch_tmp.merge(*sketches[i]);
+
+  kll_sketch<float> sketch;
+  sketch.merge(sketch_tmp);
+
+  double max_rank_error = 0;
+  for (size_t i = 0; i < stream_length; i++) {
+    const double true_rank = static_cast<double>(i) / stream_length;
+    const double est_rank = sketch.get_rank(static_cast<float>(i));
+    // computed by hand: a floating-point overload of the unqualified abs() is not guaranteed to be in scope here
+    const double diff = true_rank - est_rank;
+    max_rank_error = std::max(max_rank_error, diff < 0 ? -diff : diff);
+  }
+
+  return max_rank_error;
+}
+
+}
diff --git a/cpp/src/kll_merge_accuracy_profile.hpp b/cpp/src/kll_merge_accuracy_profile.hpp
new file mode 100644
index 0000000..b0c1b8d
--- /dev/null
+++ b/cpp/src/kll_merge_accuracy_profile.hpp
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef KLL_MERGE_ACCURACY_PROFILE_HPP_
+#define KLL_MERGE_ACCURACY_PROFILE_HPP_
+
+#include "kll_accuracy_profile.hpp"
+
+namespace datasketches {
+
+class kll_merge_accuracy_profile: public kll_accuracy_profile {
+public:
+  double run_trial(float* values, unsigned stream_length) const;
+};
+
+}
+
+#endif
diff --git a/cpp/src/kll_sketch_accuracy_profile.cpp b/cpp/src/kll_sketch_accuracy_profile.cpp
new file mode 100644
index 0000000..b1f3c4e
--- /dev/null
+++ b/cpp/src/kll_sketch_accuracy_profile.cpp
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <algorithm>
+#include <random>
+#include <chrono>
+
+#include <kll_sketch.hpp>
+
+#include "kll_sketch_accuracy_profile.hpp"
+
+namespace datasketches {
+
+/*
+ * One trial: shuffles the values, feeds them to a single sketch and returns
+ * the largest absolute difference between the true and the estimated rank.
+ * Assumes that values is a permutation of 0 .. stream_length-1, as prepared by kll_accuracy_profile::run().
+ * param values the input values, shuffled in place
+ * param stream_length the number of values
+ * returns the maximum rank error over all values
+ */
+double kll_sketch_accuracy_profile::run_trial(float* values, unsigned stream_length) const {
+  const unsigned seed = std::chrono::system_clock::now().time_since_epoch().count();
+  std::shuffle(values, values + stream_length, std::default_random_engine(seed));
+
+  kll_sketch<float> sketch;
+  for (size_t i = 0; i < stream_length; i++) sketch.update(values[i]);
+
+  double max_rank_error = 0;
+  for (size_t i = 0; i < stream_length; i++) {
+    const double true_rank = static_cast<double>(i) / stream_length;
+    const double est_rank = sketch.get_rank(static_cast<float>(i));
+    // computed by hand: a floating-point overload of the unqualified abs() is not guaranteed to be in scope here
+    const double diff = true_rank - est_rank;
+    max_rank_error = std::max(max_rank_error, diff < 0 ? -diff : diff);
+  }
+
+  return max_rank_error;
+}
+
+}
diff --git a/cpp/src/kll_sketch_accuracy_profile.hpp b/cpp/src/kll_sketch_accuracy_profile.hpp
new file mode 100644
index 0000000..0fd7695
--- /dev/null
+++ b/cpp/src/kll_sketch_accuracy_profile.hpp
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef KLL_SKETCH_ACCURACY_PROFILE_HPP_
+#define KLL_SKETCH_ACCURACY_PROFILE_HPP_
+
+#include "kll_accuracy_profile.hpp"
+
+namespace datasketches {
+
+class kll_sketch_accuracy_profile: public kll_accuracy_profile {
+public:
+  double run_trial(float* values, unsigned stream_length) const;
+};
+
+}
+
+#endif
diff --git a/cpp/src/kll_sketch_timing_profile.cpp b/cpp/src/kll_sketch_timing_profile.cpp
new file mode 100644
index 0000000..60c88ea
--- /dev/null
+++ b/cpp/src/kll_sketch_timing_profile.cpp
@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <iostream>
+#include <algorithm>
+#include <random>
+#include <chrono>
+#include <sstream>
+
+#include <kll_sketch.hpp>
+
+#include "kll_sketch_timing_profile.hpp"
+
+namespace datasketches {
+
+/*
+ * Times the basic operations of the KLL sketch over a range of stream lengths.
+ * For each stream length a number of trials is run; each trial builds a sketch, updates it with
+ * random values, queries it, serializes it and deserializes it back.
+ * One tab-separated row of averages per stream length is printed to standard output.
+ */
+void kll_sketch_timing_profile::run() const {
+  using timer = std::chrono::high_resolution_clock;
+  using nanos = std::chrono::nanoseconds;
+
+  const size_t lg_min_stream_len(0);
+  const size_t lg_max_stream_len(23);
+  const size_t ppo(16);
+
+  const size_t lg_max_trials(16);
+  const size_t lg_min_trials(6);
+
+  const size_t num_queries(20);
+
+  std::default_random_engine generator(std::chrono::system_clock::now().time_since_epoch().count());
+  std::uniform_real_distribution<float> distribution(0.0, 1.0);
+
+  std::cout << "Stream\tTrials\tBuild\tUpdate\tQuant\tQuants\tRank\tCDF\tSer\tDeser\tItems\tSize" << std::endl;
+
+  // shift a size_t so that the bound is computed and compared as unsigned
+  const size_t max_len(static_cast<size_t>(1) << lg_max_stream_len);
+  float* values = new float[max_len];
+
+  try {
+    // the rank queries are sorted before use
+    float rank_query_values[num_queries];
+    for (size_t q = 0; q < num_queries; q++) rank_query_values[q] = distribution(generator);
+    std::sort(&rank_query_values[0], &rank_query_values[num_queries]);
+
+    double quantile_query_values[num_queries];
+    for (size_t q = 0; q < num_queries; q++) quantile_query_values[q] = distribution(generator);
+
+    size_t stream_length(static_cast<size_t>(1) << lg_min_stream_len);
+    while (stream_length <= max_len) {
+
+      nanos build_time_ns(0);
+      nanos update_time_ns(0);
+      nanos get_quantile_time_ns(0);
+      nanos get_quantiles_time_ns(0);
+      nanos get_rank_time_ns(0);
+      nanos get_cdf_time_ns(0);
+      nanos serialize_time_ns(0);
+      nanos deserialize_time_ns(0);
+      size_t num_retained(0);
+      size_t size_bytes(0);
+
+      const size_t num_trials = get_num_trials(stream_length, lg_min_stream_len, lg_max_stream_len, lg_min_trials, lg_max_trials);
+      for (size_t t = 0; t < num_trials; t++) {
+        // fresh input for every trial, generated outside of the timed sections
+        for (size_t j = 0; j < stream_length; j++) values[j] = distribution(generator);
+
+        const auto start_build(timer::now());
+        kll_sketch<float> sketch;
+        const auto finish_build(timer::now());
+        build_time_ns += std::chrono::duration_cast<nanos>(finish_build - start_build);
+
+        const auto start_update(timer::now());
+        for (size_t j = 0; j < stream_length; j++) sketch.update(values[j]);
+        const auto finish_update(timer::now());
+        update_time_ns += std::chrono::duration_cast<nanos>(finish_update - start_update);
+
+        const auto start_get_quantile(timer::now());
+        for (size_t q = 0; q < num_queries; q++) sketch.get_quantile(quantile_query_values[q]);
+        const auto finish_get_quantile(timer::now());
+        get_quantile_time_ns += std::chrono::duration_cast<nanos>(finish_get_quantile - start_get_quantile);
+
+        const auto start_get_quantiles(timer::now());
+        sketch.get_quantiles(quantile_query_values, num_queries);
+        const auto finish_get_quantiles(timer::now());
+        get_quantiles_time_ns += std::chrono::duration_cast<nanos>(finish_get_quantiles - start_get_quantiles);
+
+        const auto start_get_rank(timer::now());
+        for (size_t q = 0; q < num_queries; q++) {
+          volatile double rank = sketch.get_rank(rank_query_values[q]); // volatile to prevent this from being optimized away
+        }
+        const auto finish_get_rank(timer::now());
+        get_rank_time_ns += std::chrono::duration_cast<nanos>(finish_get_rank - start_get_rank);
+
+        const auto start_get_cdf(timer::now());
+        sketch.get_CDF(rank_query_values, num_queries);
+        const auto finish_get_cdf(timer::now());
+        get_cdf_time_ns += std::chrono::duration_cast<nanos>(finish_get_cdf - start_get_cdf);
+
+        std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
+        const auto start_serialize(timer::now());
+        sketch.serialize(s);
+        const auto finish_serialize(timer::now());
+        serialize_time_ns += std::chrono::duration_cast<nanos>(finish_serialize - start_serialize);
+
+        const auto start_deserialize(timer::now());
+        auto sketch_ptr(kll_sketch<float>::deserialize(s));
+        const auto finish_deserialize(timer::now());
+        deserialize_time_ns += std::chrono::duration_cast<nanos>(finish_deserialize - start_deserialize);
+
+        num_retained += sketch.get_num_retained();
+        // the write position is the number of bytes serialized in this trial
+        size_bytes += s.tellp();
+      }
+      std::cout << stream_length << "\t"
+          << num_trials << "\t"
+          << static_cast<double>(build_time_ns.count()) / num_trials << "\t"
+          << static_cast<double>(update_time_ns.count()) / num_trials / stream_length << "\t"
+          << static_cast<double>(get_quantile_time_ns.count()) / num_trials / num_queries << "\t"
+          << static_cast<double>(get_quantiles_time_ns.count()) / num_trials / num_queries << "\t"
+          << static_cast<double>(get_rank_time_ns.count()) / num_trials / num_queries << "\t"
+          << static_cast<double>(get_cdf_time_ns.count()) / num_trials / num_queries << "\t"
+          << static_cast<double>(serialize_time_ns.count()) / num_trials << "\t"
+          << static_cast<double>(deserialize_time_ns.count()) / num_trials << "\t"
+          << num_retained / num_trials << "\t"
+          << size_bytes / num_trials << std::endl;
+      stream_length = pwr_2_law_next(ppo, stream_length);
+    }
+  } catch (...) {
+    // do not leak the buffer if a sketch operation throws
+    delete [] values;
+    throw;
+  }
+  delete [] values;
+}
+
+}
diff --git a/cpp/src/kll_sketch_timing_profile.hpp b/cpp/src/kll_sketch_timing_profile.hpp
new file mode 100644
index 0000000..c7872c2
--- /dev/null
+++ b/cpp/src/kll_sketch_timing_profile.hpp
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef KLL_SKETCH_TIMING_PROFILE_HPP_
+#define KLL_SKETCH_TIMING_PROFILE_HPP_
+
+#include "job_profile.hpp"
+
+namespace datasketches {
+
+// Job that prints timing measurements of the KLL sketch.
+class kll_sketch_timing_profile: public job_profile {
+public:
+  // override makes the compiler verify that this matches job_profile::run()
+  void run() const override;
+};
+
+}
+
+#endif
diff --git a/cpp/src/main.cpp b/cpp/src/main.cpp
new file mode 100644
index 0000000..cb293c7
--- /dev/null
+++ b/cpp/src/main.cpp
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <iostream>
+
+#include "job_profile.hpp"
+#include "cpc_sketch_timing_profile.hpp"
+#include "kll_sketch_timing_profile.hpp"
+#include "kll_sketch_accuracy_profile.hpp"
+#include "kll_merge_accuracy_profile.hpp"
+
+using namespace datasketches;
+using job_profile_ptr = std::unique_ptr<job_profile>;
+
+/*
+ * Registers the known profiles and runs the one named by the only command line argument.
+ * Returns 0 after running a profile and 1 if the number of arguments is wrong.
+ * job_profile::instance() throws std::invalid_argument for an unknown name;
+ * that exception is not handled here.
+ */
+int main(int argc, char **argv) {
+  job_profile::add("cpc-timing", job_profile_ptr(new cpc_sketch_timing_profile()));
+  job_profile::add("kll-timing", job_profile_ptr(new kll_sketch_timing_profile()));
+
+  job_profile::add("kll-sketch-accuracy", job_profile_ptr(new kll_sketch_accuracy_profile()));
+  job_profile::add("kll-merge-accuracy", job_profile_ptr(new kll_merge_accuracy_profile()));
+
+  if (argc != 2) {
+    std::cerr << "One parameter expected: profile name" << std::endl;
+    // report the usage error to the caller instead of claiming success
+    return 1;
+  }
+
+  const job_profile& profile = job_profile::instance(argv[1]);
+  profile.run();
+  return 0;
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org