You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2020/07/15 20:34:23 UTC

[incubator-datasketches-characterization] 01/01: tuple sketch and union timing

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch tuple_sketch
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-characterization.git

commit 16721a2d145c0d28a492fb9805f221c2dd482fd7
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Wed Jul 15 13:33:14 2020 -0700

    tuple sketch and union timing
---
 cpp/src/main.cpp                        |   5 ++
 cpp/src/tuple_sketch_timing_profile.cpp | 106 ++++++++++++++++++++++++
 cpp/src/tuple_sketch_timing_profile.hpp |  34 ++++++++
 cpp/src/tuple_union_timing_profile.cpp  | 142 ++++++++++++++++++++++++++++++++
 cpp/src/tuple_union_timing_profile.hpp  |  34 ++++++++
 5 files changed, 321 insertions(+)

diff --git a/cpp/src/main.cpp b/cpp/src/main.cpp
index 9317d63..8cd8362 100644
--- a/cpp/src/main.cpp
+++ b/cpp/src/main.cpp
@@ -30,6 +30,9 @@
 #include "theta_sketch_timing_profile.hpp"
 #include "theta_union_timing_profile.hpp"
 
+#include "tuple_sketch_timing_profile.hpp"
+#include "tuple_union_timing_profile.hpp"
+
 #include "kll_sketch_timing_profile.hpp"
 #include "kll_merge_timing_profile.hpp"
 
@@ -67,6 +70,8 @@ int main(int argc, char **argv) {
   job_profile::add("hll-union-timing", job_profile_ptr(new hll_union_timing_profile()));
   job_profile::add("theta-sketch-timing", job_profile_ptr(new theta_sketch_timing_profile()));
   job_profile::add("theta-union-timing", job_profile_ptr(new theta_union_timing_profile()));
+  job_profile::add("tuple-sketch-timing", job_profile_ptr(new tuple_sketch_timing_profile()));
+  job_profile::add("tuple-union-timing", job_profile_ptr(new tuple_union_timing_profile()));
   job_profile::add("kll-sketch-timing-float", job_profile_ptr(new kll_sketch_timing_profile<float>()));
   job_profile::add("kll-sketch-timing-string", job_profile_ptr(new kll_sketch_timing_profile<std::string>()));
   job_profile::add("kll-merge-timing-float", job_profile_ptr(new kll_merge_timing_profile<float>()));
diff --git a/cpp/src/tuple_sketch_timing_profile.cpp b/cpp/src/tuple_sketch_timing_profile.cpp
new file mode 100644
index 0000000..91e6a8f
--- /dev/null
+++ b/cpp/src/tuple_sketch_timing_profile.cpp
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <iostream>
+#include <algorithm>
+#include <random>
+#include <chrono>
+#include <sstream>
+
+#include <tuple_sketch.hpp>
+
+#include "tuple_sketch_timing_profile.hpp"
+
+namespace datasketches {
+
+void tuple_sketch_timing_profile::run() {
+  const size_t lg_min_stream_len(0);
+  const size_t lg_max_stream_len(24);
+  const size_t ppo(16);
+
+  const size_t lg_max_trials(16);
+  const size_t lg_min_trials(8);
+
+  update_tuple_sketch<double>::builder builder;
+
+  // some arbitrary starting value
+  uint64_t counter(35538947);
+
+  const uint64_t golden64(0x9e3779b97f4a7c13ULL);  // the golden ratio
+
+  std::cout << "Stream\tTrials\tBuild\tUpdate\tSer\tDeser\tSize" << std::endl;
+
+  size_t stream_length(1 << lg_min_stream_len);
+  while (stream_length <= (1 << lg_max_stream_len)) {
+
+    std::chrono::nanoseconds build_time_ns(0);
+    std::chrono::nanoseconds update_time_ns(0);
+    std::chrono::nanoseconds compact_time_ns(0);
+    std::chrono::nanoseconds serialize_time_ns(0);
+    std::chrono::nanoseconds deserialize_time_ns(0);
+    size_t size_bytes(0);
+
+    const size_t num_trials = get_num_trials(stream_length, lg_min_stream_len, lg_max_stream_len, lg_min_trials, lg_max_trials);
+
+    for (size_t i = 0; i < num_trials; i++) {
+      const auto start_build(std::chrono::high_resolution_clock::now());
+      update_tuple_sketch<double> sketch = builder.build();
+      const auto finish_build(std::chrono::high_resolution_clock::now());
+      build_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_build - start_build);
+
+      const auto start_update(std::chrono::high_resolution_clock::now());
+      for (size_t j = 0; j < stream_length; j++) {
+        sketch.update(counter, 1);
+        counter += golden64;
+      }
+      const auto finish_update(std::chrono::high_resolution_clock::now());
+      update_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_update - start_update);
+
+      auto start_compact(std::chrono::high_resolution_clock::now());
+      auto compact_sketch = sketch.compact();
+      const auto finish_compact(std::chrono::high_resolution_clock::now());
+      compact_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_compact - start_compact);
+
+      auto start_serialize(std::chrono::high_resolution_clock::now());
+      auto bytes = compact_sketch.serialize();
+      const auto finish_serialize(std::chrono::high_resolution_clock::now());
+      serialize_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_serialize - start_serialize);
+
+      const auto start_deserialize(std::chrono::high_resolution_clock::now());
+      auto deserialized_sketch = compact_tuple_sketch<double>::deserialize(bytes.data(), bytes.size());
+      const auto finish_deserialize(std::chrono::high_resolution_clock::now());
+      deserialize_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_deserialize - start_deserialize);
+
+      size_bytes += bytes.size();
+    }
+
+    std::cout << stream_length << "\t"
+        << num_trials << "\t"
+        << (double) build_time_ns.count() / num_trials << "\t"
+        << (double) update_time_ns.count() / num_trials / stream_length << "\t"
+        << (double) serialize_time_ns.count() / num_trials << "\t"
+        << (double) deserialize_time_ns.count() / num_trials << "\t"
+        << (double) size_bytes / num_trials << "\t"
+        << std::endl;
+    stream_length = pwr_2_law_next(ppo, stream_length);
+  }
+
+}
+
+}
diff --git a/cpp/src/tuple_sketch_timing_profile.hpp b/cpp/src/tuple_sketch_timing_profile.hpp
new file mode 100644
index 0000000..a41ffe7
--- /dev/null
+++ b/cpp/src/tuple_sketch_timing_profile.hpp
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef TUPLE_SKETCH_TIMING_PROFILE_HPP_
+#define TUPLE_SKETCH_TIMING_PROFILE_HPP_
+
+#include "job_profile.hpp"
+
+namespace datasketches {
+
+class tuple_sketch_timing_profile: public job_profile {
+public:
+  void run();
+};
+
+}
+
+#endif
diff --git a/cpp/src/tuple_union_timing_profile.cpp b/cpp/src/tuple_union_timing_profile.cpp
new file mode 100644
index 0000000..79c5f34
--- /dev/null
+++ b/cpp/src/tuple_union_timing_profile.cpp
@@ -0,0 +1,142 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <iostream>
+#include <algorithm>
+#include <random>
+#include <chrono>
+#include <sstream>
+
+#include <tuple_union.hpp>
+
+#include "tuple_union_timing_profile.hpp"
+
+namespace datasketches {
+
+void tuple_union_timing_profile::run() {
+  const size_t lg_min_stream_len = 0;
+  const size_t lg_max_stream_len = 26;
+  const size_t ppo = 16;
+
+  const size_t lg_max_trials = 14;
+  const size_t lg_min_trials = 6;
+
+  const int lg_k = 12;
+  const int num_sketches_to_union = 32;
+
+  // some arbitrary starting value
+  uint64_t counter = 35538947;
+  const uint64_t golden64 = 0x9e3779b97f4a7c13ULL;  // the golden ratio
+
+  std::cout << "Stream\tTrials\tBuild\tUpdate\tCompact\tSerialize\tDeserialize\tUnion\tResult" << std::endl;
+
+  std::unique_ptr<update_tuple_sketch<double>> update_sketches[num_sketches_to_union];
+  std::unique_ptr<compact_tuple_sketch<double>> compact_sketches[num_sketches_to_union];
+
+  auto sketch_builder = update_tuple_sketch<double>::builder().set_lg_k(lg_k);
+  auto union_builder = tuple_union<double>::builder().set_lg_k(lg_k);
+
+  size_t stream_length = 1 << lg_min_stream_len;
+  while (stream_length <= (1 << lg_max_stream_len)) {
+
+    std::chrono::nanoseconds build_time_ns(0);
+    std::chrono::nanoseconds update_time_ns(0);
+    std::chrono::nanoseconds compact_time_ns(0);
+    std::chrono::nanoseconds serialize_time_ns(0);
+    std::chrono::nanoseconds deserialize_time_ns(0);
+    std::chrono::nanoseconds union_time_ns(0);
+    std::chrono::nanoseconds result_time_ns(0);
+    size_t num_retained = 0;
+
+    const size_t num_trials = get_num_trials(stream_length, lg_min_stream_len, lg_max_stream_len, lg_min_trials, lg_max_trials);
+    for (size_t t = 0; t < num_trials; t++) {
+      const auto start_build(std::chrono::high_resolution_clock::now());
+      for (size_t i = 0; i < num_sketches_to_union; i++) {
+        update_sketches[i] = std::unique_ptr<update_tuple_sketch<double>>(new update_tuple_sketch<double>(sketch_builder.build()));
+      }
+      tuple_union<double> u = union_builder.build();
+      const auto finish_build(std::chrono::high_resolution_clock::now());
+      build_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_build - start_build);
+
+      const auto start_update(std::chrono::high_resolution_clock::now());
+      size_t i = 0;
+      for (size_t j = 0; j < stream_length; j++) {
+        update_sketches[i]->update(counter, counter);
+        counter += golden64;
+        i++;
+        if (i == num_sketches_to_union) i = 0;
+      }
+      const auto finish_update(std::chrono::high_resolution_clock::now());
+      update_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_update - start_update);
+
+      const auto start_compacting(std::chrono::high_resolution_clock::now());
+      for (size_t i = 0; i < num_sketches_to_union; i++) {
+        update_sketches[i]->trim();
+        compact_sketches[i] = std::unique_ptr<compact_tuple_sketch<double>>(new compact_tuple_sketch<double>(update_sketches[i]->compact()));
+      }
+      const auto finish_compacting(std::chrono::high_resolution_clock::now());
+      compact_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_compacting - start_compacting);
+
+      std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
+      const auto start_serialize(std::chrono::high_resolution_clock::now());
+      for (size_t i = 0; i < num_sketches_to_union; i++) {
+        compact_sketches[i]->serialize(s);
+      }
+      const auto finish_serialize(std::chrono::high_resolution_clock::now());
+      serialize_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_serialize - start_serialize);
+
+      const auto start_deserialize(std::chrono::high_resolution_clock::now());
+      for (size_t i = 0; i < num_sketches_to_union; i++) {
+        compact_sketches[i] = std::unique_ptr<compact_tuple_sketch<double>>(new compact_tuple_sketch<double>(compact_tuple_sketch<double>::deserialize(s)));
+      }
+      const auto finish_deserialize(std::chrono::high_resolution_clock::now());
+      deserialize_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_deserialize - start_deserialize);
+
+      const auto start_union(std::chrono::high_resolution_clock::now());
+      for (size_t i = 0; i < num_sketches_to_union; i++) {
+        u.update(*compact_sketches[i]);
+      }
+      const auto finish_union(std::chrono::high_resolution_clock::now());
+      union_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_union - start_union);
+
+      const auto start_result(std::chrono::high_resolution_clock::now());
+      auto result = u.get_result();
+      const auto finish_result(std::chrono::high_resolution_clock::now());
+      result_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_result - start_result);
+
+      num_retained += result.get_num_retained();
+    }
+
+    std::cout << stream_length << "\t"
+        << num_trials << "\t"
+        << (double) build_time_ns.count() / num_trials << "\t"
+        << (double) update_time_ns.count() / num_trials / stream_length << "\t"
+        << (double) compact_time_ns.count() / num_trials << "\t"
+        << (double) serialize_time_ns.count() / num_trials << "\t"
+        << (double) deserialize_time_ns.count() / num_trials << "\t"
+        << (double) union_time_ns.count() / num_trials << "\t"
+        << (double) result_time_ns.count() / num_trials << "\t"
+        << (double) num_retained / num_trials
+        << std::endl;
+    stream_length = pwr_2_law_next(ppo, stream_length);
+  }
+
+}
+
+}
diff --git a/cpp/src/tuple_union_timing_profile.hpp b/cpp/src/tuple_union_timing_profile.hpp
new file mode 100644
index 0000000..9539e04
--- /dev/null
+++ b/cpp/src/tuple_union_timing_profile.hpp
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef TUPLE_UNION_TIMING_PROFILE_HPP_
+#define TUPLE_UNION_TIMING_PROFILE_HPP_
+
+#include "job_profile.hpp"
+
+namespace datasketches {
+
+class tuple_union_timing_profile: public job_profile {
+public:
+  void run();
+};
+
+}
+
+#endif


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org