You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2020/07/15 20:34:22 UTC

[incubator-datasketches-characterization] branch tuple_sketch created (now 16721a2)

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a change to branch tuple_sketch
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-characterization.git.


      at 16721a2  tuple sketch and union timing

This branch includes the following new commits:

     new 16721a2  tuple sketch and union timing

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org


[incubator-datasketches-characterization] 01/01: tuple sketch and union timing

Posted by al...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch tuple_sketch
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-characterization.git

commit 16721a2d145c0d28a492fb9805f221c2dd482fd7
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Wed Jul 15 13:33:14 2020 -0700

    tuple sketch and union timing
---
 cpp/src/main.cpp                        |   5 ++
 cpp/src/tuple_sketch_timing_profile.cpp | 106 ++++++++++++++++++++++++
 cpp/src/tuple_sketch_timing_profile.hpp |  34 ++++++++
 cpp/src/tuple_union_timing_profile.cpp  | 142 ++++++++++++++++++++++++++++++++
 cpp/src/tuple_union_timing_profile.hpp  |  34 ++++++++
 5 files changed, 321 insertions(+)

diff --git a/cpp/src/main.cpp b/cpp/src/main.cpp
index 9317d63..8cd8362 100644
--- a/cpp/src/main.cpp
+++ b/cpp/src/main.cpp
@@ -30,6 +30,9 @@
 #include "theta_sketch_timing_profile.hpp"
 #include "theta_union_timing_profile.hpp"
 
+#include "tuple_sketch_timing_profile.hpp"
+#include "tuple_union_timing_profile.hpp"
+
 #include "kll_sketch_timing_profile.hpp"
 #include "kll_merge_timing_profile.hpp"
 
@@ -67,6 +70,8 @@ int main(int argc, char **argv) {
   job_profile::add("hll-union-timing", job_profile_ptr(new hll_union_timing_profile()));
   job_profile::add("theta-sketch-timing", job_profile_ptr(new theta_sketch_timing_profile()));
   job_profile::add("theta-union-timing", job_profile_ptr(new theta_union_timing_profile()));
+  job_profile::add("tuple-sketch-timing", job_profile_ptr(new tuple_sketch_timing_profile()));
+  job_profile::add("tuple-union-timing", job_profile_ptr(new tuple_union_timing_profile()));
   job_profile::add("kll-sketch-timing-float", job_profile_ptr(new kll_sketch_timing_profile<float>()));
   job_profile::add("kll-sketch-timing-string", job_profile_ptr(new kll_sketch_timing_profile<std::string>()));
   job_profile::add("kll-merge-timing-float", job_profile_ptr(new kll_merge_timing_profile<float>()));
diff --git a/cpp/src/tuple_sketch_timing_profile.cpp b/cpp/src/tuple_sketch_timing_profile.cpp
new file mode 100644
index 0000000..91e6a8f
--- /dev/null
+++ b/cpp/src/tuple_sketch_timing_profile.cpp
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <iostream>
+#include <algorithm>
+#include <random>
+#include <chrono>
+#include <sstream>
+
+#include <tuple_sketch.hpp>
+
+#include "tuple_sketch_timing_profile.hpp"
+
+namespace datasketches {
+
+void tuple_sketch_timing_profile::run() {
+  const size_t lg_min_stream_len(0);
+  const size_t lg_max_stream_len(24);
+  const size_t ppo(16);
+
+  const size_t lg_max_trials(16);
+  const size_t lg_min_trials(8);
+
+  update_tuple_sketch<double>::builder builder;
+
+  // some arbitrary starting value
+  uint64_t counter(35538947);
+
+  const uint64_t golden64(0x9e3779b97f4a7c13ULL);  // the golden ratio
+
+  std::cout << "Stream\tTrials\tBuild\tUpdate\tSer\tDeser\tSize" << std::endl;
+
+  size_t stream_length(1 << lg_min_stream_len);
+  while (stream_length <= (1 << lg_max_stream_len)) {
+
+    std::chrono::nanoseconds build_time_ns(0);
+    std::chrono::nanoseconds update_time_ns(0);
+    std::chrono::nanoseconds compact_time_ns(0);
+    std::chrono::nanoseconds serialize_time_ns(0);
+    std::chrono::nanoseconds deserialize_time_ns(0);
+    size_t size_bytes(0);
+
+    const size_t num_trials = get_num_trials(stream_length, lg_min_stream_len, lg_max_stream_len, lg_min_trials, lg_max_trials);
+
+    for (size_t i = 0; i < num_trials; i++) {
+      const auto start_build(std::chrono::high_resolution_clock::now());
+      update_tuple_sketch<double> sketch = builder.build();
+      const auto finish_build(std::chrono::high_resolution_clock::now());
+      build_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_build - start_build);
+
+      const auto start_update(std::chrono::high_resolution_clock::now());
+      for (size_t j = 0; j < stream_length; j++) {
+        sketch.update(counter, 1);
+        counter += golden64;
+      }
+      const auto finish_update(std::chrono::high_resolution_clock::now());
+      update_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_update - start_update);
+
+      auto start_compact(std::chrono::high_resolution_clock::now());
+      auto compact_sketch = sketch.compact();
+      const auto finish_compact(std::chrono::high_resolution_clock::now());
+      compact_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_compact - start_compact);
+
+      auto start_serialize(std::chrono::high_resolution_clock::now());
+      auto bytes = compact_sketch.serialize();
+      const auto finish_serialize(std::chrono::high_resolution_clock::now());
+      serialize_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_serialize - start_serialize);
+
+      const auto start_deserialize(std::chrono::high_resolution_clock::now());
+      auto deserialized_sketch = compact_tuple_sketch<double>::deserialize(bytes.data(), bytes.size());
+      const auto finish_deserialize(std::chrono::high_resolution_clock::now());
+      deserialize_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_deserialize - start_deserialize);
+
+      size_bytes += bytes.size();
+    }
+
+    std::cout << stream_length << "\t"
+        << num_trials << "\t"
+        << (double) build_time_ns.count() / num_trials << "\t"
+        << (double) update_time_ns.count() / num_trials / stream_length << "\t"
+        << (double) serialize_time_ns.count() / num_trials << "\t"
+        << (double) deserialize_time_ns.count() / num_trials << "\t"
+        << (double) size_bytes / num_trials << "\t"
+        << std::endl;
+    stream_length = pwr_2_law_next(ppo, stream_length);
+  }
+
+}
+
+}
diff --git a/cpp/src/tuple_sketch_timing_profile.hpp b/cpp/src/tuple_sketch_timing_profile.hpp
new file mode 100644
index 0000000..a41ffe7
--- /dev/null
+++ b/cpp/src/tuple_sketch_timing_profile.hpp
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef TUPLE_SKETCH_TIMING_PROFILE_HPP_
+#define TUPLE_SKETCH_TIMING_PROFILE_HPP_
+
+#include "job_profile.hpp"
+
+namespace datasketches {
+
+class tuple_sketch_timing_profile: public job_profile {
+public:
+  void run();
+};
+
+}
+
+#endif
diff --git a/cpp/src/tuple_union_timing_profile.cpp b/cpp/src/tuple_union_timing_profile.cpp
new file mode 100644
index 0000000..79c5f34
--- /dev/null
+++ b/cpp/src/tuple_union_timing_profile.cpp
@@ -0,0 +1,142 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <iostream>
+#include <algorithm>
+#include <random>
+#include <chrono>
+#include <sstream>
+
+#include <tuple_union.hpp>
+
+#include "tuple_union_timing_profile.hpp"
+
+namespace datasketches {
+
+void tuple_union_timing_profile::run() {
+  const size_t lg_min_stream_len = 0;
+  const size_t lg_max_stream_len = 26;
+  const size_t ppo = 16;
+
+  const size_t lg_max_trials = 14;
+  const size_t lg_min_trials = 6;
+
+  const int lg_k = 12;
+  const int num_sketches_to_union = 32;
+
+  // some arbitrary starting value
+  uint64_t counter = 35538947;
+  const uint64_t golden64 = 0x9e3779b97f4a7c13ULL;  // the golden ratio
+
+  std::cout << "Stream\tTrials\tBuild\tUpdate\tCompact\tSerialize\tDeserialize\tUnion\tResult" << std::endl;
+
+  std::unique_ptr<update_tuple_sketch<double>> update_sketches[num_sketches_to_union];
+  std::unique_ptr<compact_tuple_sketch<double>> compact_sketches[num_sketches_to_union];
+
+  auto sketch_builder = update_tuple_sketch<double>::builder().set_lg_k(lg_k);
+  auto union_builder = tuple_union<double>::builder().set_lg_k(lg_k);
+
+  size_t stream_length = 1 << lg_min_stream_len;
+  while (stream_length <= (1 << lg_max_stream_len)) {
+
+    std::chrono::nanoseconds build_time_ns(0);
+    std::chrono::nanoseconds update_time_ns(0);
+    std::chrono::nanoseconds compact_time_ns(0);
+    std::chrono::nanoseconds serialize_time_ns(0);
+    std::chrono::nanoseconds deserialize_time_ns(0);
+    std::chrono::nanoseconds union_time_ns(0);
+    std::chrono::nanoseconds result_time_ns(0);
+    size_t num_retained = 0;
+
+    const size_t num_trials = get_num_trials(stream_length, lg_min_stream_len, lg_max_stream_len, lg_min_trials, lg_max_trials);
+    for (size_t t = 0; t < num_trials; t++) {
+      const auto start_build(std::chrono::high_resolution_clock::now());
+      for (size_t i = 0; i < num_sketches_to_union; i++) {
+        update_sketches[i] = std::unique_ptr<update_tuple_sketch<double>>(new update_tuple_sketch<double>(sketch_builder.build()));
+      }
+      tuple_union<double> u = union_builder.build();
+      const auto finish_build(std::chrono::high_resolution_clock::now());
+      build_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_build - start_build);
+
+      const auto start_update(std::chrono::high_resolution_clock::now());
+      size_t i = 0;
+      for (size_t j = 0; j < stream_length; j++) {
+        update_sketches[i]->update(counter, counter);
+        counter += golden64;
+        i++;
+        if (i == num_sketches_to_union) i = 0;
+      }
+      const auto finish_update(std::chrono::high_resolution_clock::now());
+      update_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_update - start_update);
+
+      const auto start_compacting(std::chrono::high_resolution_clock::now());
+      for (size_t i = 0; i < num_sketches_to_union; i++) {
+        update_sketches[i]->trim();
+        compact_sketches[i] = std::unique_ptr<compact_tuple_sketch<double>>(new compact_tuple_sketch<double>(update_sketches[i]->compact()));
+      }
+      const auto finish_compacting(std::chrono::high_resolution_clock::now());
+      compact_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_compacting - start_compacting);
+
+      std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
+      const auto start_serialize(std::chrono::high_resolution_clock::now());
+      for (size_t i = 0; i < num_sketches_to_union; i++) {
+        compact_sketches[i]->serialize(s);
+      }
+      const auto finish_serialize(std::chrono::high_resolution_clock::now());
+      serialize_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_serialize - start_serialize);
+
+      const auto start_deserialize(std::chrono::high_resolution_clock::now());
+      for (size_t i = 0; i < num_sketches_to_union; i++) {
+        compact_sketches[i] = std::unique_ptr<compact_tuple_sketch<double>>(new compact_tuple_sketch<double>(compact_tuple_sketch<double>::deserialize(s)));
+      }
+      const auto finish_deserialize(std::chrono::high_resolution_clock::now());
+      deserialize_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_deserialize - start_deserialize);
+
+      const auto start_union(std::chrono::high_resolution_clock::now());
+      for (size_t i = 0; i < num_sketches_to_union; i++) {
+        u.update(*compact_sketches[i]);
+      }
+      const auto finish_union(std::chrono::high_resolution_clock::now());
+      union_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_union - start_union);
+
+      const auto start_result(std::chrono::high_resolution_clock::now());
+      auto result = u.get_result();
+      const auto finish_result(std::chrono::high_resolution_clock::now());
+      result_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_result - start_result);
+
+      num_retained += result.get_num_retained();
+    }
+
+    std::cout << stream_length << "\t"
+        << num_trials << "\t"
+        << (double) build_time_ns.count() / num_trials << "\t"
+        << (double) update_time_ns.count() / num_trials / stream_length << "\t"
+        << (double) compact_time_ns.count() / num_trials << "\t"
+        << (double) serialize_time_ns.count() / num_trials << "\t"
+        << (double) deserialize_time_ns.count() / num_trials << "\t"
+        << (double) union_time_ns.count() / num_trials << "\t"
+        << (double) result_time_ns.count() / num_trials << "\t"
+        << (double) num_retained / num_trials
+        << std::endl;
+    stream_length = pwr_2_law_next(ppo, stream_length);
+  }
+
+}
+
+}
diff --git a/cpp/src/tuple_union_timing_profile.hpp b/cpp/src/tuple_union_timing_profile.hpp
new file mode 100644
index 0000000..9539e04
--- /dev/null
+++ b/cpp/src/tuple_union_timing_profile.hpp
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef TUPLE_UNION_TIMING_PROFILE_HPP_
+#define TUPLE_UNION_TIMING_PROFILE_HPP_
+
+#include "job_profile.hpp"
+
+namespace datasketches {
+
+class tuple_union_timing_profile: public job_profile {
+public:
+  void run();
+};
+
+}
+
+#endif


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org