You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2020/07/15 20:34:23 UTC
[incubator-datasketches-characterization] 01/01: tuple sketch and
union timing
This is an automated email from the ASF dual-hosted git repository.
alsay pushed a commit to branch tuple_sketch
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-characterization.git
commit 16721a2d145c0d28a492fb9805f221c2dd482fd7
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Wed Jul 15 13:33:14 2020 -0700
tuple sketch and union timing
---
cpp/src/main.cpp | 5 ++
cpp/src/tuple_sketch_timing_profile.cpp | 106 ++++++++++++++++++++++++
cpp/src/tuple_sketch_timing_profile.hpp | 34 ++++++++
cpp/src/tuple_union_timing_profile.cpp | 142 ++++++++++++++++++++++++++++++++
cpp/src/tuple_union_timing_profile.hpp | 34 ++++++++
5 files changed, 321 insertions(+)
diff --git a/cpp/src/main.cpp b/cpp/src/main.cpp
index 9317d63..8cd8362 100644
--- a/cpp/src/main.cpp
+++ b/cpp/src/main.cpp
@@ -30,6 +30,9 @@
#include "theta_sketch_timing_profile.hpp"
#include "theta_union_timing_profile.hpp"
+#include "tuple_sketch_timing_profile.hpp"
+#include "tuple_union_timing_profile.hpp"
+
#include "kll_sketch_timing_profile.hpp"
#include "kll_merge_timing_profile.hpp"
@@ -67,6 +70,8 @@ int main(int argc, char **argv) {
job_profile::add("hll-union-timing", job_profile_ptr(new hll_union_timing_profile()));
job_profile::add("theta-sketch-timing", job_profile_ptr(new theta_sketch_timing_profile()));
job_profile::add("theta-union-timing", job_profile_ptr(new theta_union_timing_profile()));
+ job_profile::add("tuple-sketch-timing", job_profile_ptr(new tuple_sketch_timing_profile()));
+ job_profile::add("tuple-union-timing", job_profile_ptr(new tuple_union_timing_profile()));
job_profile::add("kll-sketch-timing-float", job_profile_ptr(new kll_sketch_timing_profile<float>()));
job_profile::add("kll-sketch-timing-string", job_profile_ptr(new kll_sketch_timing_profile<std::string>()));
job_profile::add("kll-merge-timing-float", job_profile_ptr(new kll_merge_timing_profile<float>()));
diff --git a/cpp/src/tuple_sketch_timing_profile.cpp b/cpp/src/tuple_sketch_timing_profile.cpp
new file mode 100644
index 0000000..91e6a8f
--- /dev/null
+++ b/cpp/src/tuple_sketch_timing_profile.cpp
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <iostream>
+#include <algorithm>
+#include <random>
+#include <chrono>
+#include <sstream>
+
+#include <tuple_sketch.hpp>
+
+#include "tuple_sketch_timing_profile.hpp"
+
+namespace datasketches {
+
+void tuple_sketch_timing_profile::run() {
+ const size_t lg_min_stream_len(0);
+ const size_t lg_max_stream_len(24);
+ const size_t ppo(16);
+
+ const size_t lg_max_trials(16);
+ const size_t lg_min_trials(8);
+
+ update_tuple_sketch<double>::builder builder;
+
+ // some arbitrary starting value
+ uint64_t counter(35538947);
+
+ const uint64_t golden64(0x9e3779b97f4a7c13ULL); // the golden ratio
+
+ std::cout << "Stream\tTrials\tBuild\tUpdate\tSer\tDeser\tSize" << std::endl;
+
+ size_t stream_length(1 << lg_min_stream_len);
+ while (stream_length <= (1 << lg_max_stream_len)) {
+
+ std::chrono::nanoseconds build_time_ns(0);
+ std::chrono::nanoseconds update_time_ns(0);
+ std::chrono::nanoseconds compact_time_ns(0);
+ std::chrono::nanoseconds serialize_time_ns(0);
+ std::chrono::nanoseconds deserialize_time_ns(0);
+ size_t size_bytes(0);
+
+ const size_t num_trials = get_num_trials(stream_length, lg_min_stream_len, lg_max_stream_len, lg_min_trials, lg_max_trials);
+
+ for (size_t i = 0; i < num_trials; i++) {
+ const auto start_build(std::chrono::high_resolution_clock::now());
+ update_tuple_sketch<double> sketch = builder.build();
+ const auto finish_build(std::chrono::high_resolution_clock::now());
+ build_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_build - start_build);
+
+ const auto start_update(std::chrono::high_resolution_clock::now());
+ for (size_t j = 0; j < stream_length; j++) {
+ sketch.update(counter, 1);
+ counter += golden64;
+ }
+ const auto finish_update(std::chrono::high_resolution_clock::now());
+ update_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_update - start_update);
+
+ auto start_compact(std::chrono::high_resolution_clock::now());
+ auto compact_sketch = sketch.compact();
+ const auto finish_compact(std::chrono::high_resolution_clock::now());
+ compact_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_compact - start_compact);
+
+ auto start_serialize(std::chrono::high_resolution_clock::now());
+ auto bytes = compact_sketch.serialize();
+ const auto finish_serialize(std::chrono::high_resolution_clock::now());
+ serialize_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_serialize - start_serialize);
+
+ const auto start_deserialize(std::chrono::high_resolution_clock::now());
+ auto deserialized_sketch = compact_tuple_sketch<double>::deserialize(bytes.data(), bytes.size());
+ const auto finish_deserialize(std::chrono::high_resolution_clock::now());
+ deserialize_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_deserialize - start_deserialize);
+
+ size_bytes += bytes.size();
+ }
+
+ std::cout << stream_length << "\t"
+ << num_trials << "\t"
+ << (double) build_time_ns.count() / num_trials << "\t"
+ << (double) update_time_ns.count() / num_trials / stream_length << "\t"
+ << (double) serialize_time_ns.count() / num_trials << "\t"
+ << (double) deserialize_time_ns.count() / num_trials << "\t"
+ << (double) size_bytes / num_trials << "\t"
+ << std::endl;
+ stream_length = pwr_2_law_next(ppo, stream_length);
+ }
+
+}
+
+}
diff --git a/cpp/src/tuple_sketch_timing_profile.hpp b/cpp/src/tuple_sketch_timing_profile.hpp
new file mode 100644
index 0000000..a41ffe7
--- /dev/null
+++ b/cpp/src/tuple_sketch_timing_profile.hpp
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef TUPLE_SKETCH_TIMING_PROFILE_HPP_
+#define TUPLE_SKETCH_TIMING_PROFILE_HPP_
+
+#include "job_profile.hpp"
+
+namespace datasketches {
+
+class tuple_sketch_timing_profile: public job_profile {
+public:
+ void run();
+};
+
+}
+
+#endif
diff --git a/cpp/src/tuple_union_timing_profile.cpp b/cpp/src/tuple_union_timing_profile.cpp
new file mode 100644
index 0000000..79c5f34
--- /dev/null
+++ b/cpp/src/tuple_union_timing_profile.cpp
@@ -0,0 +1,142 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <iostream>
+#include <algorithm>
+#include <random>
+#include <chrono>
+#include <sstream>
+
+#include <tuple_union.hpp>
+
+#include "tuple_union_timing_profile.hpp"
+
+namespace datasketches {
+
+void tuple_union_timing_profile::run() {
+ const size_t lg_min_stream_len = 0;
+ const size_t lg_max_stream_len = 26;
+ const size_t ppo = 16;
+
+ const size_t lg_max_trials = 14;
+ const size_t lg_min_trials = 6;
+
+ const int lg_k = 12;
+ const int num_sketches_to_union = 32;
+
+ // some arbitrary starting value
+ uint64_t counter = 35538947;
+ const uint64_t golden64 = 0x9e3779b97f4a7c13ULL; // the golden ratio
+
+ std::cout << "Stream\tTrials\tBuild\tUpdate\tCompact\tSerialize\tDeserialize\tUnion\tResult" << std::endl;
+
+ std::unique_ptr<update_tuple_sketch<double>> update_sketches[num_sketches_to_union];
+ std::unique_ptr<compact_tuple_sketch<double>> compact_sketches[num_sketches_to_union];
+
+ auto sketch_builder = update_tuple_sketch<double>::builder().set_lg_k(lg_k);
+ auto union_builder = tuple_union<double>::builder().set_lg_k(lg_k);
+
+ size_t stream_length = 1 << lg_min_stream_len;
+ while (stream_length <= (1 << lg_max_stream_len)) {
+
+ std::chrono::nanoseconds build_time_ns(0);
+ std::chrono::nanoseconds update_time_ns(0);
+ std::chrono::nanoseconds compact_time_ns(0);
+ std::chrono::nanoseconds serialize_time_ns(0);
+ std::chrono::nanoseconds deserialize_time_ns(0);
+ std::chrono::nanoseconds union_time_ns(0);
+ std::chrono::nanoseconds result_time_ns(0);
+ size_t num_retained = 0;
+
+ const size_t num_trials = get_num_trials(stream_length, lg_min_stream_len, lg_max_stream_len, lg_min_trials, lg_max_trials);
+ for (size_t t = 0; t < num_trials; t++) {
+ const auto start_build(std::chrono::high_resolution_clock::now());
+ for (size_t i = 0; i < num_sketches_to_union; i++) {
+ update_sketches[i] = std::unique_ptr<update_tuple_sketch<double>>(new update_tuple_sketch<double>(sketch_builder.build()));
+ }
+ tuple_union<double> u = union_builder.build();
+ const auto finish_build(std::chrono::high_resolution_clock::now());
+ build_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_build - start_build);
+
+ const auto start_update(std::chrono::high_resolution_clock::now());
+ size_t i = 0;
+ for (size_t j = 0; j < stream_length; j++) {
+ update_sketches[i]->update(counter, counter);
+ counter += golden64;
+ i++;
+ if (i == num_sketches_to_union) i = 0;
+ }
+ const auto finish_update(std::chrono::high_resolution_clock::now());
+ update_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_update - start_update);
+
+ const auto start_compacting(std::chrono::high_resolution_clock::now());
+ for (size_t i = 0; i < num_sketches_to_union; i++) {
+ update_sketches[i]->trim();
+ compact_sketches[i] = std::unique_ptr<compact_tuple_sketch<double>>(new compact_tuple_sketch<double>(update_sketches[i]->compact()));
+ }
+ const auto finish_compacting(std::chrono::high_resolution_clock::now());
+ compact_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_compacting - start_compacting);
+
+ std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
+ const auto start_serialize(std::chrono::high_resolution_clock::now());
+ for (size_t i = 0; i < num_sketches_to_union; i++) {
+ compact_sketches[i]->serialize(s);
+ }
+ const auto finish_serialize(std::chrono::high_resolution_clock::now());
+ serialize_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_serialize - start_serialize);
+
+ const auto start_deserialize(std::chrono::high_resolution_clock::now());
+ for (size_t i = 0; i < num_sketches_to_union; i++) {
+ compact_sketches[i] = std::unique_ptr<compact_tuple_sketch<double>>(new compact_tuple_sketch<double>(compact_tuple_sketch<double>::deserialize(s)));
+ }
+ const auto finish_deserialize(std::chrono::high_resolution_clock::now());
+ deserialize_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_deserialize - start_deserialize);
+
+ const auto start_union(std::chrono::high_resolution_clock::now());
+ for (size_t i = 0; i < num_sketches_to_union; i++) {
+ u.update(*compact_sketches[i]);
+ }
+ const auto finish_union(std::chrono::high_resolution_clock::now());
+ union_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_union - start_union);
+
+ const auto start_result(std::chrono::high_resolution_clock::now());
+ auto result = u.get_result();
+ const auto finish_result(std::chrono::high_resolution_clock::now());
+ result_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_result - start_result);
+
+ num_retained += result.get_num_retained();
+ }
+
+ std::cout << stream_length << "\t"
+ << num_trials << "\t"
+ << (double) build_time_ns.count() / num_trials << "\t"
+ << (double) update_time_ns.count() / num_trials / stream_length << "\t"
+ << (double) compact_time_ns.count() / num_trials << "\t"
+ << (double) serialize_time_ns.count() / num_trials << "\t"
+ << (double) deserialize_time_ns.count() / num_trials << "\t"
+ << (double) union_time_ns.count() / num_trials << "\t"
+ << (double) result_time_ns.count() / num_trials << "\t"
+ << (double) num_retained / num_trials
+ << std::endl;
+ stream_length = pwr_2_law_next(ppo, stream_length);
+ }
+
+}
+
+}
diff --git a/cpp/src/tuple_union_timing_profile.hpp b/cpp/src/tuple_union_timing_profile.hpp
new file mode 100644
index 0000000..9539e04
--- /dev/null
+++ b/cpp/src/tuple_union_timing_profile.hpp
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef TUPLE_UNION_TIMING_PROFILE_HPP_
+#define TUPLE_UNION_TIMING_PROFILE_HPP_
+
+#include "job_profile.hpp"
+
+namespace datasketches {
+
+class tuple_union_timing_profile: public job_profile {
+public:
+ void run();
+};
+
+}
+
+#endif
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org