You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2020/11/25 00:27:36 UTC
[incubator-datasketches-characterization] 01/01: req sketch timing
This is an automated email from the ASF dual-hosted git repository.
alsay pushed a commit to branch req_sketch
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-characterization.git
commit 7d14c03489304a9f5ca9464fb9e5d97be820d60a
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Tue Nov 24 16:26:59 2020 -0800
req sketch timing
---
cpp/src/main.cpp | 3 +
cpp/src/req_sketch_timing_profile.hpp | 44 ++++++++
cpp/src/req_sketch_timing_profile_impl.hpp | 156 +++++++++++++++++++++++++++++
3 files changed, 203 insertions(+)
diff --git a/cpp/src/main.cpp b/cpp/src/main.cpp
index 8cd8362..0f62365 100644
--- a/cpp/src/main.cpp
+++ b/cpp/src/main.cpp
@@ -60,6 +60,8 @@
#include "hll_cross_language_profile.hpp"
+#include "req_sketch_timing_profile.hpp"
+
using namespace datasketches;
typedef std::unique_ptr<job_profile> job_profile_ptr;
@@ -78,6 +80,7 @@ int main(int argc, char **argv) {
job_profile::add("kll-merge-timing-string", job_profile_ptr(new kll_merge_timing_profile<std::string>()));
job_profile::add("fi-sketch-timing", job_profile_ptr(new frequent_items_sketch_timing_profile()));
job_profile::add("fi-merge-timing", job_profile_ptr(new frequent_items_merge_timing_profile()));
+ job_profile::add("req-sketch-timing-float", job_profile_ptr(new req_sketch_timing_profile<float>()));
job_profile::add("cpc-sketch-accuracy", job_profile_ptr(new cpc_sketch_accuracy_profile()));
job_profile::add("cpc-union-accuracy", job_profile_ptr(new cpc_union_accuracy_profile()));
diff --git a/cpp/src/req_sketch_timing_profile.hpp b/cpp/src/req_sketch_timing_profile.hpp
new file mode 100644
index 0000000..9df3799
--- /dev/null
+++ b/cpp/src/req_sketch_timing_profile.hpp
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef REQ_SKETCH_TIMING_PROFILE_HPP_
+#define REQ_SKETCH_TIMING_PROFILE_HPP_
+
+#include <random>
+
+#include "job_profile.hpp"
+
+namespace datasketches {
+
+template<typename T> // T is the item type fed to the sketch; sample() is specialized per T in the impl header
+class req_sketch_timing_profile: public job_profile {
+public:
+ req_sketch_timing_profile(); // seeds the generator from the system clock; distribution is set up with bounds 0.0 and 1.0
+ void run(); // times req_sketch operations over a range of stream lengths and prints a tab-separated table to stdout
+ T sample(); // draws the next random item of type T using the generator and distribution below
+private:
+ std::default_random_engine generator; // randomness source used by sample() and by the query-point setup in run()
+ std::uniform_real_distribution<float> distribution; // produces floats; its bounds are supplied by the constructor
+};
+
+}
+
+#include "req_sketch_timing_profile_impl.hpp"
+
+#endif
diff --git a/cpp/src/req_sketch_timing_profile_impl.hpp b/cpp/src/req_sketch_timing_profile_impl.hpp
new file mode 100644
index 0000000..1a5cb2e
--- /dev/null
+++ b/cpp/src/req_sketch_timing_profile_impl.hpp
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef REQ_SKETCH_TIMING_PROFILE_IMPL_HPP_
+#define REQ_SKETCH_TIMING_PROFILE_IMPL_HPP_
+
+#include <iostream>
+#include <algorithm>
+#include <random>
+#include <chrono>
+
+#include <req_sketch.hpp>
+
+namespace datasketches {
+
+// Seeds the random engine with the current system-clock tick count (converted
+// to the engine's seed type) and sets the distribution bounds to 0 and 1.
+template<typename T>
+req_sketch_timing_profile<T>::req_sketch_timing_profile()
+  : generator(static_cast<std::default_random_engine::result_type>(
+        std::chrono::system_clock::now().time_since_epoch().count())),
+    distribution(0.0f, 1.0f) {
+}
+
+// Runs the REQ sketch timing characterization for item type T.
+//
+// Stream lengths start at 2^lg_min_stream_len and advance via pwr_2_law_next
+// (ppo points per octave) up to 2^lg_max_stream_len. For each length, a number
+// of trials chosen by get_num_trials is executed; every trial fills the input
+// buffer with fresh samples and then times sketch construction, updates,
+// get_quantile, get_rank, serialize and deserialize separately.
+//
+// One tab-separated row of averages is printed per stream length:
+//   Build, Ser, Deser : nanoseconds per sketch
+//   Update            : nanoseconds per item
+//   Quant, Rank       : nanoseconds per query
+//   Items, Size       : retained items and serialized bytes per sketch
+template<typename T>
+void req_sketch_timing_profile<T>::run() {
+  typedef std::chrono::high_resolution_clock timer;
+  typedef std::chrono::nanoseconds ns;
+
+  const size_t lg_min_stream_len(0);
+  const size_t lg_max_stream_len(23);
+  const size_t ppo(16);
+
+  const size_t lg_max_trials(14);
+  const size_t lg_min_trials(4);
+
+  const size_t num_queries(20);
+
+  const bool hra = true;
+  const uint16_t k = 12;
+
+  // The header lists exactly the columns printed per row below. get_quantiles()
+  // and get_CDF() are not timed, so they have no column; add the label and the
+  // value together if those measurements are introduced.
+  std::cout << "Stream\tTrials\tBuild\tUpdate\tQuant\tRank\tSer\tDeser\tItems\tSize" << std::endl;
+
+  // Shift a size_t (not an int) so the bounds have the same type as stream_length.
+  const size_t min_len = static_cast<size_t>(1) << lg_min_stream_len;
+  const size_t max_len = static_cast<size_t>(1) << lg_max_stream_len;
+
+  // Input buffer sized for the longest stream and reused by every trial.
+  std::vector<T> values(max_len);
+
+  // Query points are drawn once, up front, and shared by all trials.
+  std::vector<T> rank_query_values(num_queries);
+  for (size_t q = 0; q < num_queries; q++) rank_query_values[q] = sample();
+  std::sort(rank_query_values.begin(), rank_query_values.end());
+
+  std::vector<double> quantile_query_values(num_queries);
+  for (size_t q = 0; q < num_queries; q++) quantile_query_values[q] = distribution(generator);
+
+  size_t stream_length = min_len;
+  while (stream_length <= max_len) {
+
+    ns build_time_ns(0);
+    ns update_time_ns(0);
+    ns get_quantile_time_ns(0);
+    ns get_rank_time_ns(0);
+    ns serialize_time_ns(0);
+    ns deserialize_time_ns(0);
+    size_t num_retained(0);
+    size_t size_bytes(0);
+
+    const size_t num_trials = get_num_trials(stream_length, lg_min_stream_len, lg_max_stream_len, lg_min_trials, lg_max_trials);
+    for (size_t trial = 0; trial < num_trials; trial++) {
+      // Generating the input is deliberately outside every timed region.
+      for (size_t i = 0; i < stream_length; i++) values[i] = sample();
+
+      const auto start_build(timer::now());
+      req_sketch<T, hra> sketch(k);
+      const auto finish_build(timer::now());
+      build_time_ns += std::chrono::duration_cast<ns>(finish_build - start_build);
+
+      const auto start_update(timer::now());
+      for (size_t i = 0; i < stream_length; i++) sketch.update(values[i]);
+      const auto finish_update(timer::now());
+      update_time_ns += std::chrono::duration_cast<ns>(finish_update - start_update);
+
+      const auto start_get_quantile(timer::now());
+      for (size_t q = 0; q < num_queries; q++) sketch.get_quantile(quantile_query_values[q]);
+      const auto finish_get_quantile(timer::now());
+      get_quantile_time_ns += std::chrono::duration_cast<ns>(finish_get_quantile - start_get_quantile);
+
+      // TODO: time sketch.get_quantiles(...) over quantile_query_values here.
+
+      const auto start_get_rank(timer::now());
+      for (size_t q = 0; q < num_queries; q++) {
+        volatile double rank = sketch.get_rank(rank_query_values[q]); // volatile to prevent this from being optimized away
+      }
+      const auto finish_get_rank(timer::now());
+      get_rank_time_ns += std::chrono::duration_cast<ns>(finish_get_rank - start_get_rank);
+
+      // TODO: time sketch.get_CDF(...) over the sorted rank_query_values here.
+
+      const auto start_serialize(timer::now());
+      auto bytes = sketch.serialize();
+      const auto finish_serialize(timer::now());
+      serialize_time_ns += std::chrono::duration_cast<ns>(finish_serialize - start_serialize);
+
+      const auto start_deserialize(timer::now());
+      auto deserialized_sketch = req_sketch<T, hra>::deserialize(bytes.data(), bytes.size());
+      const auto finish_deserialize(timer::now());
+      deserialize_time_ns += std::chrono::duration_cast<ns>(finish_deserialize - start_deserialize);
+
+      num_retained += sketch.get_num_retained();
+      size_bytes += bytes.size();
+    }
+
+    // Averages over trials; per-item and per-query figures are divided further.
+    std::cout << stream_length << "\t"
+        << num_trials << "\t"
+        << static_cast<double>(build_time_ns.count()) / num_trials << "\t"
+        << static_cast<double>(update_time_ns.count()) / num_trials / stream_length << "\t"
+        << static_cast<double>(get_quantile_time_ns.count()) / num_trials / num_queries << "\t"
+        << static_cast<double>(get_rank_time_ns.count()) / num_trials / num_queries << "\t"
+        << static_cast<double>(serialize_time_ns.count()) / num_trials << "\t"
+        << static_cast<double>(deserialize_time_ns.count()) / num_trials << "\t"
+        << num_retained / num_trials << "\t"
+        << size_bytes / num_trials << std::endl;
+    stream_length = pwr_2_law_next(ppo, stream_length);
+  }
+}
+
+// float specialization: returns the next value drawn from the distribution.
+// Marked inline because an explicit specialization is an ordinary function;
+// defined in a header without inline it would be multiply defined as soon as
+// the header is included from more than one translation unit.
+template<>
+inline float req_sketch_timing_profile<float>::sample() {
+  return distribution(generator);
+}
+
+// std::string specialization: draws the next value from the distribution and
+// returns its std::to_string text form.
+// Marked inline because an explicit specialization is an ordinary function;
+// defined in a header without inline it would be multiply defined as soon as
+// the header is included from more than one translation unit.
+template<>
+inline std::string req_sketch_timing_profile<std::string>::sample() {
+  return std::to_string(distribution(generator));
+}
+
+}
+
+#endif
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org