You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by jm...@apache.org on 2022/04/29 08:09:01 UTC

[datasketches-characterization] branch quantiles_accuracy created (now 2d9bafd)

This is an automated email from the ASF dual-hosted git repository.

jmalkin pushed a change to branch quantiles_accuracy
in repository https://gitbox.apache.org/repos/asf/datasketches-characterization.git


      at 2d9bafd  rename binary to characterization-cpp and change output location, add quantiles accuracy test

This branch includes the following new commits:

     new 2d9bafd  rename binary to characterization-cpp and change output location, add quantiles accuracy test

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  Any revisions
listed as "add" were already present in the repository and have only
been added to this reference.



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org


[datasketches-characterization] 01/01: rename binary to characterization-cpp and change output location, add quantiles accuracy test

Posted by jm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jmalkin pushed a commit to branch quantiles_accuracy
in repository https://gitbox.apache.org/repos/asf/datasketches-characterization.git

commit 2d9bafd8f7e9c82cf79e383414e7a688ef690fcf
Author: Jon <jm...@apache.org>
AuthorDate: Fri Apr 29 01:08:49 2022 -0700

    rename binary to characterization-cpp and change output location, add quantiles accuracy test
---
 CMakeLists.txt                                |  4 ++-
 cpp/CMakeLists.txt                            | 14 ++++----
 cpp/src/main.cpp                              |  3 ++
 cpp/src/quantiles_sketch_accuracy_profile.cpp | 47 +++++++++++++++++++++++++++
 cpp/src/quantiles_sketch_accuracy_profile.hpp | 35 ++++++++++++++++++++
 5 files changed, 95 insertions(+), 8 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e111a26..4b11403 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -75,7 +75,9 @@ if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
     "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
 endif()
 
+# build the executable wherever we run cmake, not a subdirectory
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
+
 ###### TARGETS ######
-# do we need the next line since we don't actually make a library anymore?
 
 add_subdirectory(cpp)
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 5c53da1..98bd532 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -15,20 +15,18 @@
 # specific language governing permissions and limitations
 # under the License.
 
-add_executable(characterization)
+add_executable(characterization-cpp)
 
 find_package(DataSketches 3.2 REQUIRED)
-target_link_libraries(characterization PUBLIC ${DATASKETCHES_LIB})
-target_include_directories(characterization PUBLIC ${DATASKETCHES_INCLUDE_DIR})
+target_link_libraries(characterization-cpp PUBLIC ${DATASKETCHES_LIB})
+target_include_directories(characterization-cpp PUBLIC ${DATASKETCHES_INCLUDE_DIR})
 
-target_compile_features(common INTERFACE cxx_std_11)
-
-set_target_properties(characterization PROPERTIES
+set_target_properties(characterization-cpp PROPERTIES
   CXX_STANDARD 11
   CXX_STANDARD_REQUIRED YES
 )
 
-target_sources(characterization
+target_sources(characterization-cpp
   PRIVATE
     src/main.cpp
     src/job_profile.cpp
@@ -78,6 +76,8 @@ target_sources(characterization
     src/kll_sketch_timing_profile_impl.hpp
     src/memory_usage_profile.cpp
     src/memory_usage_profile.hpp
+    src/quantiles_sketch_accuracy_profile.cpp
+    src/quantiles_sketch_accuracy_profile.hpp
     src/req_merge_timing_profile.hpp
     src/req_merge_timing_profile_impl.hpp
     src/req_sketch_timing_profile.hpp
diff --git a/cpp/src/main.cpp b/cpp/src/main.cpp
index 3b09818..2e5abda 100644
--- a/cpp/src/main.cpp
+++ b/cpp/src/main.cpp
@@ -63,6 +63,8 @@
 #include "req_sketch_timing_profile.hpp"
 #include "req_merge_timing_profile.hpp"
 
+#include "quantiles_sketch_accuracy_profile.hpp"
+
 using namespace datasketches;
 typedef std::unique_ptr<job_profile> job_profile_ptr;
 
@@ -93,6 +95,7 @@ int main(int argc, char **argv) {
   job_profile::add("kll-sketch-accuracy", job_profile_ptr(new kll_sketch_accuracy_profile()));
   job_profile::add("kll-merge-accuracy", job_profile_ptr(new kll_merge_accuracy_profile()));
   job_profile::add("fi-sketch-accuracy", job_profile_ptr(new frequent_items_sketch_accuracy_profile()));
+  job_profile::add("quantiles-sketch-accuracy", job_profile_ptr(new quantiles_sketch_accuracy_profile()));
 
   job_profile::add("cpc-sketch-memory", job_profile_ptr(new cpc_sketch_memory_profile()));
   job_profile::add("hll-sketch-memory", job_profile_ptr(new hll_sketch_memory_profile()));
diff --git a/cpp/src/quantiles_sketch_accuracy_profile.cpp b/cpp/src/quantiles_sketch_accuracy_profile.cpp
new file mode 100644
index 0000000..3621fd9
--- /dev/null
+++ b/cpp/src/quantiles_sketch_accuracy_profile.cpp
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <algorithm>
+#include <random>
+#include <chrono>
+
+#include <quantiles_sketch.hpp>
+
+#include "quantiles_sketch_accuracy_profile.hpp"
+
+namespace datasketches {
+
+double quantiles_sketch_accuracy_profile::run_trial(float* values, unsigned stream_length) {
+  unsigned seed = std::chrono::system_clock::now().time_since_epoch().count();
+  std::shuffle(values, values + stream_length, std::default_random_engine(seed));
+
+  quantiles_sketch<double> sketch(128);
+  for (size_t i = 0; i < stream_length; i++) sketch.update(values[i]);
+
+  double max_rank_error = 0;
+  for (size_t i = 0; i < stream_length; i++) {
+    double true_rank = (double) i / stream_length;
+    double est_rank = sketch.get_rank(i);
+    max_rank_error = std::max(max_rank_error, fabs(true_rank - est_rank));
+  }
+
+  return max_rank_error;
+}
+
+}
diff --git a/cpp/src/quantiles_sketch_accuracy_profile.hpp b/cpp/src/quantiles_sketch_accuracy_profile.hpp
new file mode 100644
index 0000000..7af2f91
--- /dev/null
+++ b/cpp/src/quantiles_sketch_accuracy_profile.hpp
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef QUANTILES_SKETCH_ACCURACY_PROFILE_HPP_
+#define QUANTILES_SKETCH_ACCURACY_PROFILE_HPP_
+
+// intentionally inheriting from the kll profile
+#include "kll_accuracy_profile.hpp"
+
+namespace datasketches {
+
+class quantiles_sketch_accuracy_profile: public kll_accuracy_profile {
+public:
+  double run_trial(float* values, unsigned stream_length);
+};
+
+}
+
+#endif


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org