You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2022/12/23 20:15:11 UTC

[datasketches-cpp] 02/02: rearranged code to simplify

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch python_wrapper_improvement
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git

commit 98a46081820f00a6ab7372ea871607ae63cc644b
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Fri Dec 23 12:15:03 2022 -0800

    rearranged code to simplify
---
 python/src/fi_wrapper.cpp  | 114 ++++++++++++++++++++-------------------------
 python/src/hll_wrapper.cpp |  56 ++++++++++------------
 2 files changed, 75 insertions(+), 95 deletions(-)

diff --git a/python/src/fi_wrapper.cpp b/python/src/fi_wrapper.cpp
index 1c217e1..bdb49a4 100644
--- a/python/src/fi_wrapper.cpp
+++ b/python/src/fi_wrapper.cpp
@@ -17,62 +17,11 @@
  * under the License.
  */
 
-#include "frequent_items_sketch.hpp"
-
 #include <pybind11/pybind11.h>
-#include <sstream>
-
-namespace py = pybind11;
 
-namespace datasketches {
-namespace python {
-
-template<typename T>
-frequent_items_sketch<T> fi_sketch_deserialize(py::bytes skBytes) {
-  std::string skStr = skBytes; // implicit cast  
-  return frequent_items_sketch<T>::deserialize(skStr.c_str(), skStr.length());
-}
-
-template<typename T>
-py::object fi_sketch_serialize(const frequent_items_sketch<T>& sk) {
-  auto serResult = sk.serialize();
-  return py::bytes((char*)serResult.data(), serResult.size());
-}
-
-// maybe possible to disambiguate the static vs method get_epsilon calls, but
-// this is easier for now
-template<typename T>
-double fi_sketch_get_generic_epsilon(uint8_t lg_max_map_size) {
-  return frequent_items_sketch<T>::get_epsilon(lg_max_map_size);
-}
-
-template<typename T>
-py::list fi_sketch_get_frequent_items(const frequent_items_sketch<T>& sk,
-                                   frequent_items_error_type err_type,
-                                   uint64_t threshold = 0) {
-  if (threshold == 0) { threshold = sk.get_maximum_error(); }
-
-  py::list list;
-  auto items = sk.get_frequent_items(err_type, threshold);
-  for (auto iter = items.begin(); iter != items.end(); ++iter) {
-    py::tuple t = py::make_tuple(iter->get_item(),
-                                 iter->get_estimate(),
-                                 iter->get_lower_bound(),
-                                 iter->get_upper_bound());
-    list.append(t);
-  }
-  return list;
-}
-
-template<typename T>
-size_t fi_sketch_get_serialized_size_bytes(const frequent_items_sketch<T>& sk) {
-  return sk.get_serialized_size_bytes();
-}
-
-}
-}
+#include "frequent_items_sketch.hpp"
 
-namespace dspy = datasketches::python;
+namespace py = pybind11;
 
 template<typename T>
 void bind_fi_sketch(py::module &m, const char* name) {
@@ -86,7 +35,6 @@ void bind_fi_sketch(py::module &m, const char* name) {
          "Produces a string summary of the sketch")
     .def("update", (void (frequent_items_sketch<T>::*)(const T&, uint64_t)) &frequent_items_sketch<T>::update, py::arg("item"), py::arg("weight")=1,
          "Updates the sketch with the given string and, optionally, a weight")
-    .def("get_frequent_items", &dspy::fi_sketch_get_frequent_items<T>, py::arg("err_type"), py::arg("threshold")=0)
     .def("merge", (void (frequent_items_sketch<T>::*)(const frequent_items_sketch<T>&)) &frequent_items_sketch<T>::merge,
          "Merges the given sketch into this one")
     .def("is_empty", &frequent_items_sketch<T>::is_empty,
@@ -105,15 +53,55 @@ void bind_fi_sketch(py::module &m, const char* name) {
          "Returns the guaranteed upper bound weight (frequency) of the given item.")
     .def("get_sketch_epsilon", (double (frequent_items_sketch<T>::*)(void) const) &frequent_items_sketch<T>::get_epsilon,
          "Returns the epsilon value used by the sketch to compute error")
-    .def_static("get_epsilon_for_lg_size", &dspy::fi_sketch_get_generic_epsilon<T>, py::arg("lg_max_map_size"),
-         "Returns the epsilon value used to compute a priori error for a given log2(max_map_size)")
-    .def_static("get_apriori_error", &frequent_items_sketch<T>::get_apriori_error, py::arg("lg_max_map_size"), py::arg("estimated_total_weight"),
-         "Returns the estimated a priori error given the max_map_size for the sketch and the estimated_total_stream_weight.")
-    .def("get_serialized_size_bytes", &dspy::fi_sketch_get_serialized_size_bytes<T>,
-         "Computes the size needed to serialize the current state of the sketch. This can be expensive since every item needs to be looked at.")
-    .def("serialize", &dspy::fi_sketch_serialize<T>, "Serializes the sketch into a bytes object")
-    .def_static("deserialize", &dspy::fi_sketch_deserialize<T>, "Reads a bytes object and returns the corresponding frequent_strings_sketch")
-    ;
+    .def(
+        "get_frequent_items",
+        [](const frequent_items_sketch<T>& sk, frequent_items_error_type err_type, uint64_t threshold) {
+          if (threshold == 0) threshold = sk.get_maximum_error();
+          py::list list;
+          auto rows = sk.get_frequent_items(err_type, threshold);
+          for (auto row: rows) {
+            list.append(py::make_tuple(
+                row.get_item(),
+                row.get_estimate(),
+                row.get_lower_bound(),
+                row.get_upper_bound())
+            );
+          }
+          return list;
+        },
+        py::arg("err_type"), py::arg("threshold")=0
+    )
+    .def_static(
+        "get_epsilon_for_lg_size",
+        [](uint8_t lg_max_map_size) { return frequent_items_sketch<T>::get_epsilon(lg_max_map_size); },
+        py::arg("lg_max_map_size"),
+        "Returns the epsilon value used to compute a priori error for a given log2(max_map_size)"
+    )
+    .def_static(
+        "get_apriori_error",
+        &frequent_items_sketch<T>::get_apriori_error,
+        py::arg("lg_max_map_size"), py::arg("estimated_total_weight"),
+        "Returns the estimated a priori error given the max_map_size for the sketch and the estimated_total_stream_weight."
+    )
+    .def(
+        "get_serialized_size_bytes",
+        [](const frequent_items_sketch<T>& sk) { return sk.get_serialized_size_bytes(); },
+        "Computes the size needed to serialize the current state of the sketch. This can be expensive since every item needs to be looked at."
+    )
+    .def(
+        "serialize",
+        [](const frequent_items_sketch<T>& sk) {
+          auto bytes = sk.serialize();
+          return py::bytes(reinterpret_cast<const char*>(bytes.data()), bytes.size());
+        },
+        "Serializes the sketch into a bytes object"
+    )
+    .def_static(
+        "deserialize",
+        [](const std::string& bytes) { return frequent_items_sketch<T>::deserialize(bytes.data(), bytes.size()); },
+        py::arg("bytes"),
+        "Reads a bytes object and returns the corresponding frequent_strings_sketch"
+    );
 }
 
 void init_fi(py::module &m) {
diff --git a/python/src/hll_wrapper.cpp b/python/src/hll_wrapper.cpp
index 0491074..24da90a 100644
--- a/python/src/hll_wrapper.cpp
+++ b/python/src/hll_wrapper.cpp
@@ -17,34 +17,11 @@
  * under the License.
  */
 
-#include "hll.hpp"
-
 #include <pybind11/pybind11.h>
 
-namespace py = pybind11;
-
-namespace datasketches {
-namespace python {
-
-hll_sketch hll_sketch_deserialize(py::bytes skBytes) {
-  std::string skStr = skBytes; // implicit cast  
-  return hll_sketch::deserialize(skStr.c_str(), skStr.length());
-}
-
-py::object hll_sketch_serialize_compact(const hll_sketch& sk) {
-  auto serResult = sk.serialize_compact();
-  return py::bytes((char*)serResult.data(), serResult.size());
-}
-
-py::object hll_sketch_serialize_updatable(const hll_sketch& sk) {
-  auto serResult = sk.serialize_updatable();
-  return py::bytes((char*)serResult.data(), serResult.size());
-}
-
-}
-}
+#include "hll.hpp"
 
-namespace dspy = datasketches::python;
+namespace py = pybind11;
 
 void init_hll(py::module &m) {
   using namespace datasketches;
@@ -59,12 +36,6 @@ void init_hll(py::module &m) {
     .def(py::init<uint8_t>(), py::arg("lg_k"))
     .def(py::init<uint8_t, target_hll_type>(), py::arg("lg_k"), py::arg("tgt_type"))
     .def(py::init<uint8_t, target_hll_type, bool>(), py::arg("lg_k"), py::arg("tgt_type"), py::arg("start_max_size")=false)
-    .def_static("deserialize", &dspy::hll_sketch_deserialize,
-         "Reads a bytes object and returns the corresponding hll_sketch")
-    .def("serialize_compact", &dspy::hll_sketch_serialize_compact,
-         "Serializes the sketch into a bytes object, compressiong the exception table if HLL_4")
-    .def("serialize_updatable", &dspy::hll_sketch_serialize_updatable,
-         "Serializes the sketch into a bytes object")
     .def("__str__", (std::string (hll_sketch::*)(bool,bool,bool,bool) const) &hll_sketch::to_string,
          py::arg("summary")=true, py::arg("detail")=false, py::arg("aux_detail")=false, py::arg("all")=false,
          "Produces a string summary of the sketch")
@@ -101,7 +72,28 @@ void init_hll(py::module &m) {
     .def_static("get_rel_err", &hll_sketch::get_rel_err,
          py::arg("upper_bound"), py::arg("unioned"), py::arg("lg_k"), py::arg("num_std_devs"),
          "Retuns the a priori relative error bound for the given parameters")
-    ;
+    .def(
+        "serialize_compact",
+        [](const hll_sketch& sk) {
+          auto bytes = sk.serialize_compact();
+          return py::bytes(reinterpret_cast<const char*>(bytes.data()), bytes.size());
+        },
+        "Serializes the sketch into a bytes object, compressiong the exception table if HLL_4"
+    )
+    .def(
+        "serialize_updatable",
+        [](const hll_sketch& sk) {
+          auto bytes = sk.serialize_updatable();
+          return py::bytes(reinterpret_cast<const char*>(bytes.data()), bytes.size());
+        },
+        "Serializes the sketch into a bytes object"
+    )
+    .def_static(
+        "deserialize",
+        [](const std::string& bytes) { return hll_sketch::deserialize(bytes.data(), bytes.size()); },
+        py::arg("bytes"),
+        "Reads a bytes object and returns the corresponding hll_sketch"
+    );
 
   py::class_<hll_union>(m, "hll_union")
     .def(py::init<uint8_t>(), py::arg("lg_max_k"))


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org