You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by jm...@apache.org on 2020/02/15 09:04:23 UTC
[incubator-datasketches-cpp] 02/02: add varopt to python,
with changes to support human-readable to_string() even if type has
no operator<< defined
This is an automated email from the ASF dual-hosted git repository.
jmalkin pushed a commit to branch sampling
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git
commit 4f91eba2e7b56d19a3120d5508705e09c7f764c5
Author: Jon Malkin <jm...@users.noreply.github.com>
AuthorDate: Sat Feb 15 01:04:08 2020 -0800
add varopt to python, with changes to support human-readable to_string() even if type has no operator<< defined
---
python/CMakeLists.txt | 2 +
python/src/datasketches.cpp | 2 +
python/src/vo_wrapper.cpp | 120 +++++++++++++++++++++++++++++++
sampling/include/var_opt_sketch.hpp | 5 ++
sampling/include/var_opt_sketch_impl.hpp | 16 ++++-
sampling/include/var_opt_union_impl.hpp | 1 -
6 files changed, 143 insertions(+), 3 deletions(-)
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 1136382..d1a3f9f 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -34,6 +34,7 @@ target_link_libraries(python
cpc
fi
theta
+ sampling
pybind11::module
)
@@ -55,4 +56,5 @@ target_sources(python
src/cpc_wrapper.cpp
src/fi_wrapper.cpp
src/theta_wrapper.cpp
+ src/vo_wrapper.cpp
)
diff --git a/python/src/datasketches.cpp b/python/src/datasketches.cpp
index 158f97c..f8c138a 100644
--- a/python/src/datasketches.cpp
+++ b/python/src/datasketches.cpp
@@ -26,6 +26,7 @@ void init_kll(py::module& m);
void init_fi(py::module& m);
void init_cpc(py::module& m);
void init_theta(py::module& m);
+void init_vo(py::module& m);
PYBIND11_MODULE(datasketches, m) {
init_hll(m);
@@ -33,4 +34,5 @@ PYBIND11_MODULE(datasketches, m) {
init_fi(m);
init_cpc(m);
init_theta(m);
+ init_vo(m);
}
diff --git a/python/src/vo_wrapper.cpp b/python/src/vo_wrapper.cpp
new file mode 100644
index 0000000..a2346e5
--- /dev/null
+++ b/python/src/vo_wrapper.cpp
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "var_opt_sketch.hpp"
+#include "var_opt_union.hpp"
+
+#include <pybind11/pybind11.h>
+#include <sstream>
+
+namespace py = pybind11;
+
+namespace datasketches {
+namespace python {
+
+// Copies the sketch's current samples into a python list, one
+// (item, weight) tuple per sample, in the sketch's iteration order.
+template<typename T>
+py::list vo_sketch_get_samples(const var_opt_sketch<T>& sk) {
+  py::list samples;
+  for (const auto& entry : sk) {
+    samples.append(py::make_tuple(entry.first, entry.second));
+  }
+  return samples;
+}
+
+// Builds a human-readable description of a sketch.
+// With print_items false this is just the sketch's own to_string(). With
+// print_items true, the to_stream() output is followed by one numbered line
+// per sample showing the item's python str() form and its weight.
+template<typename T>
+std::string vo_sketch_to_string(const var_opt_sketch<T>& sk, bool print_items) {
+  if (!print_items) {
+    return sk.to_string();
+  }
+  std::ostringstream out;
+  sk.to_stream(out);
+  out << "### VarOpt Sketch Items" << std::endl;
+  int index = 0;
+  for (const auto& sample : sk) {
+    // sample.first is an arbitrary py::object, so let python stringify it
+    // and then convert the resulting py::str into a C++ std::string
+    const py::str as_pystr(sample.first);
+    const std::string item_text = py::cast<std::string>(as_pystr);
+    out << index << ": " << item_text << "\twt = " << sample.second << std::endl;
+    ++index;
+  }
+  return out.str();
+}
+
+// Returns the union's own to_string() output unchanged.
+// The union's internal gadget sketch is not directly accessible from here,
+// so the per-item listing available for sketches is not produced.
+template<typename T>
+std::string vo_union_to_string(const var_opt_union<T>& u) {
+  std::string summary = u.to_string();
+  return summary;
+}
+
+}
+}
+
+namespace dspy = datasketches::python;
+
+// Registers var_opt_sketch<T> with the given python module under `name`.
+//
+// Exposes construction from k, item updates with an optional weight
+// (defaults to 1.0), read-only k / n / num_samples properties, sample
+// retrieval, is_empty and string rendering.
+//
+// print_items defaults to false on both __str__ and to_string: python's
+// str() and print() invoke __str__ with no arguments, so a required
+// argument there would make them raise a TypeError.
+template<typename T>
+void bind_vo_sketch(py::module &m, const char* name) {
+  using namespace datasketches;
+
+  py::class_<var_opt_sketch<T>>(m, name)
+    .def(py::init<uint32_t>(), py::arg("k"))
+    .def("__str__", &dspy::vo_sketch_to_string<T>, py::arg("print_items")=false)
+    .def("to_string", &dspy::vo_sketch_to_string<T>, py::arg("print_items")=false)
+    // static_cast selects the (const T&, double) overload of update()
+    .def("update", static_cast<void (var_opt_sketch<T>::*)(const T&, double)>(&var_opt_sketch<T>::update),
+         py::arg("item"), py::arg("weight")=1.0)
+    .def_property_readonly("k", &var_opt_sketch<T>::get_k)
+    .def_property_readonly("n", &var_opt_sketch<T>::get_n)
+    .def_property_readonly("num_samples", &var_opt_sketch<T>::get_num_samples)
+    .def("get_samples", &dspy::vo_sketch_get_samples<T>)
+    .def("is_empty", &var_opt_sketch<T>::is_empty)
+    // As of writing, not yet clear how to serialize arbitrary python objects,
+    // especially in any sort of language-portable way
+    //.def("get_serialized_size_bytes", &var_opt_sketch<T>::get_serialized_size_bytes)
+    //.def("serialize", &dspy::vo_sketch_serialize<T>)
+    //.def_static("deserialize", &dspy::vo_sketch_deserialize<T>)
+    ;
+}
+
+// Registers var_opt_union<T> with the given python module under `name`.
+// Exposes construction from max_k, string rendering, update from a sketch,
+// get_result and reset.
+template<typename T>
+void bind_vo_union(py::module &m, const char* name) {
+  using namespace datasketches;
+  using union_type = var_opt_union<T>;
+  using sketch_type = var_opt_sketch<T>;
+
+  py::class_<union_type> binding(m, name);
+  binding
+    .def(py::init<uint32_t>(), py::arg("max_k"))
+    .def("__str__", &dspy::vo_union_to_string<T>)
+    .def("to_string", &dspy::vo_union_to_string<T>)
+    // the cast picks the update() overload taking a const sketch reference
+    .def("update", (void (union_type::*)(const sketch_type&)) &union_type::update, py::arg("sketch"))
+    .def("get_result", &union_type::get_result)
+    .def("reset", &union_type::reset)
+    // Serialization is intentionally left unbound: as of writing it is not
+    // yet clear how to serialize arbitrary python objects, especially in
+    // any sort of language-portable way
+    //.def("get_serialized_size_bytes", &var_opt_union<T>::get_serialized_size_bytes)
+    //.def("serialize", &dspy::vo_union_serialize<T>)
+    //.def_static("deserialize", &dspy::vo_union_deserialize<T>)
+    ;
+}
+
+
+// Adds the VarOpt sampling classes to the python module, instantiated for
+// arbitrary python objects: a sketch class and its companion union class.
+void init_vo(py::module &m) {
+  bind_vo_sketch<py::object>(m, "varopt_object_sketch");
+  // class name previously misspelled as "varopt_object_uunion"
+  bind_vo_union<py::object>(m, "varopt_object_union");
+}
diff --git a/sampling/include/var_opt_sketch.hpp b/sampling/include/var_opt_sketch.hpp
index 7230537..0f044bd 100644
--- a/sampling/include/var_opt_sketch.hpp
+++ b/sampling/include/var_opt_sketch.hpp
@@ -90,6 +90,11 @@ class var_opt_sketch {
std::ostream& to_stream(std::ostream& os) const;
std::string to_string() const;
+ // These will only work for T with a defined operator<<()
+ // Kept separate to allow to_string() to compile for all types T
+ std::ostream& items_to_stream(std::ostream& os) const;
+ std::string items_to_string() const;
+
subset_summary estimate_subset_sum(std::function<bool(T)> predicate) const;
class const_iterator;
diff --git a/sampling/include/var_opt_sketch_impl.hpp b/sampling/include/var_opt_sketch_impl.hpp
index 4fea686..9673950 100644
--- a/sampling/include/var_opt_sketch_impl.hpp
+++ b/sampling/include/var_opt_sketch_impl.hpp
@@ -687,6 +687,13 @@ std::ostream& var_opt_sketch<T,S,A>::to_stream(std::ostream& os) const {
os << " weight_r : " << total_wt_r_ << std::endl;
os << " Current size : " << curr_items_alloc_ << std::endl;
os << " Resize factor: " << (1 << rf_) << std::endl;
+ os << "### END SKETCH SUMMARY" << std::endl;
+
+ return os;
+}
+
+template<typename T, typename S, typename A>
+std::ostream& var_opt_sketch<T,S,A>::items_to_stream(std::ostream& os) const {
os << "### Sketch Items" << std::endl;
uint32_t print_length = (n_ < k_ ? n_ : k_ + 1);
@@ -697,8 +704,6 @@ std::ostream& var_opt_sketch<T,S,A>::to_stream(std::ostream& os) const {
os << i << ": " << data_[i] << "\twt = " << weights_[i] << std::endl;
}
}
-
- os << "### END SKETCH SUMMARY" << std::endl;
return os;
}
@@ -710,6 +715,13 @@ std::string var_opt_sketch<T,S,A>::to_string() const {
return ss.str();
}
+// Returns the text that items_to_stream() writes, collected in a string.
+template <typename T, typename S, typename A>
+std::string var_opt_sketch<T,S,A>::items_to_string() const {
+  std::ostringstream buffer;
+  items_to_stream(buffer);
+  return buffer.str();
+}
+
template<typename T, typename S, typename A>
void var_opt_sketch<T,S,A>::update(const T& item, double weight, bool mark) {
if (weight <= 0.0) {
diff --git a/sampling/include/var_opt_union_impl.hpp b/sampling/include/var_opt_union_impl.hpp
index dd87ca8..adb21d4 100644
--- a/sampling/include/var_opt_union_impl.hpp
+++ b/sampling/include/var_opt_union_impl.hpp
@@ -313,7 +313,6 @@ std::string var_opt_union<T,S,A>::to_string() const {
return ss.str();
}
-
template<typename T, typename S, typename A>
void var_opt_union<T,S,A>::update(var_opt_sketch<T,S,A>& sk) {
merge_into(sk);
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org