You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by jm...@apache.org on 2019/06/25 19:23:02 UTC
[incubator-datasketches-cpp] branch pybind11 updated: add kll support to python with pybind11
This is an automated email from the ASF dual-hosted git repository.
jmalkin pushed a commit to branch pybind11
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git
The following commit(s) were added to refs/heads/pybind11 by this push:
new adca8ba add kll support to python with pybind11
adca8ba is described below
commit adca8babab1d5e30f4325fd086596f31fc9e1d58
Author: jmalkin <jm...@users.noreply.github.com>
AuthorDate: Tue Jun 25 12:15:40 2019 -0700
add kll support to python with pybind11
---
python/CMakeLists.txt | 19 ++----
python/src/datasketches.cpp | 4 +-
python/src/kll_wrapper.cpp | 162 +++++++++++++++++---------------------------
3 files changed, 71 insertions(+), 114 deletions(-)
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 51969ce..b37171c 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -1,5 +1,4 @@
-#find_package(Python3 REQUIRED COMPONENTS Development)
-
+# TODO: Can we force python version >= 3.0?
if (MSVC)
set(PYBIND11_CPP_STANDARD /std:c++11)
else()
@@ -16,25 +15,16 @@ target_link_libraries(python
PRIVATE
common
hll
- #kll
+ kll
cpc
fi
- #${Python3_LIBRARIES}
+ #theta
pybind11::module
)
-#target_include_directories(python
-# PRIVATE
-# ${Python3_INCLUDE_DIRS}
-#)
-
set_target_properties(python PROPERTIES
PREFIX ""
OUTPUT_NAME datasketches
-# POSITION_INDEPENDENT_CODE ON
-# LINKER_LANGUAGE CXX
-# CXX_STANDARD 11
-# CXX_STANDARD_REQUIRED YES
)
# ensure we make a .so on Mac rather than .dylib
@@ -46,7 +36,8 @@ target_sources(python
PRIVATE
src/datasketches.cpp
src/hll_wrapper.cpp
- #src/kll_wrapper.cpp
+ src/kll_wrapper.cpp
src/cpc_wrapper.cpp
src/fi_wrapper.cpp
+ #src/theta_wrapper.cpp
)
diff --git a/python/src/datasketches.cpp b/python/src/datasketches.cpp
index 1e46402..5e97f57 100644
--- a/python/src/datasketches.cpp
+++ b/python/src/datasketches.cpp
@@ -22,14 +22,14 @@
namespace py = pybind11;
void init_hll(py::module& m);
-//void init_kll(py::module& m);
+void init_kll(py::module& m);
void init_fi(py::module& m);
void init_cpc(py::module& m);
//void init_theta(py::module& m);
PYBIND11_MODULE(datasketches, m) {
init_hll(m);
- //init_kll(m);
+ init_kll(m);
init_fi(m);
init_cpc(m);
//init_theta(m);
diff --git a/python/src/kll_wrapper.cpp b/python/src/kll_wrapper.cpp
index 9a60185..7219f06 100644
--- a/python/src/kll_wrapper.cpp
+++ b/python/src/kll_wrapper.cpp
@@ -18,115 +18,85 @@
*/
#include "kll_sketch.hpp"
-#include <boost/python.hpp>
-namespace bpy = boost::python;
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+#include <sstream>
+
+namespace py = pybind11;
namespace datasketches {
namespace python {
template<typename T>
-kll_sketch<T>* KllSketch_deserialize(bpy::object obj) {
- PyObject* skBytes = obj.ptr();
- if (!PyBytes_Check(skBytes)) {
- PyErr_SetString(PyExc_TypeError, "Attmpted to deserialize non-bytes object");
- bpy::throw_error_already_set();
- return nullptr;
- }
-
- size_t len = PyBytes_GET_SIZE(skBytes);
- char* sketchImg = PyBytes_AS_STRING(skBytes);
- auto sk = kll_sketch<T>::deserialize(sketchImg, len);
- return sk.release();
+kll_sketch<T> KllSketch_deserialize(py::bytes skBytes) {
+ std::string skStr = skBytes; // implicit cast
+ return kll_sketch<T>::deserialize(skStr.c_str(), skStr.length());
}
template<typename T>
-bpy::object KllSketch_serialize(const kll_sketch<T>& sk) {
+py::object KllSketch_serialize(const kll_sketch<T>& sk) {
auto serResult = sk.serialize();
- PyObject* sketchBytes = PyBytes_FromStringAndSize((char*)serResult.first.get(), serResult.second);
- return bpy::object{bpy::handle<>(sketchBytes)};
-}
-
-template<typename T>
-double KllSketch_sketchNormalizedRankError(const kll_sketch<T>& sk,
- bool pmf) {
- return sk.get_normalized_rank_error(pmf);
+ return py::bytes((char*)serResult.first.get(), serResult.second);
}
+// maybe possible to disambiguate the static vs method rank error calls, but
+// this is easier for now
template<typename T>
double KllSketch_generalNormalizedRankError(uint16_t k, bool pmf) {
return kll_sketch<T>::get_normalized_rank_error(k, pmf);
}
template<typename T>
-bpy::list KllSketch_getQuantiles(const kll_sketch<T>& sk,
- bpy::list& fractions) {
- size_t nQuantiles = len(fractions);
- double* frac = new double[nQuantiles];
- for (int i = 0; i < nQuantiles; ++i) {
- frac[i] = bpy::extract<double>(fractions[i]);
- }
- std::unique_ptr<T[]> result = sk.get_quantiles(frac, nQuantiles);
+py::list KllSketch_getQuantiles(const kll_sketch<T>& sk,
+ std::vector<double>& fractions) {
+ size_t nQuantiles = fractions.size();
+ std::unique_ptr<T[]> result = sk.get_quantiles(&fractions[0], nQuantiles);
- PyObject* list = PyList_New(nQuantiles);
+ // returning as std::vector<> would copy values to a list anyway
+ py::list list(nQuantiles);
for (int i = 0; i < nQuantiles; ++i) {
- if (std::is_same<T, int>::value)
- PyList_SET_ITEM(list, i, PyLong_FromLong(result[i]));
- else if (std::is_same<T, float>::value)
- PyList_SET_ITEM(list, i, PyFloat_FromDouble(result[i]));
+ list[i] = result[i];
}
- delete [] frac;
- return bpy::list{bpy::handle<>(list)};
+ return list;
}
template<typename T>
-bpy::list KllSketch_getPMF(const kll_sketch<T>& sk,
- bpy::list& split_points) {
- size_t nPoints = len(split_points);
- T* splitPoints = new T[nPoints];
- for (int i = 0; i < nPoints; ++i) {
- splitPoints[i] = bpy::extract<T>(split_points[i]);
- }
- std::unique_ptr<double[]> result = sk.get_PMF(splitPoints, nPoints);
+py::list KllSketch_getPMF(const kll_sketch<T>& sk,
+ std::vector<T>& split_points) {
+ size_t nPoints = split_points.size();
+ std::unique_ptr<double[]> result = sk.get_PMF(&split_points[0], nPoints);
- PyObject* pmf = PyList_New(nPoints);
+ py::list list(nPoints);
for (int i = 0; i < nPoints; ++i) {
- PyList_SET_ITEM(pmf, i, PyFloat_FromDouble(result[i]));
+ list[i] = result[i];
}
- delete [] splitPoints;
- return bpy::list{bpy::handle<>(pmf)};
+ return list;
}
template<typename T>
-bpy::list KllSketch_getCDF(const kll_sketch<T>& sk,
- bpy::list& split_points) {
- size_t nPoints = len(split_points);
- T* splitPoints = new T[nPoints];
- for (int i = 0; i < nPoints; ++i) {
- splitPoints[i] = bpy::extract<T>(split_points[i]);
- }
- std::unique_ptr<double[]> result = sk.get_CDF(splitPoints, nPoints);
+py::list KllSketch_getCDF(const kll_sketch<T>& sk,
+ std::vector<T>& split_points) {
+ size_t nPoints = split_points.size();
+ std::unique_ptr<double[]> result = sk.get_CDF(&split_points[0], nPoints);
- PyObject* cdf = PyList_New(nPoints);
+ py::list list(nPoints);
for (int i = 0; i < nPoints; ++i) {
- PyList_SET_ITEM(cdf, i, PyFloat_FromDouble(result[i]));
+ list[i] = result[i];
}
- delete [] splitPoints;
- return bpy::list{bpy::handle<>(cdf)};
-}
-
-template<typename T>
-uint32_t KllSketch_getSerializedSizeBytes(const kll_sketch<T>& sk) {
- return sk.get_serialized_size_bytes();
+ return list;
}
template<typename T>
+//std::string KllSketch_toString(const kll_sketch<T>& sk, bool print_levels, bool print_items) {
std::string KllSketch_toString(const kll_sketch<T>& sk) {
std::ostringstream ss;
- ss << sk;
+ // kll_sketch::to_stream() does not currently pay attention to the flags
+ //sk.to_stream(ss, print_levels, print_items);
+ sk.to_stream(ss);
return ss.str();
}
@@ -136,42 +106,38 @@ std::string KllSketch_toString(const kll_sketch<T>& sk) {
namespace dspy = datasketches::python;
template<typename T>
-void bind_kll_sketch(const char* name)
-{
+void bind_kll_sketch(py::module &m, const char* name) {
using namespace datasketches;
- bpy::class_<kll_sketch<T>, boost::noncopyable>(name, bpy::init<uint16_t>())
- .def(bpy::init<const kll_sketch<T>&>())
+ py::class_<kll_sketch<T>>(m, name)
+ .def(py::init<uint16_t>())
+ .def(py::init<const kll_sketch<T>&>())
.def("update", &kll_sketch<T>::update)
.def("merge", &kll_sketch<T>::merge)
.def("__str__", &dspy::KllSketch_toString<T>)
- .def("isEmpty", &kll_sketch<T>::is_empty)
- .def("getN", &kll_sketch<T>::get_n)
- .def("getNumRetained", &kll_sketch<T>::get_num_retained)
- .def("isEstimationMode", &kll_sketch<T>::is_estimation_mode)
- .def("getMinValue", &kll_sketch<T>::get_min_value)
- .def("getMaxValue", &kll_sketch<T>::get_max_value)
- .def("getQuantile", &kll_sketch<T>::get_quantile)
- .def("getQuantiles", &dspy::KllSketch_getQuantiles<T>)
- .def("getRank", &kll_sketch<T>::get_rank)
- .def("getPMF", &dspy::KllSketch_getPMF<T>)
- .def("getCDF", &dspy::KllSketch_getCDF<T>)
- .def("normalizedRankError", &dspy::KllSketch_sketchNormalizedRankError<T>)
- .def("getNormalizedRankError", &dspy::KllSketch_generalNormalizedRankError<T>)
- .staticmethod("getNormalizedRankError")
- .def("getSerializedSizeBytes", &dspy::KllSketch_getSerializedSizeBytes<T>)
- .def("getSizeofItem", &kll_sketch<T>::get_sizeof_item)
- .staticmethod("getSizeofItem")
- .def("getMaxSerializedSizeBytes", &kll_sketch<T>::get_max_serialized_size_bytes)
- .staticmethod("getMaxSerializedSizeBytes")
+ .def("is_empty", &kll_sketch<T>::is_empty)
+ .def("get_n", &kll_sketch<T>::get_n)
+ .def("get_num_retained", &kll_sketch<T>::get_num_retained)
+ .def("is_estimation_mode", &kll_sketch<T>::is_estimation_mode)
+ .def("get_min_value", &kll_sketch<T>::get_min_value)
+ .def("get_max_value", &kll_sketch<T>::get_max_value)
+ .def("get_quantile", &kll_sketch<T>::get_quantile)
+ .def("get_quantiles", &dspy::KllSketch_getQuantiles<T>)
+ .def("get_rank", &kll_sketch<T>::get_rank)
+ .def("get_pmf", &dspy::KllSketch_getPMF<T>)
+ .def("get_cdf", &dspy::KllSketch_getCDF<T>)
+ .def("normalized_rank_error", (double (kll_sketch<T>::*)(bool) const) &kll_sketch<T>::get_normalized_rank_error)
+ .def_static("get_normalized_rank_error", &dspy::KllSketch_generalNormalizedRankError<T>)
+ // can't yet get this one to work
+ //.def("get_serialized_size_bytes", &kll_sketch<T>::get_serialized_size_bytes)
+ // this doesn't seem to be defined in the class
+ //.def_static("get_max_serialized_size_bytes", &kll_sketch<T>::get_max_serialized_size_bytes)
.def("serialize", &dspy::KllSketch_serialize<T>)
- .def("deserialize", &dspy::KllSketch_deserialize<T>, bpy::return_value_policy<bpy::manage_new_object>())
- .staticmethod("deserialize")
+ .def_static("deserialize", &dspy::KllSketch_deserialize<T>)
;
}
-void export_kll()
-{
- bind_kll_sketch<int>("KllIntSketch");
- bind_kll_sketch<float>("KllFloatSketch");
-}
\ No newline at end of file
+void init_kll(py::module &m) {
+ bind_kll_sketch<int>(m, "kll_int_sketch");
+ bind_kll_sketch<float>(m, "kll_float_sketch");
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org