You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by jm...@apache.org on 2019/06/25 07:26:02 UTC

[incubator-datasketches-cpp] branch pybind11 created (now 984b3eb)

This is an automated email from the ASF dual-hosted git repository.

jmalkin pushed a change to branch pybind11
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git.


      at 984b3eb  port cpc, fi, and hll to pybind11

This branch includes the following new commits:

     new 984b3eb  port cpc, fi, and hll to pybind11

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org


[incubator-datasketches-cpp] 01/01: port cpc, fi, and hll to pybind11

Posted by jm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jmalkin pushed a commit to branch pybind11
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git

commit 984b3eb66d6cb4a9fd0665dd29ecdbd75309091c
Author: jmalkin <jm...@users.noreply.github.com>
AuthorDate: Tue Jun 25 00:25:43 2019 -0700

    port cpc, fi, and hll to pybind11
---
 .gitmodules                 |   3 +
 pyproject.toml              |   2 +-
 python/CMakeLists.txt       |  54 ++++++------
 python/pybind11             |   1 +
 python/src/cpc_wrapper.cpp  |  60 ++++++-------
 python/src/datasketches.cpp |  24 ++---
 python/src/fi_wrapper.cpp   |  97 ++++++++------------
 python/src/hll_wrapper.cpp  | 211 ++++++++++++++++----------------------------
 8 files changed, 182 insertions(+), 270 deletions(-)

diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..74dfe4a
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "python/pybind11"]
+	path = python/pybind11
+	url = git@github.com:pybind/pybind11.git
diff --git a/pyproject.toml b/pyproject.toml
index 5dbb23e..a5ac9a5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,2 +1,2 @@
 [build-system]
-requires = ["wheel", "setuptools", "cmake >= 3.12"]
+requires = ["wheel", "setuptools", "cmake >= 3.12", "pybind11"]
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 15470a2..51969ce 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -1,42 +1,40 @@
-find_package(Python3 COMPONENTS Development)
+#find_package(Python3 REQUIRED COMPONENTS Development)
 
-if(NOT DEFINED Python3_VERSION_SUFFIX)
-  set(Python3_VERSION_SUFFIX "${Python3_VERSION_MAJOR}${Python3_VERSION_MINOR}")
-  message("Defaulting to look for libboost_python${Python3_VERSION_SUFFIX}")
+if (MSVC)
+  set(PYBIND11_CPP_STANDARD /std:c++11)
+else()
+  set(PYBIND11_CPP_STANDARD -std=c++11)
 endif()
 
-find_package(Boost 1.53 COMPONENTS python${Python3_VERSION_SUFFIX} REQUIRED)
+add_subdirectory(pybind11)
+message(STATUS "Found pybind11 v${pybind11_VERSION}: ${pybind11_INCLUDE_DIRS}")
 
-if(NOT(${Boost_FOUND} AND ${Python3_Development_FOUND}))
-  message("If Python3 found but not Boost, check the Python3 version and set -DPython3_VERSION_SUFFIX to match the suffix on libboost_python.")
-  return()
-endif()
-
-add_library(python SHARED EXCLUDE_FROM_ALL)
+pybind11_add_module(python MODULE EXCLUDE_FROM_ALL)
+#add_library(python SHARED EXCLUDE_FROM_ALL)
 
 target_link_libraries(python
-  common
-  hll
-  kll
-  cpc
-  fi
-  ${Python3_LIBRARIES}
-  Boost::python${Python3_VERSION_SUFFIX}
-)
-
-target_include_directories(python
   PRIVATE
-    ${Python3_INCLUDE_DIRS}
-    ${Boost_INCLUDE_DIRS}
+    common
+    hll
+    #kll
+    cpc
+    fi
+    #${Python3_LIBRARIES}
+    pybind11::module
 )
 
+#target_include_directories(python
+#  PRIVATE
+#    ${Python3_INCLUDE_DIRS}
+#)
+
 set_target_properties(python PROPERTIES
   PREFIX ""
   OUTPUT_NAME datasketches
-  POSITION_INDEPENDENT_CODE ON
-  LINKER_LANGUAGE CXX
-  CXX_STANDARD 11
-  CXX_STANDARD_REQUIRED YES
+#  POSITION_INDEPENDENT_CODE ON
+#  LINKER_LANGUAGE CXX
+#  CXX_STANDARD 11
+#  CXX_STANDARD_REQUIRED YES
 )
 
 # ensure we make a .so on Mac rather than .dylib
@@ -48,7 +46,7 @@ target_sources(python
   PRIVATE
     src/datasketches.cpp
     src/hll_wrapper.cpp
-    src/kll_wrapper.cpp
+    #src/kll_wrapper.cpp
     src/cpc_wrapper.cpp
     src/fi_wrapper.cpp
 )
diff --git a/python/pybind11 b/python/pybind11
new file mode 160000
index 0000000..a1b71df
--- /dev/null
+++ b/python/pybind11
@@ -0,0 +1 @@
+Subproject commit a1b71df137e015d44f7e31f7b6d4807253fb7871
diff --git a/python/src/cpc_wrapper.cpp b/python/src/cpc_wrapper.cpp
index b317ff7..cbeba64 100644
--- a/python/src/cpc_wrapper.cpp
+++ b/python/src/cpc_wrapper.cpp
@@ -17,34 +17,28 @@
  * under the License.
  */
 
+
 #include "cpc_sketch.hpp"
 #include "cpc_union.hpp"
 #include "cpc_common.hpp"
-#include <boost/python.hpp>
 
-namespace bpy = boost::python;
+#include <pybind11/pybind11.h>
+#include <sstream>
+
+namespace py = pybind11;
 
 namespace datasketches {
 namespace python {
 
-cpc_sketch* CpcSketch_deserialize(bpy::object obj) {
-  PyObject* skBytes = obj.ptr();
-  if (!PyBytes_Check(skBytes)) {
-    PyErr_SetString(PyExc_TypeError, "Attmpted to deserialize non-bytes object");
-    bpy::throw_error_already_set();
-    return nullptr;
-  }
-  
-  size_t len = PyBytes_GET_SIZE(skBytes);
-  char* sketchImg = PyBytes_AS_STRING(skBytes);
-  cpc_sketch_unique_ptr sk = cpc_sketch::deserialize(sketchImg, len);
+cpc_sketch* CpcSketch_deserialize(py::bytes skBytes) {
+  std::string skStr = skBytes; // implicit cast
+  cpc_sketch_unique_ptr sk = cpc_sketch::deserialize(skStr.c_str(), skStr.length());
   return sk.release();
 }
 
-bpy::object CpcSketch_serialize(const cpc_sketch& sk) {
+py::object CpcSketch_serialize(const cpc_sketch& sk) {
   auto serResult = sk.serialize();
-  PyObject* sketchBytes = PyBytes_FromStringAndSize((char*)serResult.first.get(), serResult.second);
-  return bpy::object{bpy::handle<>(sketchBytes)};
+  return py::bytes((char*)serResult.first.get(), serResult.second);
 }
 
 std::string CpcSketch_toString(const cpc_sketch& sk) {
@@ -63,32 +57,32 @@ cpc_sketch* CpcUnion_getResult(const cpc_union& u) {
 
 namespace dspy = datasketches::python;
 
-void export_cpc()
-{
+void init_cpc(py::module &m) {
   using namespace datasketches;
 
-  bpy::class_<cpc_sketch, boost::noncopyable>("CpcSketch", bpy::init<const cpc_sketch&>())
-    .def(bpy::init<uint8_t>())
-    .def(bpy::init<uint8_t, uint64_t>())
+  py::class_<cpc_sketch>(m, "cpc_sketch")
+    .def(py::init<const cpc_sketch&>())
+    .def(py::init<uint8_t>())
+    .def(py::init<uint8_t, uint64_t>())
     .def("__str__", &dspy::CpcSketch_toString)
     .def("serialize", &dspy::CpcSketch_serialize)
-    .def("deserialize", &dspy::CpcSketch_deserialize, bpy::return_value_policy<bpy::manage_new_object>())
-    .staticmethod("deserialize")
+    .def_static("deserialize", &dspy::CpcSketch_deserialize, py::return_value_policy::take_ownership)
     .def<void (cpc_sketch::*)(uint64_t)>("update", &cpc_sketch::update)
     .def<void (cpc_sketch::*)(int64_t)>("update", &cpc_sketch::update)
     .def<void (cpc_sketch::*)(double)>("update", &cpc_sketch::update)
     .def<void (cpc_sketch::*)(const std::string&)>("update", &cpc_sketch::update)
-    .def("isEmpty", &cpc_sketch::is_empty)
-    .def("getEstimate", &cpc_sketch::get_estimate)
-    .def("getLowerBound", &cpc_sketch::get_lower_bound)
-    .def("getUpperBound", &cpc_sketch::get_upper_bound)
-    .def("getEstimate", &cpc_sketch::get_estimate)
+    .def("is_empty", &cpc_sketch::is_empty)
+    .def("get_estimate", &cpc_sketch::get_estimate)
+    .def("get_lower_bound", &cpc_sketch::get_lower_bound)
+    .def("get_upper_bound", &cpc_sketch::get_upper_bound)
+    .def("get_estimate", &cpc_sketch::get_estimate)
     ;
 
-  bpy::class_<cpc_union, boost::noncopyable>("CpcUnion", bpy::init<const cpc_union&>())
-    .def(bpy::init<uint8_t>())
-    .def(bpy::init<uint8_t, uint64_t>())
+  py::class_<cpc_union>(m, "cpc_union")
+    .def(py::init<const cpc_union&>())
+    .def(py::init<uint8_t>())
+    .def(py::init<uint8_t, uint64_t>())
     .def("update", &cpc_union::update)
-    .def("getResult", &dspy::CpcUnion_getResult, bpy::return_value_policy<bpy::manage_new_object>())
+    .def("get_result", &dspy::CpcUnion_getResult, py::return_value_policy::take_ownership)
     ;
-}
\ No newline at end of file
+}
diff --git a/python/src/datasketches.cpp b/python/src/datasketches.cpp
index 96f7c34..1e46402 100644
--- a/python/src/datasketches.cpp
+++ b/python/src/datasketches.cpp
@@ -17,16 +17,20 @@
  * under the License.
  */
 
-#include <boost/python.hpp>
+#include <pybind11/pybind11.h>
 
-void export_hll();
-void export_kll();
-void export_cpc();
-void export_fi();
+namespace py = pybind11;
 
-BOOST_PYTHON_MODULE(datasketches) {
-  export_hll();
-  export_kll();
-  export_cpc();
-  export_fi();
+void init_hll(py::module& m);
+//void init_kll(py::module& m);
+void init_fi(py::module& m);
+void init_cpc(py::module& m);
+//void init_theta(py::module& m);
+
+PYBIND11_MODULE(datasketches, m) {
+  init_hll(m);
+  //init_kll(m);
+  init_fi(m);
+  init_cpc(m);
+  //init_theta(m);
 }
diff --git a/python/src/fi_wrapper.cpp b/python/src/fi_wrapper.cpp
index 2b003d6..5da28ef 100644
--- a/python/src/fi_wrapper.cpp
+++ b/python/src/fi_wrapper.cpp
@@ -18,65 +18,47 @@
  */
 
 #include "frequent_items_sketch.hpp"
-#include <boost/python.hpp>
 
-namespace bpy = boost::python;
+#include <pybind11/pybind11.h>
+#include <sstream>
+
+namespace py = pybind11;
 
 namespace datasketches {
 namespace python {
 
 template<typename T>
-frequent_items_sketch<T>* FISketch_deserialize(bpy::object obj) {
-  PyObject* skBytes = obj.ptr();
-  if (!PyBytes_Check(skBytes)) {
-    PyErr_SetString(PyExc_TypeError, "Attmpted to deserialize non-bytes object");
-    bpy::throw_error_already_set();
-    return nullptr;
-  }
-  
-  size_t len = PyBytes_GET_SIZE(skBytes);
-  char* sketchImg = PyBytes_AS_STRING(skBytes);
-  auto sk = frequent_items_sketch<T>::deserialize(sketchImg, len);
-  return std::move(&sk);
+frequent_items_sketch<T> FISketch_deserialize(py::bytes skBytes) {
+  std::string skStr = skBytes; // implicit cast  
+  return frequent_items_sketch<T>::deserialize(skStr.c_str(), skStr.length());
 }
 
 template<typename T>
-bpy::object FISketch_serialize(const frequent_items_sketch<T>& sk) {
+py::object FISketch_serialize(const frequent_items_sketch<T>& sk) {
   auto serResult = sk.serialize();
-  PyObject* sketchBytes = PyBytes_FromStringAndSize((char*)serResult.first.get(), serResult.second);
-  return bpy::object{bpy::handle<>(sketchBytes)};
-}
-
-template<typename T>
-double FISketch_getSketchEpsilon(const frequent_items_sketch<T>& sk) {
-  return sk.get_epsilon();
+  return py::bytes((char*)serResult.first.get(), serResult.second);
 }
 
+// maybe possible to disambiguate the static vs method get_epsilon calls, but
+// this is easier for now
 template<typename T>
 double FISketch_getGenericEpsilon(uint8_t lg_max_map_size) {
   return frequent_items_sketch<T>::get_epsilon(lg_max_map_size);
 }
 
 template<typename T>
-void FISketch_update(frequent_items_sketch<T>& sk,
-                     const T& item,
-                     uint64_t weight = 1) {
-  sk.update(item, weight);
-}
-
-template<typename T>
-bpy::list FISketch_getFrequentItems(const frequent_items_sketch<T>& sk,
-                                    frequent_items_error_type err_type,
-                                    uint64_t threshold = 0) {
+py::list FISketch_getFrequentItems(const frequent_items_sketch<T>& sk,
+                                   frequent_items_error_type err_type,
+                                   uint64_t threshold = 0) {
   if (threshold == 0) { threshold = sk.get_maximum_error(); }
 
-  bpy::list list;
+  py::list list;
   auto items = sk.get_frequent_items(err_type, threshold);
   for (auto iter = items.begin(); iter != items.end(); ++iter) {
-    bpy::tuple t = bpy::make_tuple(iter->get_item(),
-                                   iter->get_estimate(),
-                                   iter->get_lower_bound(),
-                                   iter->get_upper_bound());
+    py::tuple t = py::make_tuple(iter->get_item(),
+                                 iter->get_estimate(),
+                                 iter->get_lower_bound(),
+                                 iter->get_upper_bound());
     list.append(t);
   }
   return list;
@@ -91,25 +73,20 @@ std::string FISketch_toString(const frequent_items_sketch<T>& sk,
 }
 
 }
-
 }
 
 namespace dspy = datasketches::python;
 
-BOOST_PYTHON_FUNCTION_OVERLOADS(FISketchUpdateOverloads, dspy::FISketch_update, 2, 3)
-BOOST_PYTHON_FUNCTION_OVERLOADS(FISketchGetFrequentItemsOverloads, dspy::FISketch_getFrequentItems, 2, 3)
-BOOST_PYTHON_FUNCTION_OVERLOADS(FISketchToStringOverloads, dspy::FISketch_toString, 1, 2)
-
 template<typename T>
-void bind_fi_sketch(const char* name)
-{
+void bind_fi_sketch(py::module &m, const char* name) {
   using namespace datasketches;
 
-  bpy::class_<frequent_items_sketch<T>, boost::noncopyable>(name, bpy::init<uint8_t>())
-    .def("__str__", &dspy::FISketch_toString<T>, FISketchToStringOverloads())
-    .def("to_string", &dspy::FISketch_toString<T>, FISketchToStringOverloads())
-    .def("update", &dspy::FISketch_update<T>, FISketchUpdateOverloads())
-    .def("get_frequent_items", &dspy::FISketch_getFrequentItems<T>, FISketchGetFrequentItemsOverloads())
+  py::class_<frequent_items_sketch<T>>(m, name)
+    .def(py::init<uint8_t>())
+    .def("__str__", &dspy::FISketch_toString<T>, py::arg("print_items")=false)
+    .def("to_string", &dspy::FISketch_toString<T>, py::arg("print_items")=false)
+    .def("update", (void (frequent_items_sketch<T>::*)(const T&, uint64_t)) &frequent_items_sketch<T>::update, py::arg("item"), py::arg("weight")=1)
+    .def("get_frequent_items", &dspy::FISketch_getFrequentItems<T>, py::arg("err_type"), py::arg("threshold")=0)
     .def("merge", &frequent_items_sketch<T>::merge)
     .def("is_empty", &frequent_items_sketch<T>::is_empty)
     .def("get_num_active_items", &frequent_items_sketch<T>::get_num_active_items)
@@ -117,26 +94,22 @@ void bind_fi_sketch(const char* name)
     .def("get_estimate", &frequent_items_sketch<T>::get_estimate)
     .def("get_lower_bound", &frequent_items_sketch<T>::get_lower_bound)
     .def("get_upper_bound", &frequent_items_sketch<T>::get_upper_bound)
-    .def("get_sketch_epsilon", &dspy::FISketch_getSketchEpsilon<T>)
-    .def("get_epsilon_for_lg_size", &dspy::FISketch_getGenericEpsilon<T>)
-    .staticmethod("get_epsilon_for_lg_size")
-    .def("get_apriori_error", &frequent_items_sketch<T>::get_apriori_error)
-    .staticmethod("get_apriori_error")
+    .def("get_sketch_epsilon", (double (frequent_items_sketch<T>::*)(void) const) &frequent_items_sketch<T>::get_epsilon)
+    .def_static("get_epsilon_for_lg_size", &dspy::FISketch_getGenericEpsilon<T>)
+    .def_static("get_apriori_error", &frequent_items_sketch<T>::get_apriori_error)
     .def("get_serialized_size_bytes", &frequent_items_sketch<T>::get_serialized_size_bytes)
     .def("serialize", &dspy::FISketch_serialize<T>)
-    .def("deserialize", &dspy::FISketch_deserialize<T>, bpy::return_value_policy<bpy::manage_new_object>())
-    .staticmethod("deserialize")
+    .def_static("deserialize", &dspy::FISketch_deserialize<T>)
     ;
 }
 
-void export_fi()
-{
+void init_fi(py::module &m) {
   using namespace datasketches;
 
-  bpy::enum_<frequent_items_error_type>("frequent_items_error_type")
+  py::enum_<frequent_items_error_type>(m, "frequent_items_error_type")
     .value("NO_FALSE_POSITIVES", NO_FALSE_POSITIVES)
     .value("NO_FALSE_NEGATIVES", NO_FALSE_NEGATIVES)
-    ;
+    .export_values();
 
-  bind_fi_sketch<std::string>("FrequentStringsSketch");
-}
\ No newline at end of file
+  bind_fi_sketch<std::string>(m, "frequent_strings_sketch");
+}
diff --git a/python/src/hll_wrapper.cpp b/python/src/hll_wrapper.cpp
index 0a7f40e..aaf9257 100644
--- a/python/src/hll_wrapper.cpp
+++ b/python/src/hll_wrapper.cpp
@@ -18,93 +18,42 @@
  */
 
 #include "hll.hpp"
-#include <boost/python.hpp>
-#include <memory>
 
-namespace bpy = boost::python;
+#include <pybind11/pybind11.h>
+
+namespace py = pybind11;
 
 namespace datasketches {
 namespace python {
 
-HllSketch<> HllSketch_deserialize(bpy::object obj) {
-  PyObject* skBytes = obj.ptr();
-  if (!PyBytes_Check(skBytes)) {
-    PyErr_SetString(PyExc_TypeError, "Attmpted to deserialize non-bytes object");
-    bpy::throw_error_already_set();
-  }
-  
-  size_t len = PyBytes_GET_SIZE(skBytes);
-  char* sketchImg = PyBytes_AS_STRING(skBytes);
-  HllSketch<> sk = HllSketch<>::deserialize(sketchImg, len);
-  return sk;
+HllSketch<> HllSketch_deserialize(py::bytes skBytes) {
+  std::string skStr = skBytes; // implicit cast  
+  return HllSketch<>::deserialize(skStr.c_str(), skStr.length());
 }
 
-bpy::object HllSketch_serializeCompact(const HllSketch<>& sk) {
-  std::pair<byte_ptr_with_deleter, const size_t> serResult = sk.serializeCompact();
-  PyObject* sketchBytes = PyBytes_FromStringAndSize((char*)serResult.first.get(), serResult.second);
-  return bpy::object{bpy::handle<>(sketchBytes)};
+py::object HllSketch_serializeCompact(const HllSketch<>& sk) {
+  auto serResult = sk.serializeCompact();
+  return py::bytes((char*)serResult.first.get(), serResult.second);
 }
 
-
-bpy::object HllSketch_serializeUpdatable(const HllSketch<>& sk) {
-  // TODO: can we just releast the smart pointer?
-  std::pair<byte_ptr_with_deleter, const size_t> serResult = sk.serializeUpdatable();
-  PyObject* sketchBytes = PyBytes_FromStringAndSize((char*)serResult.first.get(), serResult.second);
-  return bpy::object{bpy::handle<>(sketchBytes)};
+py::object HllSketch_serializeUpdatable(const HllSketch<>& sk) {
+  auto serResult = sk.serializeUpdatable();
+  return py::bytes((char*)serResult.first.get(), serResult.second);
 }
 
-std::string HllSketch_toString(const HllSketch<>& sk,
-                               bool summary = true,
-                               bool detail = false,
-                               bool auxDetail = false,
-                               bool all = false) {
-  return sk.to_string(summary, detail, auxDetail, all);
+HllUnion<> HllUnion_deserialize(py::bytes uBytes) {
+  std::string uStr = uBytes; // implicit cast
+  return HllUnion<>::deserialize(uStr.c_str(), uStr.length());
 }
 
-std::string HllSketch_toStringDefault(const HllSketch<>& sk) {
-  return HllSketch_toString(sk);
+py::object HllUnion_serializeCompact(const HllUnion<>& u) {
+  auto serResult = u.serializeCompact();
+  return py::bytes((char*)serResult.first.get(), serResult.second);
 }
 
-HllUnion<> HllUnion_deserialize(bpy::object obj) {
-  PyObject* skBytes = obj.ptr();
-  if (!PyBytes_Check(skBytes)) {
-    PyErr_SetString(PyExc_TypeError, "Attmpted to deserialize non-bytes object");
-    bpy::throw_error_already_set();
-  }
-  
-  size_t len = PyBytes_GET_SIZE(skBytes);
-  char* sketchImg = PyBytes_AS_STRING(skBytes);
-  HllUnion<> u = HllUnion<>::deserialize(sketchImg, len);
-  return u;
-}
-
-bpy::object HllUnion_serializeCompact(const HllUnion<>& u) {
-  std::pair<byte_ptr_with_deleter, const size_t> serResult = u.serializeCompact();
-  PyObject* unionBytes = PyBytes_FromStringAndSize((char*)serResult.first.get(), serResult.second);
-  return bpy::object{bpy::handle<>(unionBytes)};
-}
-
-bpy::object HllUnion_serializeUpdatable(const HllUnion<>& u) {
-  std::pair<byte_ptr_with_deleter, const size_t> serResult = u.serializeUpdatable();
-  PyObject* unionBytes = PyBytes_FromStringAndSize((char*)serResult.first.get(), serResult.second);
-  return bpy::object{bpy::handle<>(unionBytes)};
-}
-
-std::string HllUnion_toString(const HllUnion<>& u,
-                              bool summary = true,
-                              bool detail = false,
-                              bool auxDetail = false,
-                              bool all = false) {
-  return u.to_string(summary, detail, auxDetail, all);
-}
-
-std::string HllUnion_toStringDefault(const HllUnion<>& u) {
-  return HllUnion_toString(u);
-}
-
-HllSketch<> HllUnion_getResult(const HllUnion<>& u,
-                                TgtHllType tgtHllType = HLL_4) {
-  return std::move(u.getResult(tgtHllType));
+py::object HllUnion_serializeUpdatable(const HllUnion<>& u) {
+  auto serResult = u.serializeUpdatable();
+  return py::bytes((char*)serResult.first.get(), serResult.second);
 }
 
 }
@@ -112,82 +61,72 @@ HllSketch<> HllUnion_getResult(const HllUnion<>& u,
 
 namespace dspy = datasketches::python;
 
-BOOST_PYTHON_FUNCTION_OVERLOADS(HllSketchToStringOverloads, dspy::HllSketch_toString, 1, 5);
-
-BOOST_PYTHON_FUNCTION_OVERLOADS(HllUnionToStringOverloads, dspy::HllUnion_toString, 1, 5);
-BOOST_PYTHON_FUNCTION_OVERLOADS(HllUnionGetResultOverloads, dspy::HllUnion_getResult, 1, 2);
-
-void export_hll()
-{
+void init_hll(py::module &m) {
   using namespace datasketches;
 
-  bpy::enum_<TgtHllType>("TgtHllType")
+  py::enum_<TgtHllType>(m, "tgt_hll_type", "Target HLL flavor")
     .value("HLL_4", HLL_4)
     .value("HLL_6", HLL_6)
     .value("HLL_8", HLL_8)
-    ;
-
-  bpy::class_<HllSketch<>, boost::noncopyable>("HllSketch", bpy::init<int>())
-    .def(bpy::init<int, TgtHllType>())
-    .def(bpy::init<int, TgtHllType, bool>())
-    //.def("deserialize", &dspy::HllSketch_deserialize, bpy::return_value_policy<bpy::manage_new_object>())
-    .def("deserialize", &dspy::HllSketch_deserialize)
-    .staticmethod("deserialize")
-    .def("serializeCompact", &dspy::HllSketch_serializeCompact)
-    .def("serializeUpdatable", &dspy::HllSketch_serializeUpdatable)
-    .def("__str__", &dspy::HllSketch_toStringDefault)
-    .add_property("lgConfigK", &HllSketch<>::getLgConfigK)
-    .add_property("tgtHllType", &HllSketch<>::getTgtHllType)
-    .def("toString", &dspy::HllSketch_toString, HllSketchToStringOverloads())
-    .def("getEstimate", &HllSketch<>::getEstimate)
-    .def("getCompositeEstimate", &HllSketch<>::getCompositeEstimate)
-    .def("getLowerBound", &HllSketch<>::getLowerBound)
-    .def("getUpperBound", &HllSketch<>::getUpperBound)
-    .def("isCompact", &HllSketch<>::isCompact)
-    .def("isEmpty", &HllSketch<>::isEmpty)
-    .def("getUpdatableSerializationBytes", &HllSketch<>::getUpdatableSerializationBytes)
-    .def("getCompactSerializationBytes", &HllSketch<>::getCompactSerializationBytes)
+    .export_values();
+
+  py::class_<HllSketch<>>(m, "hll_sketch")
+    .def(py::init<int>())
+    .def(py::init<int, TgtHllType>())
+    .def(py::init<int, TgtHllType, bool>())
+    .def_static("deserialize", &dspy::HllSketch_deserialize)
+    .def("serialize_compact", &dspy::HllSketch_serializeCompact)
+    .def("serialize_updatable", &dspy::HllSketch_serializeUpdatable)
+    .def("to_string", (std::string (HllSketch<>::*)(bool,bool,bool,bool) const) &HllSketch<>::to_string,
+         py::arg("summary")=true, py::arg("detail")=false, py::arg("aux_detail")=false, py::arg("all")=false)
+    .def("__str__", (std::string (HllSketch<>::*)(bool,bool,bool,bool) const) &HllSketch<>::to_string,
+         py::arg("summary")=true, py::arg("detail")=false, py::arg("aux_detail")=false, py::arg("all")=false)
+    .def_property_readonly("lg_config_k", &HllSketch<>::getLgConfigK)
+    .def_property_readonly("tgt_hll_type", &HllSketch<>::getTgtHllType)
+    .def("get_estimate", &HllSketch<>::getEstimate)
+    .def("get_composite_estimate", &HllSketch<>::getCompositeEstimate)
+    .def("get_lower_bound", &HllSketch<>::getLowerBound)
+    .def("get_upper_bound", &HllSketch<>::getUpperBound)
+    .def("is_compact", &HllSketch<>::isCompact)
+    .def("is_empty", &HllSketch<>::isEmpty)
+    .def("get_updatable_serialization_bytes", &HllSketch<>::getUpdatableSerializationBytes)
+    .def("get_compact_serialization_bytes", &HllSketch<>::getCompactSerializationBytes)
     .def("reset", &HllSketch<>::reset)
-    .def<void (HllSketch<>::*)(uint64_t)>("update", &HllSketch<>::update)
-    .def<void (HllSketch<>::*)(int64_t)>("update", &HllSketch<>::update)
-    .def<void (HllSketch<>::*)(double)>("update", &HllSketch<>::update)
-    .def<void (HllSketch<>::*)(const std::string&)>("update", &HllSketch<>::update)
-    .def("getMaxUpdatableSerializationBytes", &HllSketch<>::getMaxUpdatableSerializationBytes)
-    .staticmethod("getMaxUpdatableSerializationBytes")
-    .def("getRelErr", &HllSketch<>::getRelErr)
-    .staticmethod("getRelErr")
+    .def("update", (void (HllSketch<>::*)(uint64_t)) &HllSketch<>::update)
+    .def("update", (void (HllSketch<>::*)(int64_t)) &HllSketch<>::update)
+    .def("update", (void (HllSketch<>::*)(double)) &HllSketch<>::update)
+    .def("update", (void (HllSketch<>::*)(const std::string&)) &HllSketch<>::update)
+    .def_static("get_max_updatable_serialization_bytes", &HllSketch<>::getMaxUpdatableSerializationBytes)
+    .def_static("get_rel_err", &HllSketch<>::getRelErr)
     ;
 
-  bpy::class_<HllUnion<>, boost::noncopyable>("HllUnion", bpy::init<int>())
-    //.def("deserialize", &dspy::HllUnion_deserialize, bpy::return_value_policy<bpy::manage_new_object>())
-    .def("deserialize", &dspy::HllUnion_deserialize)
-    .staticmethod("deserialize")
-    .def("serializeCompact", &dspy::HllUnion_serializeCompact)
-    .def("serializeUpdatable", &dspy::HllUnion_serializeUpdatable)
-    .def("__str__", &dspy::HllUnion_toStringDefault)
-    .add_property("lgConfigK", &HllUnion<>::getLgConfigK)
-    .add_property("tgtHllType", &HllUnion<>::getTgtHllType)
-    .def("toString", &dspy::HllUnion_toString, HllUnionToStringOverloads())
-    .def("getEstimate", &HllUnion<>::getEstimate)
-    .def("getCompositeEstimate", &HllUnion<>::getCompositeEstimate)
-    .def("getLowerBound", &HllUnion<>::getLowerBound)
-    .def("getUpperBound", &HllUnion<>::getUpperBound)
-    .def("isCompact", &HllUnion<>::isCompact)
-    .def("isEmpty", &HllUnion<>::isEmpty)
-    .def("getUpdatableSerializationBytes", &HllUnion<>::getUpdatableSerializationBytes)
-    .def("getCompactSerializationBytes", &HllUnion<>::getCompactSerializationBytes)
+  py::class_<HllUnion<>>(m, "hll_union")
+    .def(py::init<int>())
+    .def_static("deserialize", &dspy::HllUnion_deserialize)
+    .def("serialize_compact", &dspy::HllUnion_serializeCompact)
+    .def("serialize_updatable", &dspy::HllUnion_serializeUpdatable)
+    .def("to_string", (std::string (HllUnion<>::*)(bool,bool,bool,bool) const) &HllUnion<>::to_string,
+         py::arg("summary")=true, py::arg("detail")=false, py::arg("aux_detail")=false, py::arg("all")=false)
+    .def("__str__", (std::string (HllUnion<>::*)(bool,bool,bool,bool) const) &HllUnion<>::to_string,
+         py::arg("summary")=true, py::arg("detail")=false, py::arg("aux_detail")=false, py::arg("all")=false)
+    .def_property_readonly("lg_config_k", &HllUnion<>::getLgConfigK)
+    .def_property_readonly("tgt_hll_type", &HllUnion<>::getTgtHllType)
+    .def("get_estimate", &HllUnion<>::getEstimate)
+    .def("get_composite_estimate", &HllUnion<>::getCompositeEstimate)
+    .def("get_lower_bound", &HllUnion<>::getLowerBound)
+    .def("get_upper_bound", &HllUnion<>::getUpperBound)
+    .def("is_Compact", &HllUnion<>::isCompact)
+    .def("is_empty", &HllUnion<>::isEmpty)
+    .def("get_updatable_serialization_bytes", &HllUnion<>::getUpdatableSerializationBytes)
+    .def("get_compact_serialization_bytes", &HllUnion<>::getCompactSerializationBytes)
     .def("reset", &HllUnion<>::reset)
-    //.def("getResult", &dspy::HllUnion_getResult, HllUnionGetResultOverloads()[bpy::return_value_policy<bpy::manage_new_object>()])
-    .def("getResult", &dspy::HllUnion_getResult, HllUnionGetResultOverloads())
+    .def("get_result", &HllUnion<>::getResult, py::arg("tgt_hll_type")=HLL_4)
     .def<void (HllUnion<>::*)(const HllSketch<>&)>("update", &HllUnion<>::update)
     .def<void (HllUnion<>::*)(uint64_t)>("update", &HllUnion<>::update)
     .def<void (HllUnion<>::*)(int64_t)>("update", &HllUnion<>::update)
     .def<void (HllUnion<>::*)(double)>("update", &HllUnion<>::update)
     .def<void (HllUnion<>::*)(const std::string&)>("update", &HllUnion<>::update)
-    //.def<void (HllUnion::*)(const void*, size_t)>("update", &HllUnion::update)
-    .def("getMaxSerializationBytes", &HllUnion<>::getMaxSerializationBytes)
-    .staticmethod("getMaxSerializationBytes")
-    .def("getRelErr", &HllUnion<>::getRelErr)
-    .staticmethod("getRelErr")
+    .def_static("get_max_serialization_bytes", &HllUnion<>::getMaxSerializationBytes)
+    .def_static("get_rel_err", &HllUnion<>::getRelErr)
     ;
-}
\ No newline at end of file
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org