You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by jm...@apache.org on 2019/07/03 03:04:31 UTC

[incubator-datasketches-cpp] branch python_theta created (now edb3dc6)

This is an automated email from the ASF dual-hosted git repository.

jmalkin pushed a change to branch python_theta
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git.


      at edb3dc6  finish adding theta wrapper, add names to arguments on other sketches

This branch includes the following new commits:

     new edb3dc6  finish adding theta wrapper, add names to arguments on other sketches

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org


[incubator-datasketches-cpp] 01/01: finish adding theta wrapper, add names to arguments on other sketches

Posted by jm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jmalkin pushed a commit to branch python_theta
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git

commit edb3dc6924c9b8386c22445d8b003acd2fcec6c2
Author: jmalkin <jm...@users.noreply.github.com>
AuthorDate: Tue Jul 2 20:04:15 2019 -0700

    finish adding theta wrapper, add names to arguments on other sketches
---
 python/CMakeLists.txt               |   7 +-
 python/src/cpc_wrapper.cpp          |  24 +++---
 python/src/datasketches.cpp         |   4 +-
 python/src/fi_wrapper.cpp           |  12 +--
 python/src/hll_wrapper.cpp          |  45 ++++++------
 python/src/kll_wrapper.cpp          |  22 +++---
 python/src/theta_wrapper.cpp        | 141 ++++++++++++++++++++++++++++++++++++
 theta/include/theta_sketch_impl.hpp |   2 +
 8 files changed, 199 insertions(+), 58 deletions(-)

diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index b37171c..7e826cc 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -8,8 +8,7 @@ endif()
 add_subdirectory(pybind11)
 message(STATUS "Found pybind11 v${pybind11_VERSION}: ${pybind11_INCLUDE_DIRS}")
 
-pybind11_add_module(python MODULE EXCLUDE_FROM_ALL)
-#add_library(python SHARED EXCLUDE_FROM_ALL)
+pybind11_add_module(python MODULE EXCLUDE_FROM_ALL THIN_LTO)
 
 target_link_libraries(python
   PRIVATE
@@ -18,7 +17,7 @@ target_link_libraries(python
     kll
     cpc
     fi
-    #theta
+    theta
     pybind11::module
 )
 
@@ -39,5 +38,5 @@ target_sources(python
     src/kll_wrapper.cpp
     src/cpc_wrapper.cpp
     src/fi_wrapper.cpp
-    #stc/theta_wrapper.cpp
+    src/theta_wrapper.cpp
 )
diff --git a/python/src/cpc_wrapper.cpp b/python/src/cpc_wrapper.cpp
index cbeba64..8c6e9b5 100644
--- a/python/src/cpc_wrapper.cpp
+++ b/python/src/cpc_wrapper.cpp
@@ -61,28 +61,24 @@ void init_cpc(py::module &m) {
   using namespace datasketches;
 
   py::class_<cpc_sketch>(m, "cpc_sketch")
+    .def(py::init<uint8_t, uint64_t>(), py::arg("lg_k"), py::arg("seed")=DEFAULT_SEED)
     .def(py::init<const cpc_sketch&>())
-    .def(py::init<uint8_t>())
-    .def(py::init<uint8_t, uint64_t>())
     .def("__str__", &dspy::CpcSketch_toString)
     .def("serialize", &dspy::CpcSketch_serialize)
-    .def_static("deserialize", &dspy::CpcSketch_deserialize, py::return_value_policy::take_ownership)
-    .def<void (cpc_sketch::*)(uint64_t)>("update", &cpc_sketch::update)
-    .def<void (cpc_sketch::*)(int64_t)>("update", &cpc_sketch::update)
-    .def<void (cpc_sketch::*)(double)>("update", &cpc_sketch::update)
-    .def<void (cpc_sketch::*)(const std::string&)>("update", &cpc_sketch::update)
+    .def_static("deserialize", &dspy::CpcSketch_deserialize)
+    .def<void (cpc_sketch::*)(uint64_t)>("update", &cpc_sketch::update, py::arg("datum"))
+    .def<void (cpc_sketch::*)(double)>("update", &cpc_sketch::update, py::arg("datum"))
+    .def<void (cpc_sketch::*)(const std::string&)>("update", &cpc_sketch::update, py::arg("datum"))
     .def("is_empty", &cpc_sketch::is_empty)
     .def("get_estimate", &cpc_sketch::get_estimate)
-    .def("get_lower_bound", &cpc_sketch::get_lower_bound)
-    .def("get_upper_bound", &cpc_sketch::get_upper_bound)
-    .def("get_estimate", &cpc_sketch::get_estimate)
+    .def("get_lower_bound", &cpc_sketch::get_lower_bound, py::arg("kappa"))
+    .def("get_upper_bound", &cpc_sketch::get_upper_bound, py::arg("kappa"))
     ;
 
   py::class_<cpc_union>(m, "cpc_union")
+    .def(py::init<uint8_t, uint64_t>(), py::arg("lg_k"), py::arg("seed")=DEFAULT_SEED)
     .def(py::init<const cpc_union&>())
-    .def(py::init<uint8_t>())
-    .def(py::init<uint8_t, uint64_t>())
-    .def("update", &cpc_union::update)
-    .def("get_result", &dspy::CpcUnion_getResult, py::return_value_policy::take_ownership)
+    .def("update", &cpc_union::update, py::arg("sketch"))
+    .def("get_result", &dspy::CpcUnion_getResult)
     ;
 }
diff --git a/python/src/datasketches.cpp b/python/src/datasketches.cpp
index 5e97f57..158f97c 100644
--- a/python/src/datasketches.cpp
+++ b/python/src/datasketches.cpp
@@ -25,12 +25,12 @@ void init_hll(py::module& m);
 void init_kll(py::module& m);
 void init_fi(py::module& m);
 void init_cpc(py::module& m);
-//void init_theta(py::module& m);
+void init_theta(py::module& m);
 
 PYBIND11_MODULE(datasketches, m) {
   init_hll(m);
   init_kll(m);
   init_fi(m);
   init_cpc(m);
-  //init_theta(m);
+  init_theta(m);
 }
diff --git a/python/src/fi_wrapper.cpp b/python/src/fi_wrapper.cpp
index 5da28ef..14471fb 100644
--- a/python/src/fi_wrapper.cpp
+++ b/python/src/fi_wrapper.cpp
@@ -82,7 +82,7 @@ void bind_fi_sketch(py::module &m, const char* name) {
   using namespace datasketches;
 
   py::class_<frequent_items_sketch<T>>(m, name)
-    .def(py::init<uint8_t>())
+    .def(py::init<uint8_t>(), py::arg("lg_max_k"))
     .def("__str__", &dspy::FISketch_toString<T>, py::arg("print_items")=false)
     .def("to_string", &dspy::FISketch_toString<T>, py::arg("print_items")=false)
     .def("update", (void (frequent_items_sketch<T>::*)(const T&, uint64_t)) &frequent_items_sketch<T>::update, py::arg("item"), py::arg("weight")=1)
@@ -91,12 +91,12 @@ void bind_fi_sketch(py::module &m, const char* name) {
     .def("is_empty", &frequent_items_sketch<T>::is_empty)
     .def("get_num_active_items", &frequent_items_sketch<T>::get_num_active_items)
     .def("get_total_weight", &frequent_items_sketch<T>::get_total_weight)
-    .def("get_estimate", &frequent_items_sketch<T>::get_estimate)
-    .def("get_lower_bound", &frequent_items_sketch<T>::get_lower_bound)
-    .def("get_upper_bound", &frequent_items_sketch<T>::get_upper_bound)
+    .def("get_estimate", &frequent_items_sketch<T>::get_estimate, py::arg("item"))
+    .def("get_lower_bound", &frequent_items_sketch<T>::get_lower_bound, py::arg("item"))
+    .def("get_upper_bound", &frequent_items_sketch<T>::get_upper_bound, py::arg("item"))
     .def("get_sketch_epsilon", (double (frequent_items_sketch<T>::*)(void) const) &frequent_items_sketch<T>::get_epsilon)
-    .def_static("get_epsilon_for_lg_size", &dspy::FISketch_getGenericEpsilon<T>)
-    .def_static("get_apriori_error", &frequent_items_sketch<T>::get_apriori_error)
+    .def_static("get_epsilon_for_lg_size", &dspy::FISketch_getGenericEpsilon<T>, py::arg("lg_max_map_size"))
+    .def_static("get_apriori_error", &frequent_items_sketch<T>::get_apriori_error, py::arg("lg_max_map_size"), py::arg("estimated_total_weight"))
     .def("get_serialized_size_bytes", &frequent_items_sketch<T>::get_serialized_size_bytes)
     .def("serialize", &dspy::FISketch_serialize<T>)
     .def_static("deserialize", &dspy::FISketch_deserialize<T>)
diff --git a/python/src/hll_wrapper.cpp b/python/src/hll_wrapper.cpp
index aaf9257..28c17cf 100644
--- a/python/src/hll_wrapper.cpp
+++ b/python/src/hll_wrapper.cpp
@@ -71,9 +71,9 @@ void init_hll(py::module &m) {
     .export_values();
 
   py::class_<HllSketch<>>(m, "hll_sketch")
-    .def(py::init<int>())
-    .def(py::init<int, TgtHllType>())
-    .def(py::init<int, TgtHllType, bool>())
+    .def(py::init<int>(), py::arg("lg_k"))
+    .def(py::init<int, TgtHllType>(), py::arg("lg_k"), py::arg("tgt_hll_type"))
+    .def(py::init<int, TgtHllType, bool>(), py::arg("lg_k"), py::arg("tgt_hll_type"), py::arg("start_max_size")=false)
     .def_static("deserialize", &dspy::HllSketch_deserialize)
     .def("serialize_compact", &dspy::HllSketch_serializeCompact)
     .def("serialize_updatable", &dspy::HllSketch_serializeUpdatable)
@@ -85,23 +85,24 @@ void init_hll(py::module &m) {
     .def_property_readonly("tgt_hll_type", &HllSketch<>::getTgtHllType)
     .def("get_estimate", &HllSketch<>::getEstimate)
     .def("get_composite_estimate", &HllSketch<>::getCompositeEstimate)
-    .def("get_lower_bound", &HllSketch<>::getLowerBound)
-    .def("get_upper_bound", &HllSketch<>::getUpperBound)
+    .def("get_lower_bound", &HllSketch<>::getLowerBound, py::arg("num_std_devs"))
+    .def("get_upper_bound", &HllSketch<>::getUpperBound, py::arg("num_std_devs"))
     .def("is_compact", &HllSketch<>::isCompact)
     .def("is_empty", &HllSketch<>::isEmpty)
     .def("get_updatable_serialization_bytes", &HllSketch<>::getUpdatableSerializationBytes)
     .def("get_compact_serialization_bytes", &HllSketch<>::getCompactSerializationBytes)
     .def("reset", &HllSketch<>::reset)
-    .def("update", (void (HllSketch<>::*)(uint64_t)) &HllSketch<>::update)
-    .def("update", (void (HllSketch<>::*)(int64_t)) &HllSketch<>::update)
-    .def("update", (void (HllSketch<>::*)(double)) &HllSketch<>::update)
-    .def("update", (void (HllSketch<>::*)(const std::string&)) &HllSketch<>::update)
-    .def_static("get_max_updatable_serialization_bytes", &HllSketch<>::getMaxUpdatableSerializationBytes)
-    .def_static("get_rel_err", &HllSketch<>::getRelErr)
+    .def("update", (void (HllSketch<>::*)(int64_t)) &HllSketch<>::update, py::arg("datum"))
+    .def("update", (void (HllSketch<>::*)(double)) &HllSketch<>::update, py::arg("datum"))
+    .def("update", (void (HllSketch<>::*)(const std::string&)) &HllSketch<>::update, py::arg("datum"))
+    .def_static("get_max_updatable_serialization_bytes", &HllSketch<>::getMaxUpdatableSerializationBytes,
+         py::arg("lg_k"), py::arg("tgt_hll_type"))
+    .def_static("get_rel_err", &HllSketch<>::getRelErr,
+         py::arg("upper_bound"), py::arg("unioned"), py::arg("lg_k"), py::arg("num_std_devs"))
     ;
 
   py::class_<HllUnion<>>(m, "hll_union")
-    .def(py::init<int>())
+    .def(py::init<int>(), py::arg("lg_max_k"))
     .def_static("deserialize", &dspy::HllUnion_deserialize)
     .def("serialize_compact", &dspy::HllUnion_serializeCompact)
     .def("serialize_updatable", &dspy::HllUnion_serializeUpdatable)
@@ -113,20 +114,20 @@ void init_hll(py::module &m) {
     .def_property_readonly("tgt_hll_type", &HllUnion<>::getTgtHllType)
     .def("get_estimate", &HllUnion<>::getEstimate)
     .def("get_composite_estimate", &HllUnion<>::getCompositeEstimate)
-    .def("get_lower_bound", &HllUnion<>::getLowerBound)
-    .def("get_upper_bound", &HllUnion<>::getUpperBound)
-    .def("is_Compact", &HllUnion<>::isCompact)
+    .def("get_lower_bound", &HllUnion<>::getLowerBound, py::arg("num_std_devs"))
+    .def("get_upper_bound", &HllUnion<>::getUpperBound, py::arg("num_std_devs"))
+    .def("is_compact", &HllUnion<>::isCompact)
     .def("is_empty", &HllUnion<>::isEmpty)
     .def("get_updatable_serialization_bytes", &HllUnion<>::getUpdatableSerializationBytes)
     .def("get_compact_serialization_bytes", &HllUnion<>::getCompactSerializationBytes)
     .def("reset", &HllUnion<>::reset)
     .def("get_result", &HllUnion<>::getResult, py::arg("tgt_hll_type")=HLL_4)
-    .def<void (HllUnion<>::*)(const HllSketch<>&)>("update", &HllUnion<>::update)
-    .def<void (HllUnion<>::*)(uint64_t)>("update", &HllUnion<>::update)
-    .def<void (HllUnion<>::*)(int64_t)>("update", &HllUnion<>::update)
-    .def<void (HllUnion<>::*)(double)>("update", &HllUnion<>::update)
-    .def<void (HllUnion<>::*)(const std::string&)>("update", &HllUnion<>::update)
-    .def_static("get_max_serialization_bytes", &HllUnion<>::getMaxSerializationBytes)
-    .def_static("get_rel_err", &HllUnion<>::getRelErr)
+    .def<void (HllUnion<>::*)(const HllSketch<>&)>("update", &HllUnion<>::update, py::arg("sketch"))
+    .def<void (HllUnion<>::*)(int64_t)>("update", &HllUnion<>::update, py::arg("datum"))
+    .def<void (HllUnion<>::*)(double)>("update", &HllUnion<>::update, py::arg("datum"))
+    .def<void (HllUnion<>::*)(const std::string&)>("update", &HllUnion<>::update, py::arg("datum"))
+    .def_static("get_max_serialization_bytes", &HllUnion<>::getMaxSerializationBytes, py::arg("lg_k"))
+    .def_static("get_rel_err", &HllUnion<>::getRelErr,
+         py::arg("upper_bound"), py::arg("unioned"), py::arg("lg_k"), py::arg("num_std_devs"))
     ;
 }
diff --git a/python/src/kll_wrapper.cpp b/python/src/kll_wrapper.cpp
index 7219f06..4263bd5 100644
--- a/python/src/kll_wrapper.cpp
+++ b/python/src/kll_wrapper.cpp
@@ -110,10 +110,10 @@ void bind_kll_sketch(py::module &m, const char* name) {
   using namespace datasketches;
 
   py::class_<kll_sketch<T>>(m, name)
-    .def(py::init<uint16_t>())
+    .def(py::init<uint16_t>(), py::arg("k"))
     .def(py::init<const kll_sketch<T>&>())
-    .def("update", &kll_sketch<T>::update)
-    .def("merge", &kll_sketch<T>::merge)
+    .def("update", &kll_sketch<T>::update, py::arg("item"))
+    .def("merge", &kll_sketch<T>::merge, py::arg("sketch"))
     .def("__str__", &dspy::KllSketch_toString<T>)
     .def("is_empty", &kll_sketch<T>::is_empty)
     .def("get_n", &kll_sketch<T>::get_n)
@@ -121,13 +121,15 @@ void bind_kll_sketch(py::module &m, const char* name) {
     .def("is_estimation_mode", &kll_sketch<T>::is_estimation_mode)
     .def("get_min_value", &kll_sketch<T>::get_min_value)
     .def("get_max_value", &kll_sketch<T>::get_max_value)
-    .def("get_quantile", &kll_sketch<T>::get_quantile)
-    .def("get_quantiles", &dspy::KllSketch_getQuantiles<T>)
-    .def("get_rank", &kll_sketch<T>::get_rank)
-    .def("get_pmf", &dspy::KllSketch_getPMF<T>)
-    .def("get_cdf", &dspy::KllSketch_getCDF<T>)
-    .def("normalized_rank_error", (double (kll_sketch<T>::*)(bool) const) &kll_sketch<T>::get_normalized_rank_error)
-    .def_static("get_normalized_rank_error", &dspy::KllSketch_generalNormalizedRankError<T>)
+    .def("get_quantile", &kll_sketch<T>::get_quantile, py::arg("fraction"))
+    .def("get_quantiles", &dspy::KllSketch_getQuantiles<T>, py::arg("fractions"))
+    .def("get_rank", &kll_sketch<T>::get_rank, py::arg("value"))
+    .def("get_pmf", &dspy::KllSketch_getPMF<T>, py::arg("split_points"))
+    .def("get_cdf", &dspy::KllSketch_getCDF<T>, py::arg("split_points"))
+    .def("normalized_rank_error", (double (kll_sketch<T>::*)(bool) const) &kll_sketch<T>::get_normalized_rank_error,
+         py::arg("as_pmf"))
+    .def_static("get_normalized_rank_error", &dspy::KllSketch_generalNormalizedRankError<T>,
+         py::arg("k"), py::arg("as_pmf"))
     // can't yet get this one to work
     //.def("get_serialized_size_bytes", &kll_sketch<T>::get_serialized_size_bytes)
     // this doesn't seem to be defined in the class
diff --git a/python/src/theta_wrapper.cpp b/python/src/theta_wrapper.cpp
new file mode 100644
index 0000000..964e597
--- /dev/null
+++ b/python/src/theta_wrapper.cpp
@@ -0,0 +1,141 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <theta_sketch.hpp>
+#include <theta_union.hpp>
+#include <theta_intersection.hpp>
+#include <theta_a_not_b.hpp>
+
+#include <pybind11/pybind11.h>
+#include <sstream>
+
+namespace py = pybind11;
+
+namespace datasketches {
+namespace python {
+
+theta_sketch* theta_sketch_deserialize(py::bytes skBytes,
+                                       uint64_t seed = update_theta_sketch::builder::DEFAULT_SEED) {
+  std::string skStr = skBytes; // implicit cast  
+  return theta_sketch::deserialize(skStr.c_str(), skStr.length(), seed).release();
+}
+
+py::object theta_sketch_serialize(const theta_sketch& sk) {
+  auto serResult = sk.serialize();
+  return py::bytes((char*)serResult.first.get(), serResult.second);
+}
+
+std::string theta_sketch_to_string(const theta_sketch& sk,
+                                   bool print_items = false) {
+  std::ostringstream ss;
+  sk.to_stream(ss, print_items);
+  return ss.str();
+}
+
+uint16_t theta_sketch_get_seed_hash(const theta_sketch& sk) {
+  return sk.get_seed_hash();
+}
+
+update_theta_sketch update_theta_sketch_deserialize(py::bytes skBytes,
+                                                     uint64_t seed = update_theta_sketch::builder::DEFAULT_SEED) {
+  std::string skStr = skBytes; // implicit cast  
+  return update_theta_sketch::deserialize(skStr.c_str(), skStr.length(), seed);
+}
+
+compact_theta_sketch compact_theta_sketch_deserialize(py::bytes skBytes,
+                                                      uint64_t seed = update_theta_sketch::builder::DEFAULT_SEED) {
+  std::string skStr = skBytes; // implicit cast  
+  return compact_theta_sketch::deserialize(skStr.c_str(), skStr.length(), seed);
+}
+
+}
+}
+
+namespace dspy = datasketches::python;
+
+void init_theta(py::module &m) {
+  using namespace datasketches;
+
+  py::class_<theta_sketch>(m, "theta_sketch")
+    .def("serialize", &dspy::theta_sketch_serialize)
+    .def_static("deserialize", &dspy::theta_sketch_deserialize, py::arg("bytes"), py::arg("seed")=update_theta_sketch::builder::DEFAULT_SEED)
+    .def("__str__", &dspy::theta_sketch_to_string, py::arg("print_items")=false)
+    .def("to_string", &dspy::theta_sketch_to_string, py::arg("print_items")=false)
+    .def("is_empty", &theta_sketch::is_empty)
+    .def("get_estimate", &theta_sketch::get_estimate)
+    .def("get_upper_bound", &theta_sketch::get_upper_bound, py::arg("num_std_devs"))
+    .def("get_lower_bound", &theta_sketch::get_lower_bound, py::arg("num_std_devs"))
+    .def("is_estimation_mode", &theta_sketch::is_estimation_mode)
+    .def("get_theta", &theta_sketch::get_theta)
+    .def("get_num_retained", &theta_sketch::get_num_retained)
+    .def("get_seed_hash", &dspy::theta_sketch_get_seed_hash)
+    .def("is_ordered", &theta_sketch::is_ordered)
+  ;
+
+  py::class_<update_theta_sketch, theta_sketch>(m, "update_theta_sketch")
+    .def(py::init<const update_theta_sketch&>())
+    .def("update", (void (update_theta_sketch::*)(int64_t)) &update_theta_sketch::update, py::arg("datum"))
+    .def("update", (void (update_theta_sketch::*)(double)) &update_theta_sketch::update, py::arg("datum"))
+    .def("update", (void (update_theta_sketch::*)(const std::string&)) &update_theta_sketch::update, py::arg("datum"))
+    .def("compact", &update_theta_sketch::compact, py::arg("ordered")=true)
+    .def_static("deserialize", &dspy::update_theta_sketch_deserialize)
+  ;
+
+  py::class_<compact_theta_sketch, theta_sketch>(m, "compact_theta_sketch")
+    .def(py::init<const compact_theta_sketch&>())
+    .def(py::init<const theta_sketch&, bool>())
+    .def_static("deserialize", &dspy::compact_theta_sketch_deserialize)
+  ;
+
+  py::class_<theta_union>(m, "theta_union")
+    .def("update", &theta_union::update, py::arg("sketch"))
+    .def("get_result", &theta_union::get_result, py::arg("ordered")=true)
+  ;
+
+  py::class_<theta_intersection>(m, "theta_intersection")
+    .def(py::init<uint64_t>(), py::arg("seed")=update_theta_sketch::builder::DEFAULT_SEED)
+    .def(py::init<const theta_intersection&>())
+    .def("update", &theta_intersection::update, py::arg("sketch"))
+    .def("get_result", &theta_intersection::get_result, py::arg("ordered")=true)
+    .def("has_result", &theta_intersection::has_result)
+  ;
+
+  py::class_<theta_a_not_b>(m, "theta_a_not_b")
+    .def(py::init<uint64_t>(), py::arg("seed")=update_theta_sketch::builder::DEFAULT_SEED)
+    .def("compute", &theta_a_not_b::compute, py::arg("a"), py::arg("b"), py::arg("ordered")=true)
+  ;
+
+  // builders
+  py::class_<update_theta_sketch::builder>(m, "theta_sketch_builder")
+    .def(py::init<>())
+    .def("set_lg_k", &update_theta_sketch::builder::set_lg_k)
+    .def("set_p", &update_theta_sketch::builder::set_p)
+    .def("set_seed", &update_theta_sketch::builder::set_seed)
+    .def("build", &update_theta_sketch::builder::build)
+  ;
+
+  py::class_<theta_union::builder>(m, "theta_union_builder")
+    .def(py::init<>())
+    .def("set_lg_k", &theta_union::builder::set_lg_k)
+    .def("set_p", &theta_union::builder::set_p)
+    .def("set_seed", &theta_union::builder::set_seed)
+    .def("build", &theta_union::builder::build)
+  ;
+
+}
diff --git a/theta/include/theta_sketch_impl.hpp b/theta/include/theta_sketch_impl.hpp
index af707d4..ffdaf53 100644
--- a/theta/include/theta_sketch_impl.hpp
+++ b/theta/include/theta_sketch_impl.hpp
@@ -24,6 +24,8 @@
 #include <cmath>
 #include <memory>
 #include <functional>
+#include <istream>
+#include <ostream>
 
 #include "MurmurHash3.h"
 #include "serde.hpp"


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org