You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by jm...@apache.org on 2021/08/30 18:02:01 UTC

[datasketches-cpp] branch pybind_build_dependency created (now 0d3635f)

This is an automated email from the ASF dual-hosted git repository.

jmalkin pushed a change to branch pybind_build_dependency
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git.


      at 0d3635f  fix python compile issues on ubuntu+gcc10 docker by redefining constants in namespaces rather than inside class template

This branch includes the following new commits:

     new 847babe  turn pybind11 into python build dependency, removing from source tree.  docs no yet updated.
     new 0d3635f  fix python compile issues on ubuntu+gcc10 docker by redefining constants in namespaces rather than inside class template

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org


[datasketches-cpp] 01/02: turn pybind11 into python build dependency, removing from source tree. docs no yet updated.

Posted by jm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jmalkin pushed a commit to branch pybind_build_dependency
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git

commit 847babec95c6bfee6774e23ab7d4c736e925ad2c
Author: Jon Malkin <jm...@users.noreply.github.com>
AuthorDate: Mon Aug 30 11:00:30 2021 -0700

    turn pybind11 into python build dependency, removing from source tree.  docs no yet updated.
---
 .gitmodules           | 3 ---
 pyproject.toml        | 4 +++-
 python/CMakeLists.txt | 7 +++++--
 python/pybind11       | 1 -
 setup.py              | 3 ++-
 5 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/.gitmodules b/.gitmodules
index 4dbf077..e69de29 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +0,0 @@
-[submodule "python/pybind11"]
-	path = python/pybind11
-	url = https://github.com/pybind/pybind11
diff --git a/pyproject.toml b/pyproject.toml
index 077b885..a989be3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,7 +2,8 @@
 requires = ["wheel",
             "setuptools >= 30.3.0",
             "setuptools_scm",
-            "cmake >= 3.12"]
+            "cmake >= 3.12",
+            "pybind11[global] >= 2.6.0"]
 
 [tool.tox]
 legacy_tox_ini = """
@@ -12,6 +13,7 @@ envlist = py3
 [testenv]
 deps = pytest
        numpy
+       pybind11[global] >= 2.6.0
 changedir = python/tests
 commands = pytest
 """
\ No newline at end of file
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 0b76799..4da7b9d 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -22,9 +22,12 @@ else()
   set(PYBIND11_CPP_STANDARD -std=c++11)
 endif()
 
-add_subdirectory(pybind11)
+find_package(Python3 COMPONENTS Interpreter Development)
+find_package(pybind11 CONFIG)
+#add_subdirectory(pybind11)
 
-pybind11_add_module(python MODULE EXCLUDE_FROM_ALL SYSTEM THIN_LTO)
+#pybind11_add_module(python MODULE EXCLUDE_FROM_ALL SYSTEM THIN_LTO)
+pybind11_add_module(python MODULE EXCLUDE_FROM_ALL THIN_LTO)
 
 target_link_libraries(python
   PRIVATE
diff --git a/python/pybind11 b/python/pybind11
deleted file mode 160000
index 59a2ac2..0000000
--- a/python/pybind11
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 59a2ac2745d8a57ac94c6accced73620d59fb844
diff --git a/setup.py b/setup.py
index 9d270d9..a84954e 100644
--- a/setup.py
+++ b/setup.py
@@ -49,8 +49,9 @@ class CMakeBuild(build_ext):
             os.path.dirname(self.get_ext_fullpath(ext.name)))
         cmake_args =  ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + extdir]
         cmake_args += ['-DWITH_PYTHON=True']
+        cmake_args += ['-DCMAKE_CXX_STANDARD=11']
         # ensure we use a consistent python version
-        cmake_args += ['-DPYTHON_EXECUTABLE=' + sys.executable]
+        cmake_args += ['-DPython3_EXECUTABLE=' + sys.executable]
         cfg = 'Debug' if self.debug else 'Release'
         build_args = ['--config', cfg]
 

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org


[datasketches-cpp] 02/02: fix python compile issues on ubuntu+gcc10 docker by redefining constants in namespaces rather than inside class template

Posted by jm...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jmalkin pushed a commit to branch pybind_build_dependency
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git

commit 0d3635f95440a043d766059f78c4082c4a229537
Author: Jon Malkin <jm...@users.noreply.github.com>
AuthorDate: Mon Aug 30 11:01:42 2021 -0700

    fix python compile issues on ubuntu+gcc10 docker by redefining constants in namespaces rather than inside class template
---
 common/include/common_defs.hpp                  |  2 ++
 cpc/include/cpc_common.hpp                      | 13 ++++++++++---
 cpc/include/cpc_sketch.hpp                      |  2 +-
 cpc/include/cpc_union.hpp                       |  2 +-
 kll/include/kll_sketch.hpp                      |  9 +++++++--
 python/src/cpc_wrapper.cpp                      |  2 +-
 python/src/kll_wrapper.cpp                      |  2 +-
 python/src/theta_wrapper.cpp                    |  4 ++--
 python/src/vector_of_kll.cpp                    | 16 +++++++++++-----
 sampling/include/var_opt_sketch.hpp             | 15 ++++++++++-----
 sampling/include/var_opt_sketch_impl.hpp        |  2 +-
 theta/include/theta_constants.hpp               | 13 +++++++++----
 theta/include/theta_update_sketch_base.hpp      |  7 +++++--
 theta/include/theta_update_sketch_base_impl.hpp |  6 +++++-
 14 files changed, 66 insertions(+), 29 deletions(-)

diff --git a/common/include/common_defs.hpp b/common/include/common_defs.hpp
index 778f851..dadcaac 100644
--- a/common/include/common_defs.hpp
+++ b/common/include/common_defs.hpp
@@ -29,6 +29,8 @@ namespace datasketches {
 
 static const uint64_t DEFAULT_SEED = 9001;
 
+enum resize_factor { X1 = 0, X2, X4, X8 };
+
 template<typename A> using AllocChar = typename std::allocator_traits<A>::template rebind_alloc<char>;
 template<typename A> using string = std::basic_string<char, std::char_traits<char>, AllocChar<A>>;
 
diff --git a/cpc/include/cpc_common.hpp b/cpc/include/cpc_common.hpp
index cde110f..feb1e15 100644
--- a/cpc/include/cpc_common.hpp
+++ b/cpc/include/cpc_common.hpp
@@ -26,9 +26,16 @@
 
 namespace datasketches {
 
-static const uint8_t CPC_MIN_LG_K = 4;
-static const uint8_t CPC_MAX_LG_K = 26;
-static const uint8_t CPC_DEFAULT_LG_K = 11;
+namespace cpc_constants {
+    const uint8_t MIN_LG_K = 4;
+    const uint8_t MAX_LG_K = 26;
+    const uint8_t DEFAULT_LG_K = 11;
+}
+
+// TODO: Redundant and deprecated. Will be removed in next major version release.
+static const uint8_t CPC_MIN_LG_K = cpc_constants::MIN_LG_K;
+static const uint8_t CPC_MAX_LG_K = cpc_constants::MAX_LG_K;
+static const uint8_t CPC_DEFAULT_LG_K = cpc_constants::DEFAULT_LG_K;
 
 template<typename A> using AllocU8 = typename std::allocator_traits<A>::template rebind_alloc<uint8_t>;
 template<typename A> using AllocU16 = typename std::allocator_traits<A>::template rebind_alloc<uint16_t>;
diff --git a/cpc/include/cpc_sketch.hpp b/cpc/include/cpc_sketch.hpp
index cdc4d16..c000ac9 100644
--- a/cpc/include/cpc_sketch.hpp
+++ b/cpc/include/cpc_sketch.hpp
@@ -67,7 +67,7 @@ public:
    * @param lg_k base 2 logarithm of the number of bins in the sketch
    * @param seed for hash function
    */
-  explicit cpc_sketch_alloc(uint8_t lg_k = CPC_DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED, const A& allocator = A());
+  explicit cpc_sketch_alloc(uint8_t lg_k = cpc_constants::DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED, const A& allocator = A());
 
   using allocator_type = A;
   A get_allocator() const;
diff --git a/cpc/include/cpc_union.hpp b/cpc/include/cpc_union.hpp
index dd59abc..54fbed5 100644
--- a/cpc/include/cpc_union.hpp
+++ b/cpc/include/cpc_union.hpp
@@ -45,7 +45,7 @@ public:
    * @param lg_k base 2 logarithm of the number of bins in the sketch
    * @param seed for hash function
    */
-  explicit cpc_union_alloc(uint8_t lg_k = CPC_DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED, const A& allocator = A());
+  explicit cpc_union_alloc(uint8_t lg_k = cpc_constants::DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED, const A& allocator = A());
 
   cpc_union_alloc(const cpc_union_alloc<A>& other);
   cpc_union_alloc(cpc_union_alloc<A>&& other) noexcept;
diff --git a/kll/include/kll_sketch.hpp b/kll/include/kll_sketch.hpp
index 28ea8ea..fa55520 100644
--- a/kll/include/kll_sketch.hpp
+++ b/kll/include/kll_sketch.hpp
@@ -153,6 +153,10 @@ template<typename A> using vector_u32 = std::vector<uint32_t, AllocU32<A>>;
 template<typename A> using AllocD = typename std::allocator_traits<A>::template rebind_alloc<double>;
 template<typename A> using vector_d = std::vector<double, AllocD<A>>;
 
+namespace kll_constants {
+  const uint16_t DEFAULT_K = 200;
+}
+
 template <typename T, typename C = std::less<T>, typename S = serde<T>, typename A = std::allocator<T>>
 class kll_sketch {
   public:
@@ -160,11 +164,12 @@ class kll_sketch {
     using comparator = C;
 
     static const uint8_t DEFAULT_M = 8;
-    static const uint16_t DEFAULT_K = 200;
+    // TODO: Redundant and deprecated. Will be remove din next major version.
+    static const uint16_t DEFAULT_K = kll_constants::DEFAULT_K;
     static const uint16_t MIN_K = DEFAULT_M;
     static const uint16_t MAX_K = (1 << 16) - 1;
 
-    explicit kll_sketch(uint16_t k = DEFAULT_K, const A& allocator = A());
+    explicit kll_sketch(uint16_t k = kll_constants::DEFAULT_K, const A& allocator = A());
     kll_sketch(const kll_sketch& other);
     kll_sketch(kll_sketch&& other) noexcept;
     ~kll_sketch();
diff --git a/python/src/cpc_wrapper.cpp b/python/src/cpc_wrapper.cpp
index 673e3c7..deae7f6 100644
--- a/python/src/cpc_wrapper.cpp
+++ b/python/src/cpc_wrapper.cpp
@@ -53,7 +53,7 @@ void init_cpc(py::module &m) {
   using namespace datasketches;
 
   py::class_<cpc_sketch>(m, "cpc_sketch")
-    .def(py::init<uint8_t, uint64_t>(), py::arg("lg_k")=CPC_DEFAULT_LG_K, py::arg("seed")=DEFAULT_SEED)
+    .def(py::init<uint8_t, uint64_t>(), py::arg("lg_k")=cpc_constants::DEFAULT_LG_K, py::arg("seed")=DEFAULT_SEED)
     .def(py::init<const cpc_sketch&>())
     .def("__str__", &cpc_sketch::to_string,
          "Produces a string summary of the sketch")
diff --git a/python/src/kll_wrapper.cpp b/python/src/kll_wrapper.cpp
index d356358..48004b9 100644
--- a/python/src/kll_wrapper.cpp
+++ b/python/src/kll_wrapper.cpp
@@ -116,7 +116,7 @@ void bind_kll_sketch(py::module &m, const char* name) {
   using namespace datasketches;
 
   py::class_<kll_sketch<T>>(m, name)
-    .def(py::init<uint16_t>(), py::arg("k")=kll_sketch<T>::DEFAULT_K)
+    .def(py::init<uint16_t>(), py::arg("k")=kll_constants::DEFAULT_K)
     .def(py::init<const kll_sketch<T>&>())
     .def("update", (void (kll_sketch<T>::*)(const T&)) &kll_sketch<T>::update, py::arg("item"),
          "Updates the sketch with the given value")
diff --git a/python/src/theta_wrapper.cpp b/python/src/theta_wrapper.cpp
index cf8fdc7..86a2406 100644
--- a/python/src/theta_wrapper.cpp
+++ b/python/src/theta_wrapper.cpp
@@ -103,7 +103,7 @@ void init_theta(py::module &m) {
 
   py::class_<update_theta_sketch, theta_sketch>(m, "update_theta_sketch")
     .def(py::init(&dspy::update_theta_sketch_factory),
-         py::arg("lg_k")=update_theta_sketch::builder::DEFAULT_LG_K, py::arg("p")=1.0, py::arg("seed")=DEFAULT_SEED)
+         py::arg("lg_k")=theta_constants::DEFAULT_LG_K, py::arg("p")=1.0, py::arg("seed")=DEFAULT_SEED)
     .def(py::init<const update_theta_sketch&>())
     .def("update", (void (update_theta_sketch::*)(int64_t)) &update_theta_sketch::update, py::arg("datum"),
          "Updates the sketch with the given integral value")
@@ -127,7 +127,7 @@ void init_theta(py::module &m) {
 
   py::class_<theta_union>(m, "theta_union")
     .def(py::init(&dspy::theta_union_factory),
-         py::arg("lg_k")=update_theta_sketch::builder::DEFAULT_LG_K, py::arg("p")=1.0, py::arg("seed")=DEFAULT_SEED)
+         py::arg("lg_k")=theta_constants::DEFAULT_LG_K, py::arg("p")=1.0, py::arg("seed")=DEFAULT_SEED)
     .def("update", &theta_union::update<const theta_sketch&>, py::arg("sketch"),
          "Updates the union with the given sketch")
     .def("get_result", &theta_union::get_result, py::arg("ordered")=true,
diff --git a/python/src/vector_of_kll.cpp b/python/src/vector_of_kll.cpp
index b8d8a91..5a2ef1d 100644
--- a/python/src/vector_of_kll.cpp
+++ b/python/src/vector_of_kll.cpp
@@ -29,14 +29,20 @@ namespace py = pybind11;
 
 namespace datasketches {
 
+namespace vector_of_kll_constants {
+  static const uint32_t DEFAULT_K = kll_constants::DEFAULT_K;
+  static const uint32_t DEFAULT_D = 1;
+}
+
 // Wrapper class for Numpy compatibility
 template <typename T, typename C = std::less<T>, typename S = serde<T>>
 class vector_of_kll_sketches {
   public:
-    static const uint32_t DEFAULT_K = kll_sketch<T, C, S>::DEFAULT_K;
-    static const uint32_t DEFAULT_D = 1;
+    // TODO: Redundant and deprecated. Will be removed in next major version release.
+    static const uint32_t DEFAULT_K = vector_of_kll_constants::DEFAULT_K;
+    static const uint32_t DEFAULT_D = vector_of_kll_constants::DEFAULT_D;
 
-    explicit vector_of_kll_sketches(uint32_t k = DEFAULT_K, uint32_t d = DEFAULT_D);
+    explicit vector_of_kll_sketches(uint32_t k = vector_of_kll_constants::DEFAULT_K, uint32_t d = vector_of_kll_constants::DEFAULT_D);
     vector_of_kll_sketches(const vector_of_kll_sketches& other);
     vector_of_kll_sketches(vector_of_kll_sketches&& other) noexcept;
     vector_of_kll_sketches<T,C,S>& operator=(const vector_of_kll_sketches& other);
@@ -432,8 +438,8 @@ void bind_vector_of_kll_sketches(py::module &m, const char* name) {
   using namespace datasketches;
 
   py::class_<vector_of_kll_sketches<T>>(m, name)
-    .def(py::init<uint32_t, uint32_t>(), py::arg("k")=vector_of_kll_sketches<T>::DEFAULT_K, 
-                                         py::arg("d")=vector_of_kll_sketches<T>::DEFAULT_D)
+    .def(py::init<uint32_t, uint32_t>(), py::arg("k")=vector_of_kll_constants::DEFAULT_K, 
+                                         py::arg("d")=vector_of_kll_constants::DEFAULT_D)
     .def(py::init<const vector_of_kll_sketches<T>&>())
     // allow user to retrieve k or d, in case it's instantiated w/ defaults
     .def("get_k", &vector_of_kll_sketches<T>::get_k,
diff --git a/sampling/include/var_opt_sketch.hpp b/sampling/include/var_opt_sketch.hpp
index 3f5110c..a949899 100644
--- a/sampling/include/var_opt_sketch.hpp
+++ b/sampling/include/var_opt_sketch.hpp
@@ -51,18 +51,23 @@ struct subset_summary {
   double total_sketch_weight;
 };
 
-enum resize_factor { X1 = 0, X2, X4, X8 };
-
 template <typename T, typename S, typename A> class var_opt_union; // forward declaration
 
+namespace var_opt_constants {
+    const resize_factor DEFAULT_RESIZE_FACTOR = resize_factor::X8;
+    const uint32_t MAX_K = ((uint32_t) 1 << 31) - 2; 
+}
+
 template <typename T, typename S = serde<T>, typename A = std::allocator<T>>
 class var_opt_sketch {
 
   public:
-    static const resize_factor DEFAULT_RESIZE_FACTOR = X8;
-    static const uint32_t MAX_K = ((uint32_t) 1 << 31) - 2;
+    static const resize_factor DEFAULT_RESIZE_FACTOR = var_opt_constants::DEFAULT_RESIZE_FACTOR;
+    static const uint32_t MAX_K = var_opt_constants::MAX_K;
 
-    explicit var_opt_sketch(uint32_t k, resize_factor rf = DEFAULT_RESIZE_FACTOR, const A& allocator = A());
+    explicit var_opt_sketch(uint32_t k,
+      resize_factor rf = var_opt_constants::DEFAULT_RESIZE_FACTOR,
+      const A& allocator = A());
     var_opt_sketch(const var_opt_sketch& other);
     var_opt_sketch(var_opt_sketch&& other) noexcept;
 
diff --git a/sampling/include/var_opt_sketch_impl.hpp b/sampling/include/var_opt_sketch_impl.hpp
index dbfe5a4..ce0ac40 100644
--- a/sampling/include/var_opt_sketch_impl.hpp
+++ b/sampling/include/var_opt_sketch_impl.hpp
@@ -128,7 +128,7 @@ var_opt_sketch<T,S,A>::var_opt_sketch(T* data, double* weights, size_t len,
   r_(r_count),
   n_(n),
   total_wt_r_(total_wt_r),
-  rf_(DEFAULT_RESIZE_FACTOR),
+  rf_(var_opt_constants::DEFAULT_RESIZE_FACTOR),
   curr_items_alloc_(len),
   filled_data_(n > k),
   allocator_(allocator),
diff --git a/theta/include/theta_constants.hpp b/theta/include/theta_constants.hpp
index d5d6fd9..e47457b 100644
--- a/theta/include/theta_constants.hpp
+++ b/theta/include/theta_constants.hpp
@@ -21,14 +21,19 @@
 #define THETA_CONSTANTS_HPP_
 
 #include <climits>
+#include "common_defs.hpp"
 
 namespace datasketches {
 
 namespace theta_constants {
-  enum resize_factor { X1, X2, X4, X8 };
-  static const uint64_t MAX_THETA = LLONG_MAX; // signed max for compatibility with Java
-  static const uint8_t MIN_LG_K = 5;
-  static const uint8_t MAX_LG_K = 26;
+  using resize_factor = datasketches::resize_factor;
+  //enum resize_factor { X1, X2, X4, X8 };
+  const uint64_t MAX_THETA = LLONG_MAX; // signed max for compatibility with Java
+  const uint8_t MIN_LG_K = 5;
+  const uint8_t MAX_LG_K = 26;
+
+  const uint8_t DEFAULT_LG_K = 12;
+  const resize_factor DEFAULT_RESIZE_FACTOR = resize_factor::X8;
 }
 
 } /* namespace datasketches */
diff --git a/theta/include/theta_update_sketch_base.hpp b/theta/include/theta_update_sketch_base.hpp
index 4be1040..9d51c62 100644
--- a/theta/include/theta_update_sketch_base.hpp
+++ b/theta/include/theta_update_sketch_base.hpp
@@ -94,11 +94,14 @@ struct theta_update_sketch_base {
 template<typename Derived, typename Allocator>
 class theta_base_builder {
 public:
+  // TODO: Redundant and deprecated. Will be removed in next major verison release.
   using resize_factor = theta_constants::resize_factor;
   static const uint8_t MIN_LG_K = theta_constants::MIN_LG_K;
   static const uint8_t MAX_LG_K = theta_constants::MAX_LG_K;
-  static const uint8_t DEFAULT_LG_K = 12;
-  static const resize_factor DEFAULT_RESIZE_FACTOR = resize_factor::X8;
+  // TODO: The following defaults are redundant and deprecated. Will be removed in the
+  //       next major version release
+  static const uint8_t DEFAULT_LG_K = theta_constants::DEFAULT_LG_K;
+  static const resize_factor DEFAULT_RESIZE_FACTOR = theta_constants::DEFAULT_RESIZE_FACTOR;
 
   /**
    * Creates and instance of the builder with default parameters.
diff --git a/theta/include/theta_update_sketch_base_impl.hpp b/theta/include/theta_update_sketch_base_impl.hpp
index 947f085..b4a3124 100644
--- a/theta/include/theta_update_sketch_base_impl.hpp
+++ b/theta/include/theta_update_sketch_base_impl.hpp
@@ -271,7 +271,11 @@ void theta_update_sketch_base<EN, EK, A>::consolidate_non_empty(EN* entries, siz
 
 template<typename Derived, typename Allocator>
 theta_base_builder<Derived, Allocator>::theta_base_builder(const Allocator& allocator):
-allocator_(allocator), lg_k_(DEFAULT_LG_K), rf_(DEFAULT_RESIZE_FACTOR), p_(1), seed_(DEFAULT_SEED) {}
+allocator_(allocator),
+lg_k_(theta_constants::DEFAULT_LG_K),
+rf_(theta_constants::DEFAULT_RESIZE_FACTOR),
+p_(1),
+seed_(DEFAULT_SEED) {}
 
 template<typename Derived, typename Allocator>
 Derived& theta_base_builder<Derived, Allocator>::set_lg_k(uint8_t lg_k) {

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org