You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by jm...@apache.org on 2020/07/21 22:37:39 UTC

[incubator-datasketches-cpp] 01/01: add array input to kll sketches in python

This is an automated email from the ASF dual-hosted git repository.

jmalkin pushed a commit to branch py_kll_array_input
in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-cpp.git

commit e5fea0e94db3d84526fa80a3ea6ae85b7a15eeda
Author: Jon Malkin <jm...@users.noreply.github.com>
AuthorDate: Tue Jul 21 15:37:17 2020 -0700

    add array input to kll sketches in python
---
 python/src/kll_wrapper.cpp | 15 +++++++++++++++
 python/tests/kll_test.py   |  8 ++++----
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/python/src/kll_wrapper.cpp b/python/src/kll_wrapper.cpp
index 9a0e3e3..361cc5c 100644
--- a/python/src/kll_wrapper.cpp
+++ b/python/src/kll_wrapper.cpp
@@ -21,6 +21,7 @@
 
 #include <pybind11/pybind11.h>
 #include <pybind11/stl.h>
+#include <pybind11/numpy.h>
 #include <sstream>
 #include <vector>
 
@@ -92,6 +93,19 @@ py::list kll_sketch_get_cdf(const kll_sketch<T>& sk,
   return list;
 }
 
+// Feeds each element of a 1-D numpy array into the sketch, in index order; throws std::invalid_argument if ndim != 1.
+template<typename T>
+void kll_sketch_update(kll_sketch<T>& sk, py::array_t<T, py::array::c_style | py::array::forcecast> items) {
+  if (items.ndim() != 1) {
+    throw std::invalid_argument("input data must have only one dimension. Found: " + std::to_string(items.ndim()));
+  }
+  auto data = items.template unchecked<1>();
+  // index with the same signed type as data.size(): a uint32_t counter wraps to 0 and never terminates for >= 2^32 items
+  for (py::ssize_t i = 0; i < data.size(); ++i) {
+    sk.update(data(i));
+  }
+}
+
 }
 }
 
@@ -105,6 +119,7 @@ void bind_kll_sketch(py::module &m, const char* name) {
     .def(py::init<uint16_t>(), py::arg("k"))
     .def(py::init<const kll_sketch<T>&>())
     .def("update", (void (kll_sketch<T>::*)(const T&)) &kll_sketch<T>::update, py::arg("item"))
+    .def("update", &dspy::kll_sketch_update<T>, py::arg("array"))
     .def("merge", (void (kll_sketch<T>::*)(const kll_sketch<T>&)) &kll_sketch<T>::merge, py::arg("sketch"))
     .def("__str__", &kll_sketch<T>::to_string, py::arg("print_levels")=false, py::arg("print_items")=false)
     .def("to_string", &kll_sketch<T>::to_string, py::arg("print_levels")=false, py::arg("print_items")=false)
diff --git a/python/tests/kll_test.py b/python/tests/kll_test.py
index e670fdb..abe92ec 100644
--- a/python/tests/kll_test.py
+++ b/python/tests/kll_test.py
@@ -24,12 +24,12 @@ import numpy as np
 class KllTest(unittest.TestCase):
     def test_kll_example(self):
       k = 160
-      n = 2 ** 18
+      n = 2 ** 20
 
-      # create a sketch and inject ~1 million N(0,1) points
+      # create a sketch and inject ~1 million N(0,1) points as an array and as a single item
       kll = kll_floats_sketch(k)
-      for i in range(0, n):
-        kll.update(np.random.randn())
+      kll.update(np.random.normal(size=n-1))
+      kll.update(0.0)
 
       # 0 should be near the median
       self.assertAlmostEqual(0.5, kll.get_rank(0.0), delta=0.025)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org