You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by al...@apache.org on 2023/06/12 19:56:47 UTC

[datasketches-cpp] branch add_trim created (now 0918a80)

This is an automated email from the ASF dual-hosted git repository.

alsay pushed a change to branch add_trim
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git


      at 0918a80  added trim() and tests

This branch includes the following new commits:

     new 0918a80  added trim() and tests

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org


[datasketches-cpp] 01/01: added trim() and tests

Posted by al...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch add_trim
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git

commit 0918a806f560190d39cffb730b921ff7842e7d43
Author: AlexanderSaydakov <Al...@users.noreply.github.com>
AuthorDate: Mon Jun 12 12:56:33 2023 -0700

    added trim() and tests
---
 python/datasketches/TupleWrapper.py | 5 ++++-
 python/src/theta_wrapper.cpp        | 2 ++
 python/src/tuple_wrapper.cpp        | 1 +
 python/tests/theta_test.py          | 8 ++++++++
 python/tests/tuple_test.py          | 7 +++++++
 5 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/python/datasketches/TupleWrapper.py b/python/datasketches/TupleWrapper.py
index 3b38da0..97f0820 100644
--- a/python/datasketches/TupleWrapper.py
+++ b/python/datasketches/TupleWrapper.py
@@ -116,11 +116,14 @@ class update_tuple_sketch(tuple_sketch):
     """Returns a compacted form of the sketch, optionally sorting it."""
     return self._gadget.compact(ordered)
 
+  def trim(self):
+    """Removes retained entries in excess of the nominal size k (if any)."""
+    self._gadget.trim()
+
   def reset(self):
     """Resets the sketch to the initial empty state."""
     self._gadget.reset()
 
-
 class tuple_union:
   """An object that can merge Tuple Sketches. Requires a Policy object to handle merging Summaries."""
   _policy: TuplePolicy
diff --git a/python/src/theta_wrapper.cpp b/python/src/theta_wrapper.cpp
index f242ce5..033e6ca 100644
--- a/python/src/theta_wrapper.cpp
+++ b/python/src/theta_wrapper.cpp
@@ -76,6 +76,8 @@ void init_theta(py::module &m) {
          "Updates the sketch with the given string")
     .def("compact", &update_theta_sketch::compact, py::arg("ordered")=true,
          "Returns a compacted form of the sketch, optionally sorting it")
+    .def("trim", &update_theta_sketch::trim, "Removes retained entries in excess of the nominal size k (if any)")
+    .def("reset", &update_theta_sketch::reset, "Resets the sketch to the initial empty state")
   ;
 
   py::class_<compact_theta_sketch, theta_sketch>(m, "compact_theta_sketch")
diff --git a/python/src/tuple_wrapper.cpp b/python/src/tuple_wrapper.cpp
index 706621c..343181d 100644
--- a/python/src/tuple_wrapper.cpp
+++ b/python/src/tuple_wrapper.cpp
@@ -137,6 +137,7 @@ void init_tuple(py::module &m) {
          "Updates the sketch with the given string item and summary value")
     .def("compact", &py_update_tuple::compact, py::arg("ordered")=true,
          "Returns a compacted form of the sketch, optionally sorting it")
+    .def("trim", &py_update_tuple::trim, "Removes retained entries in excess of the nominal size k (if any)")
     .def("reset", &py_update_tuple::reset, "Resets the sketch to the initial empty state")
   ;
 
diff --git a/python/tests/theta_test.py b/python/tests/theta_test.py
index b3ca2da..f2798c4 100644
--- a/python/tests/theta_test.py
+++ b/python/tests/theta_test.py
@@ -54,6 +54,14 @@ class ThetaTest(unittest.TestCase):
           count = count + 1
         self.assertEqual(count, new_sk.get_num_retained())
 
+        num = sk.get_num_retained()
+        sk.trim()
+        self.assertLessEqual(sk.get_num_retained(), num)
+
+        sk.reset()
+        self.assertTrue(sk.is_empty())
+        self.assertEqual(sk.get_num_retained(), 0)
+
     def test_theta_set_operations(self):
         lgk = 12    # 2^k = 4096 rows in the table
         n = 1 << 18 # ~256k unique values
diff --git a/python/tests/tuple_test.py b/python/tests/tuple_test.py
index 2a298ef..6327599 100644
--- a/python/tests/tuple_test.py
+++ b/python/tests/tuple_test.py
@@ -75,6 +75,13 @@ class TupleTest(unittest.TestCase):
           cumSum += pair[1]
         self.assertEqual(cumSum, 5 * cts.get_num_retained())
 
+        num = sk.get_num_retained()
+        sk.trim()
+        self.assertLessEqual(sk.get_num_retained(), num)
+
+        sk.reset()
+        self.assertTrue(sk.is_empty())
+        self.assertEqual(sk.get_num_retained(), 0)
 
     def test_tuple_set_operations(self):
         lgk = 12    # 2^k = 4096 rows in the table


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org