You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@datasketches.apache.org by "jmalkin (via GitHub)" <gi...@apache.org> on 2023/02/11 00:52:30 UTC

[GitHub] [datasketches-cpp] jmalkin commented on a diff in pull request #343: Frequent items generic support

jmalkin commented on code in PR #343:
URL: https://github.com/apache/datasketches-cpp/pull/343#discussion_r1103451550


##########
python/src/fi_wrapper.cpp:
##########
@@ -73,37 +87,88 @@ void bind_fi_sketch(py::module &m, const char* name) {
     )
     .def_static(
         "get_epsilon_for_lg_size",
-        [](uint8_t lg_max_map_size) { return frequent_items_sketch<T>::get_epsilon(lg_max_map_size); },
+        [](uint8_t lg_max_map_size) { return frequent_items_sketch<T, W, H, E>::get_epsilon(lg_max_map_size); },
         py::arg("lg_max_map_size"),
         "Returns the epsilon value used to compute a priori error for a given log2(max_map_size)"
     )
     .def_static(
         "get_apriori_error",
-        &frequent_items_sketch<T>::get_apriori_error,
+        &frequent_items_sketch<T, W, H, E>::get_apriori_error,
         py::arg("lg_max_map_size"), py::arg("estimated_total_weight"),
         "Returns the estimated a priori error given the max_map_size for the sketch and the estimated_total_stream_weight."
-    )
-    .def(
+    );
+
+    // serialization may need a caller-provided serde depending on teh sketch type, so

Review Comment:
   fixed



##########
python/src/fi_wrapper.cpp:
##########
@@ -73,37 +87,88 @@ void bind_fi_sketch(py::module &m, const char* name) {
     )
     .def_static(
         "get_epsilon_for_lg_size",
-        [](uint8_t lg_max_map_size) { return frequent_items_sketch<T>::get_epsilon(lg_max_map_size); },
+        [](uint8_t lg_max_map_size) { return frequent_items_sketch<T, W, H, E>::get_epsilon(lg_max_map_size); },
         py::arg("lg_max_map_size"),
         "Returns the epsilon value used to compute a priori error for a given log2(max_map_size)"
     )
     .def_static(
         "get_apriori_error",
-        &frequent_items_sketch<T>::get_apriori_error,
+        &frequent_items_sketch<T, W, H, E>::get_apriori_error,
         py::arg("lg_max_map_size"), py::arg("estimated_total_weight"),
         "Returns the estimated a priori error given the max_map_size for the sketch and the estimated_total_stream_weight."
-    )
-    .def(
+    );
+
+    // serialization may need a caller-provided serde depending on teh sketch type, so
+    // we use a separate method to handle that appropriately based on type T.
+    add_serialization(fi_class);
+}
+
+// std::string or arithmetic types, for which we have a built-in serde
+template<typename T, typename W, typename H, typename E, typename std::enable_if<std::is_arithmetic<T>::value || std::is_same<std::string, T>::value, bool>::type>
+void add_serialization(py::class_<datasketches::frequent_items_sketch<T, W, H, E>>& clazz) {
+    using namespace datasketches;
+    clazz.def(
         "get_serialized_size_bytes",
-        [](const frequent_items_sketch<T>& sk) { return sk.get_serialized_size_bytes(); },
+        [](const frequent_items_sketch<T, W, H, E>& sk) { return sk.get_serialized_size_bytes(); },
         "Computes the size needed to serialize the current state of the sketch. This can be expensive since every item needs to be looked at."
     )
     .def(
         "serialize",
-        [](const frequent_items_sketch<T>& sk) {
+        [](const frequent_items_sketch<T, W, H, E>& sk) {
           auto bytes = sk.serialize();
           return py::bytes(reinterpret_cast<const char*>(bytes.data()), bytes.size());
         },
-        "Serializes the sketch into a bytes object"
+        "Serializes the sketch into a bytes object."
     )
     .def_static(
         "deserialize",
-        [](const std::string& bytes) { return frequent_items_sketch<T>::deserialize(bytes.data(), bytes.size()); },
+        [](const std::string& bytes) { return frequent_items_sketch<T, W, H, E>::deserialize(bytes.data(), bytes.size()); },
         py::arg("bytes"),
-        "Reads a bytes object and returns the corresponding frequent_strings_sketch"
+        "Reads a bytes object and returns the corresponding frequent_strings_sketch."
     );
 }
 
+// py::object or any other type that requires a provided serde
+template<typename T, typename W, typename H, typename E, typename std::enable_if<!std::is_arithmetic<T>::value && !std::is_same<std::string, T>::value, bool>::type>
+void add_serialization(py::class_<datasketches::frequent_items_sketch<T, W, H, E>>& clazz) {
+    using namespace datasketches;
+    clazz.def(
+        "get_serialized_size_bytes",
+        [](const frequent_items_sketch<T, W, H, E>& sk, py_object_serde& serde) { return sk.get_serialized_size_bytes(serde); },
+        py::arg("serde"),
+        "Computes the size needed to serialize the current state of the sketch using the provided serde. This can be expensive since every item needs to be looked at."
+    )
+    .def(
+        "serialize",
+        [](const frequent_items_sketch<T, W, H, E>& sk, py_object_serde& serde) {
+          auto bytes = sk.serialize(0, serde);
+          return py::bytes(reinterpret_cast<const char*>(bytes.data()), bytes.size());
+        }, py::arg("serde"),
+        "Serializes the sketch into a bytes object using the provided serde."
+    )
+    .def_static(
+        "deserialize",
+        [](const std::string& bytes, py_object_serde& serde) {
+          return frequent_items_sketch<T, W, H, E>::deserialize(bytes.data(), bytes.size(), serde);
+        }, py::arg("bytes"), py::arg("serde"),
+        "Reads a bytes object using the provided serde and returns the corresponding frequent_strings_sketch."
+    );
+}
+
+// calls class __hash__ method
+struct py_hash_caller {
+  size_t operator()(const py::object& a) {

Review Comment:
   right, this is done now



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@datasketches.apache.org
For additional commands, e-mail: commits-help@datasketches.apache.org