You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by pc...@apache.org on 2017/11/26 22:12:30 UTC

[arrow] branch master updated: ARROW-1758: [Python] Remove pickle=True option for object serialization

This is an automated email from the ASF dual-hosted git repository.

pcmoritz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 85e2d89  ARROW-1758: [Python] Remove pickle=True option for object serialization
85e2d89 is described below

commit 85e2d8960d5aeeb04d8b59e6e7d8a8266a7d095f
Author: Licht-T <li...@outlook.jp>
AuthorDate: Sun Nov 26 14:12:22 2017 -0800

    ARROW-1758: [Python] Remove pickle=True option for object serialization
    
    This closes [ARROW-1758](https://issues.apache.org/jira/browse/ARROW-1758).
    
    Author: Licht-T <li...@outlook.jp>
    Author: Wes McKinney <we...@twosigma.com>
    
    Closes #1347 from Licht-T/clean-pickle-option-for-object-serialization and squashes the following commits:
    
    927f154 [Wes McKinney] Use cloudpickle for lambda serialization if available
    ba998dd [Licht-T] CLN: Remove pickle=True option for object serialization
---
 python/pyarrow/serialization.pxi           | 14 ++------------
 python/pyarrow/serialization.py            | 13 +++++++++++--
 python/pyarrow/tests/test_serialization.py |  9 ++++++---
 3 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/python/pyarrow/serialization.pxi b/python/pyarrow/serialization.pxi
index 6b72277..3ee5c7d 100644
--- a/python/pyarrow/serialization.pxi
+++ b/python/pyarrow/serialization.pxi
@@ -47,7 +47,6 @@ cdef class SerializationContext:
     cdef:
         object type_to_type_id
         object whitelisted_types
-        object types_to_pickle
         object custom_serializers
         object custom_deserializers
 
@@ -55,11 +54,10 @@ cdef class SerializationContext:
         # Types with special serialization handlers
         self.type_to_type_id = dict()
         self.whitelisted_types = dict()
-        self.types_to_pickle = set()
         self.custom_serializers = dict()
         self.custom_deserializers = dict()
 
-    def register_type(self, type_, type_id, pickle=False,
+    def register_type(self, type_, type_id,
                       custom_serializer=None, custom_deserializer=None):
         """EXPERIMENTAL: Add type to the list of types we can serialize.
 
@@ -69,9 +67,6 @@ cdef class SerializationContext:
             The type that we can serialize.
         type_id : bytes
             A string of bytes used to identify the type.
-        pickle : bool
-            True if the serialization should be done with pickle.
-            False if it should be done efficiently with Arrow.
         custom_serializer : callable
             This argument is optional, but can be provided to
             serialize objects of the class in a particular way.
@@ -81,8 +76,6 @@ cdef class SerializationContext:
         """
         self.type_to_type_id[type_] = type_id
         self.whitelisted_types[type_id] = type_
-        if pickle:
-            self.types_to_pickle.add(type_id)
         if custom_serializer is not None:
             self.custom_serializers[type_id] = custom_serializer
             self.custom_deserializers[type_id] = custom_deserializer
@@ -102,9 +95,7 @@ cdef class SerializationContext:
 
         # use the closest match to type(obj)
         type_id = self.type_to_type_id[type_]
-        if type_id in self.types_to_pickle:
-            serialized_obj = {"data": pickle.dumps(obj), "pickle": True}
-        elif type_id in self.custom_serializers:
+        if type_id in self.custom_serializers:
             serialized_obj = {"data": self.custom_serializers[type_id](obj)}
         else:
             if is_named_tuple(type_):
@@ -125,7 +116,6 @@ cdef class SerializationContext:
             # The object was pickled, so unpickle it.
             obj = pickle.loads(serialized_obj["data"])
         else:
-            assert type_id not in self.types_to_pickle
             if type_id not in self.whitelisted_types:
                 msg = "Type ID " + str(type_id) + " not registered in " \
                       "deserialization callback"
diff --git a/python/pyarrow/serialization.py b/python/pyarrow/serialization.py
index 2b47513..ab25b63 100644
--- a/python/pyarrow/serialization.py
+++ b/python/pyarrow/serialization.py
@@ -17,12 +17,18 @@
 
 from collections import OrderedDict, defaultdict
 import sys
+import pickle
 
 import numpy as np
 
 from pyarrow import serialize_pandas, deserialize_pandas
 from pyarrow.lib import _default_serialization_context
 
+try:
+    import cloudpickle
+except ImportError:
+    cloudpickle = pickle
+
 
 def register_default_serialization_handlers(serialization_context):
 
@@ -67,9 +73,12 @@ def register_default_serialization_handlers(serialization_context):
 
     serialization_context.register_type(
         type(lambda: 0), "function",
-        pickle=True)
+        custom_serializer=cloudpickle.dumps,
+        custom_deserializer=cloudpickle.loads)
 
-    serialization_context.register_type(type, "type", pickle=True)
+    serialization_context.register_type(type, "type",
+                                        custom_serializer=cloudpickle.dumps,
+                                        custom_deserializer=cloudpickle.loads)
 
     # ----------------------------------------------------------------------
     # Set up serialization for numpy with dtype object (primitive types are
diff --git a/python/pyarrow/tests/test_serialization.py b/python/pyarrow/tests/test_serialization.py
index b0c5bc4..ed4fd9a 100644
--- a/python/pyarrow/tests/test_serialization.py
+++ b/python/pyarrow/tests/test_serialization.py
@@ -23,6 +23,7 @@ from collections import namedtuple, OrderedDict, defaultdict
 import datetime
 import string
 import sys
+import pickle
 
 import pyarrow as pa
 import numpy as np
@@ -197,7 +198,9 @@ def make_serialization_context():
     context.register_type(Baz, "Baz")
     context.register_type(Qux, "Quz")
     context.register_type(SubQux, "SubQux")
-    context.register_type(SubQuxPickle, "SubQuxPickle", pickle=True)
+    context.register_type(SubQuxPickle, "SubQuxPickle",
+                          custom_serializer=pickle.dumps,
+                          custom_deserializer=pickle.loads)
     context.register_type(Exception, "Exception")
     context.register_type(CustomError, "CustomError")
     context.register_type(Point, "Point")
@@ -338,7 +341,7 @@ def test_serialization_callback_numpy():
         return serialized_obj
 
     pa._default_serialization_context.register_type(
-        DummyClass, "DummyClass", pickle=False,
+        DummyClass, "DummyClass",
         custom_serializer=serialize_dummy_class,
         custom_deserializer=deserialize_dummy_class)
 
@@ -357,7 +360,7 @@ def test_buffer_serialization():
         return serialized_obj
 
     pa._default_serialization_context.register_type(
-        BufferClass, "BufferClass", pickle=False,
+        BufferClass, "BufferClass",
         custom_serializer=serialize_buffer_class,
         custom_deserializer=deserialize_buffer_class)
 

-- 
To stop receiving notification emails like this one, please contact
"commits@arrow.apache.org" <co...@arrow.apache.org>.