You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by pc...@apache.org on 2017/11/26 22:12:30 UTC
[arrow] branch master updated: ARROW-1758: [Python] Remove pickle=True option for object serialization
This is an automated email from the ASF dual-hosted git repository.
pcmoritz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 85e2d89 ARROW-1758: [Python] Remove pickle=True option for object serialization
85e2d89 is described below
commit 85e2d8960d5aeeb04d8b59e6e7d8a8266a7d095f
Author: Licht-T <li...@outlook.jp>
AuthorDate: Sun Nov 26 14:12:22 2017 -0800
ARROW-1758: [Python] Remove pickle=True option for object serialization
This closes [ARROW-1758](https://issues.apache.org/jira/browse/ARROW-1758).
Author: Licht-T <li...@outlook.jp>
Author: Wes McKinney <we...@twosigma.com>
Closes #1347 from Licht-T/clean-pickle-option-for-object-serialization and squashes the following commits:
927f154 [Wes McKinney] Use cloudpickle for lambda serialization if available
ba998dd [Licht-T] CLN: Remove pickle=True option for object serialization
---
python/pyarrow/serialization.pxi | 14 ++------------
python/pyarrow/serialization.py | 13 +++++++++++--
python/pyarrow/tests/test_serialization.py | 9 ++++++---
3 files changed, 19 insertions(+), 17 deletions(-)
diff --git a/python/pyarrow/serialization.pxi b/python/pyarrow/serialization.pxi
index 6b72277..3ee5c7d 100644
--- a/python/pyarrow/serialization.pxi
+++ b/python/pyarrow/serialization.pxi
@@ -47,7 +47,6 @@ cdef class SerializationContext:
cdef:
object type_to_type_id
object whitelisted_types
- object types_to_pickle
object custom_serializers
object custom_deserializers
@@ -55,11 +54,10 @@ cdef class SerializationContext:
# Types with special serialization handlers
self.type_to_type_id = dict()
self.whitelisted_types = dict()
- self.types_to_pickle = set()
self.custom_serializers = dict()
self.custom_deserializers = dict()
- def register_type(self, type_, type_id, pickle=False,
+ def register_type(self, type_, type_id,
custom_serializer=None, custom_deserializer=None):
"""EXPERIMENTAL: Add type to the list of types we can serialize.
@@ -69,9 +67,6 @@ cdef class SerializationContext:
The type that we can serialize.
type_id : bytes
A string of bytes used to identify the type.
- pickle : bool
- True if the serialization should be done with pickle.
- False if it should be done efficiently with Arrow.
custom_serializer : callable
This argument is optional, but can be provided to
serialize objects of the class in a particular way.
@@ -81,8 +76,6 @@ cdef class SerializationContext:
"""
self.type_to_type_id[type_] = type_id
self.whitelisted_types[type_id] = type_
- if pickle:
- self.types_to_pickle.add(type_id)
if custom_serializer is not None:
self.custom_serializers[type_id] = custom_serializer
self.custom_deserializers[type_id] = custom_deserializer
@@ -102,9 +95,7 @@ cdef class SerializationContext:
# use the closest match to type(obj)
type_id = self.type_to_type_id[type_]
- if type_id in self.types_to_pickle:
- serialized_obj = {"data": pickle.dumps(obj), "pickle": True}
- elif type_id in self.custom_serializers:
+ if type_id in self.custom_serializers:
serialized_obj = {"data": self.custom_serializers[type_id](obj)}
else:
if is_named_tuple(type_):
@@ -125,7 +116,6 @@ cdef class SerializationContext:
# The object was pickled, so unpickle it.
obj = pickle.loads(serialized_obj["data"])
else:
- assert type_id not in self.types_to_pickle
if type_id not in self.whitelisted_types:
msg = "Type ID " + str(type_id) + " not registered in " \
"deserialization callback"
diff --git a/python/pyarrow/serialization.py b/python/pyarrow/serialization.py
index 2b47513..ab25b63 100644
--- a/python/pyarrow/serialization.py
+++ b/python/pyarrow/serialization.py
@@ -17,12 +17,18 @@
from collections import OrderedDict, defaultdict
import sys
+import pickle
import numpy as np
from pyarrow import serialize_pandas, deserialize_pandas
from pyarrow.lib import _default_serialization_context
+try:
+ import cloudpickle
+except ImportError:
+ cloudpickle = pickle
+
def register_default_serialization_handlers(serialization_context):
@@ -67,9 +73,12 @@ def register_default_serialization_handlers(serialization_context):
serialization_context.register_type(
type(lambda: 0), "function",
- pickle=True)
+ custom_serializer=cloudpickle.dumps,
+ custom_deserializer=cloudpickle.loads)
- serialization_context.register_type(type, "type", pickle=True)
+ serialization_context.register_type(type, "type",
+ custom_serializer=cloudpickle.dumps,
+ custom_deserializer=cloudpickle.loads)
# ----------------------------------------------------------------------
# Set up serialization for numpy with dtype object (primitive types are
diff --git a/python/pyarrow/tests/test_serialization.py b/python/pyarrow/tests/test_serialization.py
index b0c5bc4..ed4fd9a 100644
--- a/python/pyarrow/tests/test_serialization.py
+++ b/python/pyarrow/tests/test_serialization.py
@@ -23,6 +23,7 @@ from collections import namedtuple, OrderedDict, defaultdict
import datetime
import string
import sys
+import pickle
import pyarrow as pa
import numpy as np
@@ -197,7 +198,9 @@ def make_serialization_context():
context.register_type(Baz, "Baz")
context.register_type(Qux, "Quz")
context.register_type(SubQux, "SubQux")
- context.register_type(SubQuxPickle, "SubQuxPickle", pickle=True)
+ context.register_type(SubQuxPickle, "SubQuxPickle",
+ custom_serializer=pickle.dumps,
+ custom_deserializer=pickle.loads)
context.register_type(Exception, "Exception")
context.register_type(CustomError, "CustomError")
context.register_type(Point, "Point")
@@ -338,7 +341,7 @@ def test_serialization_callback_numpy():
return serialized_obj
pa._default_serialization_context.register_type(
- DummyClass, "DummyClass", pickle=False,
+ DummyClass, "DummyClass",
custom_serializer=serialize_dummy_class,
custom_deserializer=deserialize_dummy_class)
@@ -357,7 +360,7 @@ def test_buffer_serialization():
return serialized_obj
pa._default_serialization_context.register_type(
- BufferClass, "BufferClass", pickle=False,
+ BufferClass, "BufferClass",
custom_serializer=serialize_buffer_class,
custom_deserializer=deserialize_buffer_class)
--
To stop receiving notification emails like this one, please contact
"commits@arrow.apache.org" <co...@arrow.apache.org>.