You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/01/10 23:47:52 UTC
[arrow] branch master updated: ARROW-1971: [Python] Add pandas serialization to the default
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new b49e8f3 ARROW-1971: [Python] Add pandas serialization to the default
b49e8f3 is described below
commit b49e8f3765350f9b22d631fcd49181c2fbefbc75
Author: devin-petersohn <de...@gmail.com>
AuthorDate: Wed Jan 10 18:47:45 2018 -0500
ARROW-1971: [Python] Add pandas serialization to the default
Moves the pandas handler registration into the default handler registration (register_default_serialization_handlers).
Author: devin-petersohn <de...@gmail.com>
Closes #1462 from devin-petersohn/jira/1971_pandas_serialization and squashes the following commits:
b3dfd5b6 [devin-petersohn] Removing slower codepath
2ed31371 [devin-petersohn] Moving pandas register into default register
---
python/pyarrow/serialization.py | 124 ++++++++++++++--------------------------
1 file changed, 44 insertions(+), 80 deletions(-)
diff --git a/python/pyarrow/serialization.py b/python/pyarrow/serialization.py
index 689ec15..61f2e83 100644
--- a/python/pyarrow/serialization.py
+++ b/python/pyarrow/serialization.py
@@ -21,7 +21,6 @@ import sys
import numpy as np
-from pyarrow import serialize_pandas, deserialize_pandas
from pyarrow.compat import builtin_pickle
from pyarrow.lib import _default_serialization_context, frombuffer
@@ -61,6 +60,48 @@ _serialize_numpy_array_pickle = _pickle_to_buffer
_deserialize_numpy_array_pickle = _load_pickle_from_buffer
+# ----------------------------------------------------------------------
+# pandas-specific serialization matters
+
+def _register_custom_pandas_handlers(context):
+ # ARROW-1784, faster path for pandas-only visibility
+
+ try:
+ import pandas as pd
+ except ImportError:
+ return
+
+ import pyarrow.pandas_compat as pdcompat
+
+ def _serialize_pandas_dataframe(obj):
+ return pdcompat.dataframe_to_serialized_dict(obj)
+
+ def _deserialize_pandas_dataframe(data):
+ return pdcompat.serialized_dict_to_dataframe(data)
+
+ def _serialize_pandas_series(obj):
+ return _serialize_pandas_dataframe(pd.DataFrame({obj.name: obj}))
+
+ def _deserialize_pandas_series(data):
+ deserialized = _deserialize_pandas_dataframe(data)
+ return deserialized[deserialized.columns[0]]
+
+ context.register_type(
+ pd.Series, 'pd.Series',
+ custom_serializer=_serialize_pandas_series,
+ custom_deserializer=_deserialize_pandas_series)
+
+ context.register_type(
+ pd.Index, 'pd.Index',
+ custom_serializer=_pickle_to_buffer,
+ custom_deserializer=_load_pickle_from_buffer)
+
+ context.register_type(
+ pd.DataFrame, 'pd.DataFrame',
+ custom_serializer=_serialize_pandas_dataframe,
+ custom_deserializer=_deserialize_pandas_dataframe)
+
+
def register_default_serialization_handlers(serialization_context):
# ----------------------------------------------------------------------
@@ -136,90 +177,13 @@ def register_default_serialization_handlers(serialization_context):
# no torch
pass
-
-register_default_serialization_handlers(_default_serialization_context)
+ _register_custom_pandas_handlers(serialization_context)
-# ----------------------------------------------------------------------
-# pandas-specific serialization matters
-
+register_default_serialization_handlers(_default_serialization_context)
pandas_serialization_context = _default_serialization_context.clone()
-
-def _register_pandas_arrow_handlers(context):
- try:
- import pandas as pd
- except ImportError:
- return
-
- def _serialize_pandas_series(obj):
- return serialize_pandas(pd.DataFrame({obj.name: obj}))
-
- def _deserialize_pandas_series(data):
- deserialized = deserialize_pandas(data)
- return deserialized[deserialized.columns[0]]
-
- def _serialize_pandas_dataframe(obj):
- return serialize_pandas(obj)
-
- def _deserialize_pandas_dataframe(data):
- return deserialize_pandas(data)
-
- context.register_type(
- pd.Series, 'pd.Series',
- custom_serializer=_serialize_pandas_series,
- custom_deserializer=_deserialize_pandas_series)
-
- context.register_type(
- pd.DataFrame, 'pd.DataFrame',
- custom_serializer=_serialize_pandas_dataframe,
- custom_deserializer=_deserialize_pandas_dataframe)
-
-
-def _register_custom_pandas_handlers(context):
- # ARROW-1784, faster path for pandas-only visibility
-
- try:
- import pandas as pd
- except ImportError:
- return
-
- import pyarrow.pandas_compat as pdcompat
-
- def _serialize_pandas_dataframe(obj):
- return pdcompat.dataframe_to_serialized_dict(obj)
-
- def _deserialize_pandas_dataframe(data):
- return pdcompat.serialized_dict_to_dataframe(data)
-
- def _serialize_pandas_series(obj):
- return _serialize_pandas_dataframe(pd.DataFrame({obj.name: obj}))
-
- def _deserialize_pandas_series(data):
- deserialized = _deserialize_pandas_dataframe(data)
- return deserialized[deserialized.columns[0]]
-
- context.register_type(
- pd.Series, 'pd.Series',
- custom_serializer=_serialize_pandas_series,
- custom_deserializer=_deserialize_pandas_series)
-
- context.register_type(
- pd.Index, 'pd.Index',
- custom_serializer=_pickle_to_buffer,
- custom_deserializer=_load_pickle_from_buffer)
-
- context.register_type(
- pd.DataFrame, 'pd.DataFrame',
- custom_serializer=_serialize_pandas_dataframe,
- custom_deserializer=_deserialize_pandas_dataframe)
-
-
-_register_pandas_arrow_handlers(_default_serialization_context)
-_register_custom_pandas_handlers(pandas_serialization_context)
-
-
pandas_serialization_context.register_type(
np.ndarray, 'np.array',
custom_serializer=_serialize_numpy_array_pickle,
--
To stop receiving notification emails like this one, please contact
['"commits@arrow.apache.org" <co...@arrow.apache.org>'].