You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/01/10 23:47:52 UTC

[arrow] branch master updated: ARROW-1971: [Python] Add pandas serialization to the default

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new b49e8f3  ARROW-1971: [Python] Add pandas serialization to the default
b49e8f3 is described below

commit b49e8f3765350f9b22d631fcd49181c2fbefbc75
Author: devin-petersohn <de...@gmail.com>
AuthorDate: Wed Jan 10 18:47:45 2018 -0500

    ARROW-1971: [Python] Add pandas serialization to the default
    
    Moving pandas register into default register.
    
    Author: devin-petersohn <de...@gmail.com>
    
    Closes #1462 from devin-petersohn/jira/1971_pandas_serialization and squashes the following commits:
    
    b3dfd5b6 [devin-petersohn] Removing slower codepath
    2ed31371 [devin-petersohn] Moving pandas register into default register
---
 python/pyarrow/serialization.py | 124 ++++++++++++++--------------------------
 1 file changed, 44 insertions(+), 80 deletions(-)

diff --git a/python/pyarrow/serialization.py b/python/pyarrow/serialization.py
index 689ec15..61f2e83 100644
--- a/python/pyarrow/serialization.py
+++ b/python/pyarrow/serialization.py
@@ -21,7 +21,6 @@ import sys
 
 import numpy as np
 
-from pyarrow import serialize_pandas, deserialize_pandas
 from pyarrow.compat import builtin_pickle
 from pyarrow.lib import _default_serialization_context, frombuffer
 
@@ -61,6 +60,48 @@ _serialize_numpy_array_pickle = _pickle_to_buffer
 _deserialize_numpy_array_pickle = _load_pickle_from_buffer
 
 
+# ----------------------------------------------------------------------
+# pandas-specific serialization matters
+
+def _register_custom_pandas_handlers(context):
+    # ARROW-1784, faster path for pandas-only visibility
+
+    try:
+        import pandas as pd
+    except ImportError:
+        return
+
+    import pyarrow.pandas_compat as pdcompat
+
+    def _serialize_pandas_dataframe(obj):
+        return pdcompat.dataframe_to_serialized_dict(obj)
+
+    def _deserialize_pandas_dataframe(data):
+        return pdcompat.serialized_dict_to_dataframe(data)
+
+    def _serialize_pandas_series(obj):
+        return _serialize_pandas_dataframe(pd.DataFrame({obj.name: obj}))
+
+    def _deserialize_pandas_series(data):
+        deserialized = _deserialize_pandas_dataframe(data)
+        return deserialized[deserialized.columns[0]]
+
+    context.register_type(
+        pd.Series, 'pd.Series',
+        custom_serializer=_serialize_pandas_series,
+        custom_deserializer=_deserialize_pandas_series)
+
+    context.register_type(
+        pd.Index, 'pd.Index',
+        custom_serializer=_pickle_to_buffer,
+        custom_deserializer=_load_pickle_from_buffer)
+
+    context.register_type(
+        pd.DataFrame, 'pd.DataFrame',
+        custom_serializer=_serialize_pandas_dataframe,
+        custom_deserializer=_deserialize_pandas_dataframe)
+
+
 def register_default_serialization_handlers(serialization_context):
 
     # ----------------------------------------------------------------------
@@ -136,90 +177,13 @@ def register_default_serialization_handlers(serialization_context):
         # no torch
         pass
 
-
-register_default_serialization_handlers(_default_serialization_context)
+    _register_custom_pandas_handlers(serialization_context)
 
 
-# ----------------------------------------------------------------------
-# pandas-specific serialization matters
-
+register_default_serialization_handlers(_default_serialization_context)
 
 pandas_serialization_context = _default_serialization_context.clone()
 
-
-def _register_pandas_arrow_handlers(context):
-    try:
-        import pandas as pd
-    except ImportError:
-        return
-
-    def _serialize_pandas_series(obj):
-        return serialize_pandas(pd.DataFrame({obj.name: obj}))
-
-    def _deserialize_pandas_series(data):
-        deserialized = deserialize_pandas(data)
-        return deserialized[deserialized.columns[0]]
-
-    def _serialize_pandas_dataframe(obj):
-        return serialize_pandas(obj)
-
-    def _deserialize_pandas_dataframe(data):
-        return deserialize_pandas(data)
-
-    context.register_type(
-        pd.Series, 'pd.Series',
-        custom_serializer=_serialize_pandas_series,
-        custom_deserializer=_deserialize_pandas_series)
-
-    context.register_type(
-        pd.DataFrame, 'pd.DataFrame',
-        custom_serializer=_serialize_pandas_dataframe,
-        custom_deserializer=_deserialize_pandas_dataframe)
-
-
-def _register_custom_pandas_handlers(context):
-    # ARROW-1784, faster path for pandas-only visibility
-
-    try:
-        import pandas as pd
-    except ImportError:
-        return
-
-    import pyarrow.pandas_compat as pdcompat
-
-    def _serialize_pandas_dataframe(obj):
-        return pdcompat.dataframe_to_serialized_dict(obj)
-
-    def _deserialize_pandas_dataframe(data):
-        return pdcompat.serialized_dict_to_dataframe(data)
-
-    def _serialize_pandas_series(obj):
-        return _serialize_pandas_dataframe(pd.DataFrame({obj.name: obj}))
-
-    def _deserialize_pandas_series(data):
-        deserialized = _deserialize_pandas_dataframe(data)
-        return deserialized[deserialized.columns[0]]
-
-    context.register_type(
-        pd.Series, 'pd.Series',
-        custom_serializer=_serialize_pandas_series,
-        custom_deserializer=_deserialize_pandas_series)
-
-    context.register_type(
-        pd.Index, 'pd.Index',
-        custom_serializer=_pickle_to_buffer,
-        custom_deserializer=_load_pickle_from_buffer)
-
-    context.register_type(
-        pd.DataFrame, 'pd.DataFrame',
-        custom_serializer=_serialize_pandas_dataframe,
-        custom_deserializer=_deserialize_pandas_dataframe)
-
-
-_register_pandas_arrow_handlers(_default_serialization_context)
-_register_custom_pandas_handlers(pandas_serialization_context)
-
-
 pandas_serialization_context.register_type(
     np.ndarray, 'np.array',
     custom_serializer=_serialize_numpy_array_pickle,

-- 
To stop receiving notification emails like this one, please contact
"commits@arrow.apache.org" <co...@arrow.apache.org>.