Posted to commits@arrow.apache.org by jo...@apache.org on 2023/07/04 18:13:04 UTC

[arrow] branch main updated: GH-29705: [Python] Clean-up no-longer-used pandas dataframe serialization helpers (#36413)

This is an automated email from the ASF dual-hosted git repository.

jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 87ea379148 GH-29705: [Python] Clean-up no-longer-used pandas dataframe serialization helpers (#36413)
87ea379148 is described below

commit 87ea379148ea6321ebb1b565f00b27c597167082
Author: Joris Van den Bossche <jo...@gmail.com>
AuthorDate: Tue Jul 4 20:12:56 2023 +0200

    GH-29705: [Python] Clean-up no-longer-used pandas dataframe serialization helpers (#36413)
    
    Small follow-up on https://github.com/apache/arrow/pull/34926, which removed the `pyarrow.serialization` functionality, making those functions obsolete.
    
    * Closes: #29705
    
    Authored-by: Joris Van den Bossche <jo...@gmail.com>
    Signed-off-by: Joris Van den Bossche <jo...@gmail.com>
---
 python/pyarrow/pandas_compat.py | 53 +----------------------------------------
 1 file changed, 1 insertion(+), 52 deletions(-)
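
The helpers deleted below predate the removal of `pyarrow.serialize`. The commit itself does not ship a replacement snippet, but a minimal sketch of the usual alternative, round-tripping a DataFrame through the Arrow IPC stream format, looks roughly like this (the example data and variable names are illustrative and not part of the commit):

    import pandas as pd
    import pyarrow as pa

    # Illustrative input; not part of the commit being described.
    df = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})

    # Serialize: convert the DataFrame to an Arrow table and write it to an IPC stream.
    table = pa.Table.from_pandas(df)
    sink = pa.BufferOutputStream()
    with pa.ipc.new_stream(sink, table.schema) as writer:
        writer.write_table(table)
    buf = sink.getvalue()

    # Deserialize: read the stream back into a table, then into pandas.
    restored = pa.ipc.open_stream(buf).read_all().to_pandas()
    assert restored.equals(df)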

diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py
index 6b9514ea6b..5369677e87 100644
--- a/python/pyarrow/pandas_compat.py
+++ b/python/pyarrow/pandas_compat.py
@@ -673,54 +673,7 @@ def get_datetimetz_type(values, dtype, type_):
     return values, type_
 
 # ----------------------------------------------------------------------
-# Converting pandas.DataFrame to a dict containing only NumPy arrays or other
-# objects friendly to pyarrow.serialize
-
-
-def dataframe_to_serialized_dict(frame):
-    block_manager = frame._data
-
-    blocks = []
-    axes = [ax for ax in block_manager.axes]
-
-    for block in block_manager.blocks:
-        values = block.values
-        block_data = {}
-
-        if _pandas_api.is_datetimetz(values.dtype):
-            block_data['timezone'] = pa.lib.tzinfo_to_string(values.tz)
-            if hasattr(values, 'values'):
-                values = values.values
-        elif _pandas_api.is_categorical(values):
-            block_data.update(dictionary=values.categories,
-                              ordered=values.ordered)
-            values = values.codes
-        block_data.update(
-            placement=block.mgr_locs.as_array,
-            block=values
-        )
-
-        # If we are dealing with an object array, pickle it instead.
-        if values.dtype == np.dtype(object):
-            block_data['object'] = None
-            block_data['block'] = builtin_pickle.dumps(
-                values, protocol=builtin_pickle.HIGHEST_PROTOCOL)
-
-        blocks.append(block_data)
-
-    return {
-        'blocks': blocks,
-        'axes': axes
-    }
-
-
-def serialized_dict_to_dataframe(data):
-    import pandas.core.internals as _int
-    reconstructed_blocks = [_reconstruct_block(block)
-                            for block in data['blocks']]
-
-    block_mgr = _int.BlockManager(reconstructed_blocks, data['axes'])
-    return _pandas_api.data_frame(block_mgr)
+# Converting pyarrow.Table efficiently to pandas.DataFrame
 
 
 def _reconstruct_block(item, columns=None, extension_columns=None):
@@ -790,10 +743,6 @@ def make_datetimetz(tz):
     return _pandas_api.datetimetz_type('ns', tz=tz)
 
 
-# ----------------------------------------------------------------------
-# Converting pyarrow.Table efficiently to pandas.DataFrame
-
-
 def table_to_blockmanager(options, table, categories=None,
                           ignore_metadata=False, types_mapper=None):
     from pandas.core.internals import BlockManager
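
The block-manager construction kept by this hunk is internal; from user code the same conversion is normally reached through the public `Table.to_pandas()` API. A short, illustrative usage sketch (not taken from the commit):

    import pyarrow as pa

    # Build a small table and convert it to pandas; this conversion path is the
    # one served by pandas_compat.table_to_blockmanager under the hood.
    table = pa.table({"a": [1, 2, 3], "b": ["x", "y", "z"]})
    df = table.to_pandas()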