You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by jo...@apache.org on 2021/04/15 09:23:03 UTC

[arrow] branch master updated: ARROW-12057: [Python] Remove direct usage of pandas' Block subclasses (partly)

This is an automated email from the ASF dual-hosted git repository.

jorisvandenbossche pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 1e6819c  ARROW-12057: [Python] Remove direct usage of pandas' Block subclasses (partly)
1e6819c is described below

commit 1e6819c04eadfdf334cf62fbf4618df98ae3d97f
Author: Joris Van den Bossche <jo...@gmail.com>
AuthorDate: Thu Apr 15 11:21:36 2021 +0200

    ARROW-12057: [Python] Remove direct usage of pandas' Block subclasses (partly)
    
    Closes #10017 from jorisvandenbossche/ARROW-12057-pandas-block-classes
    
    Authored-by: Joris Van den Bossche <jo...@gmail.com>
    Signed-off-by: Joris Van den Bossche <jo...@gmail.com>
---
 python/pyarrow/pandas_compat.py | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py
index 91b38b8..e4b1317 100644
--- a/python/pyarrow/pandas_compat.py
+++ b/python/pyarrow/pandas_compat.py
@@ -647,7 +647,6 @@ def get_datetimetz_type(values, dtype, type_):
 
 
 def dataframe_to_serialized_dict(frame):
-    import pandas.core.internals as _int
     block_manager = frame._data
 
     blocks = []
@@ -657,11 +656,11 @@ def dataframe_to_serialized_dict(frame):
         values = block.values
         block_data = {}
 
-        if isinstance(block, _int.DatetimeTZBlock):
+        if _pandas_api.is_datetimetz(values.dtype):
             block_data['timezone'] = pa.lib.tzinfo_to_string(values.tz)
             if hasattr(values, 'values'):
                 values = values.values
-        elif isinstance(block, _int.CategoricalBlock):
+        elif _pandas_api.is_categorical(values):
             block_data.update(dictionary=values.categories,
                               ordered=values.ordered)
             values = values.codes
@@ -670,10 +669,8 @@ def dataframe_to_serialized_dict(frame):
             block=values
         )
 
-        # If we are dealing with an object array, pickle it instead. Note that
-        # we do not use isinstance here because _int.CategoricalBlock is a
-        # subclass of _int.ObjectBlock.
-        if type(block) == _int.ObjectBlock:
+        # If we are dealing with an object array, pickle it instead.
+        if values.dtype == np.dtype(object):
             block_data['object'] = None
             block_data['block'] = builtin_pickle.dumps(
                 values, protocol=builtin_pickle.HIGHEST_PROTOCOL)
@@ -731,8 +728,7 @@ def _reconstruct_block(item, columns=None, extension_columns=None):
         cat = _pandas_api.categorical_type.from_codes(
             block_arr, categories=item['dictionary'],
             ordered=item['ordered'])
-        block = _int.make_block(cat, placement=placement,
-                                klass=_int.CategoricalBlock)
+        block = _int.make_block(cat, placement=placement)
     elif 'timezone' in item:
         dtype = make_datetimetz(item['timezone'])
         block = _int.make_block(block_arr, placement=placement,
@@ -740,7 +736,7 @@ def _reconstruct_block(item, columns=None, extension_columns=None):
                                 dtype=dtype)
     elif 'object' in item:
         block = _int.make_block(builtin_pickle.loads(block_arr),
-                                placement=placement, klass=_int.ObjectBlock)
+                                placement=placement)
     elif 'py_array' in item:
         # create ExtensionBlock
         arr = item['py_array']
@@ -751,8 +747,7 @@ def _reconstruct_block(item, columns=None, extension_columns=None):
             raise ValueError("This column does not support to be converted "
                              "to a pandas ExtensionArray")
         pd_ext_arr = pandas_dtype.__from_arrow__(arr)
-        block = _int.make_block(pd_ext_arr, placement=placement,
-                                klass=_int.ExtensionBlock)
+        block = _int.make_block(pd_ext_arr, placement=placement)
     else:
         block = _int.make_block(block_arr, placement=placement)