You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by jo...@apache.org on 2023/06/30 07:08:24 UTC
[arrow] branch main updated: GH-36096: [Python] Call __from_arrow__ in Array.to_pandas (#36314)
This is an automated email from the ASF dual-hosted git repository.
jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new d6a1968cea GH-36096: [Python] Call __from_arrow__ in Array.to_pandas (#36314)
d6a1968cea is described below
commit d6a1968cea344ec6e2f4016d3c9d90ece7c0afad
Author: Dane Pitkin <48...@users.noreply.github.com>
AuthorDate: Fri Jun 30 03:08:17 2023 -0400
GH-36096: [Python] Call __from_arrow__ in Array.to_pandas (#36314)
### Rationale for this change
Array.to_pandas should mimic the ChunkedArray.to_pandas implementation. Notably, Array.to_pandas is missing the call to `__from_arrow__` that ChunkedArray.to_pandas makes when the pandas dtype defines that attribute.
### Are these changes tested?
Yes, but the tests require a development version of pandas. The integration tests can be kicked off manually to run against the pandas nightly build.
### Are there any user-facing changes?
No
* Closes: #36096
Authored-by: Dane Pitkin <da...@voltrondata.com>
Signed-off-by: Joris Van den Bossche <jo...@gmail.com>
---
python/pyarrow/array.pxi | 30 +++++++++++++++---------------
python/pyarrow/table.pxi | 11 -----------
python/pyarrow/tests/test_pandas.py | 25 +++++++++++++++++++------
3 files changed, 34 insertions(+), 32 deletions(-)
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 66d473d74e..b704da7360 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -1673,6 +1673,21 @@ cdef _array_like_to_pandas(obj, options, types_mapper):
original_type = obj.type
name = obj._name
+ dtype = None
+
+ if types_mapper:
+ dtype = types_mapper(original_type)
+ elif original_type.id == _Type_EXTENSION:
+ try:
+ dtype = original_type.to_pandas_dtype()
+ except NotImplementedError:
+ pass
+
+ # Only call __from_arrow__ for Arrow extension types or when explicitly
+ # overridden via types_mapper
+ if hasattr(dtype, '__from_arrow__'):
+ arr = dtype.__from_arrow__(obj)
+ return pandas_api.series(arr, name=name, copy=False)
# ARROW-3789(wesm): Convert date/timestamp types to datetime64[ns]
c_options.coerce_temporal_nanoseconds = True
@@ -3091,21 +3106,6 @@ cdef class ExtensionArray(Array):
result.validate()
return result
- def _to_pandas(self, options, **kwargs):
- pandas_dtype = None
- try:
- pandas_dtype = self.type.to_pandas_dtype()
- except NotImplementedError:
- pass
-
- # pandas ExtensionDtype that implements conversion from pyarrow
- if hasattr(pandas_dtype, '__from_arrow__'):
- arr = pandas_dtype.__from_arrow__(self)
- return pandas_api.series(arr, copy=False)
-
- # otherwise convert the storage array with the base implementation
- return Array._to_pandas(self.storage, options, **kwargs)
-
class FixedShapeTensorArray(ExtensionArray):
"""
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index 9da7d893fe..00067c5796 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -458,17 +458,6 @@ cdef class ChunkedArray(_PandasConvertible):
return result
def _to_pandas(self, options, types_mapper=None, **kwargs):
- pandas_dtype = None
- try:
- pandas_dtype = self.type.to_pandas_dtype()
- except NotImplementedError:
- pass
-
- # pandas ExtensionDtype that implements conversion from pyarrow
- if hasattr(pandas_dtype, '__from_arrow__'):
- arr = pandas_dtype.__from_arrow__(self)
- return pandas_api.series(arr, name=self._name)
-
return _array_like_to_pandas(self, options, types_mapper=types_mapper)
def to_numpy(self):
diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
index ddd6e9ba36..f1eb824df5 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -4818,15 +4818,28 @@ def test_unhashable_map_keys_with_pydicts():
assert tup1[1] == tup2[1]
-def test_column_conversion_for_datetime():
+def test_table_column_conversion_for_datetime():
# GH-35235
- # pandas implemented __from_arrow__ for DatetimeTZDtype
+ # pandas implemented __from_arrow__ for DatetimeTZDtype,
+ # but we choose to do the conversion in Arrow instead.
# https://github.com/pandas-dev/pandas/pull/52201
- arr = pd.Series(pd.date_range("2012", periods=2, tz="Europe/Brussels"),
- name="datetime_column")
- table = pa.table({"datetime_column": pa.array(arr)})
+ series = pd.Series(pd.date_range("2012", periods=2, tz="Europe/Brussels"),
+ name="datetime_column")
+ table = pa.table({"datetime_column": pa.array(series)})
table_col = table.column("datetime_column")
result = table_col.to_pandas()
assert result.name == "datetime_column"
- tm.assert_series_equal(result, arr)
+ tm.assert_series_equal(result, series)
+
+
+def test_array_conversion_for_datetime():
+ # GH-35235
+ # pandas implemented __from_arrow__ for DatetimeTZDtype,
+ # but we choose to do the conversion in Arrow instead.
+ # https://github.com/pandas-dev/pandas/pull/52201
+ series = pd.Series(pd.date_range("2012", periods=2, tz="Europe/Brussels"))
+ arr = pa.array(series)
+
+ result = arr.to_pandas()
+ tm.assert_series_equal(result, series)