You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by jo...@apache.org on 2022/11/22 10:54:49 UTC
[arrow] branch master updated: ARROW-18173: [Python] Drop older versions of Pandas (<1.0) (#14631)
This is an automated email from the ASF dual-hosted git repository.
jorisvandenbossche pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new f769f6b323 ARROW-18173: [Python] Drop older versions of Pandas (<1.0) (#14631)
f769f6b323 is described below
commit f769f6b32373fcf5fc2a7a51152b375127ca4af7
Author: Alenka Frim <Al...@users.noreply.github.com>
AuthorDate: Tue Nov 22 11:54:38 2022 +0100
ARROW-18173: [Python] Drop older versions of Pandas (<1.0) (#14631)
This PR drops support for older versions of pandas (< 1.0); pandas >= 1.0.0 is now the minimum supported version.
The changes are made in:
- [x] the official documentation (pandas version support)
- [x] the CI jobs supporting older pandas versions
- [x] https://github.com/apache/arrow/blob/master/python/pyarrow/pandas-shim.pxi
- [x] tests that are specifically testing features on older versions of pandas
Lead-authored-by: Alenka Frim <fr...@gmail.com>
Co-authored-by: Alenka Frim <Al...@users.noreply.github.com>
Co-authored-by: Joris Van den Bossche <jo...@gmail.com>
Signed-off-by: Joris Van den Bossche <jo...@gmail.com>
---
.github/workflows/python.yml | 8 +-
docs/source/python/install.rst | 15 ++++
python/pyarrow/feather.py | 9 ---
python/pyarrow/pandas-shim.pxi | 27 ++-----
python/pyarrow/pandas_compat.py | 3 +-
python/pyarrow/tests/parquet/test_dataset.py | 10 +--
python/pyarrow/tests/parquet/test_pandas.py | 5 --
python/pyarrow/tests/test_compute.py | 82 ++++++++-------------
python/pyarrow/tests/test_pandas.py | 106 ++++++++-------------------
python/pyarrow/tests/test_schema.py | 4 +-
10 files changed, 92 insertions(+), 177 deletions(-)
diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index 3bc4a75b24..1fcf662ba7 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -54,7 +54,7 @@ jobs:
name:
- conda-python-docs
- conda-python-3.8-nopandas
- - conda-python-3.7-pandas-0.23
+ - conda-python-3.7-pandas-1.0
- conda-python-3.9-pandas-latest
include:
- name: conda-python-docs
@@ -67,12 +67,12 @@ jobs:
image: conda-python
title: AMD64 Conda Python 3.8 Without Pandas
python: 3.8
- - name: conda-python-3.7-pandas-0.23
+ - name: conda-python-3.7-pandas-1.0
cache: conda-python-3.7
image: conda-python-pandas
- title: AMD64 Conda Python 3.7 Pandas 0.23
+ title: AMD64 Conda Python 3.7 Pandas 1.0
python: 3.7
- pandas: 0.23
+ pandas: 1.0
numpy: 1.16
- name: conda-python-3.9-pandas-latest
cache: conda-python-3.9
diff --git a/docs/source/python/install.rst b/docs/source/python/install.rst
index ec71388152..f85b7522eb 100644
--- a/docs/source/python/install.rst
+++ b/docs/source/python/install.rst
@@ -61,3 +61,18 @@ Installing from source
----------------------
See :ref:`python-development`.
+
+Dependencies
+------------
+
+Required dependency
+
+* **NumPy 1.16.6** or higher.
+
+Optional dependencies
+
+* **pandas 1.0** or higher,
+* **cffi**.
+
+Additional packages PyArrow is compatible with are :ref:`fsspec <filesystem-fsspec>`
+and **pytz**, **dateutil** or **tzdata** package for timezones.
diff --git a/python/pyarrow/feather.py b/python/pyarrow/feather.py
index a0547949c9..54a16a2f89 100644
--- a/python/pyarrow/feather.py
+++ b/python/pyarrow/feather.py
@@ -24,12 +24,6 @@ from pyarrow.lib import (Codec, Table, # noqa
import pyarrow.lib as ext
from pyarrow import _feather
from pyarrow._feather import FeatherError # noqa: F401
-from pyarrow.vendored.version import Version
-
-
-def _check_pandas_version():
- if _pandas_api.loose_version < Version('0.17.0'):
- raise ImportError("feather requires pandas >= 0.17.0")
class FeatherDataset:
@@ -96,7 +90,6 @@ class FeatherDataset:
pandas.DataFrame
Content of the file as a pandas DataFrame (of columns)
"""
- _check_pandas_version()
return self.read_table(columns=columns).to_pandas(
use_threads=use_threads)
@@ -145,7 +138,6 @@ def write_feather(df, dest, compression=None, compression_level=None,
limited legacy format
"""
if _pandas_api.have_pandas:
- _check_pandas_version()
if (_pandas_api.has_sparse and
isinstance(df, _pandas_api.pd.SparseDataFrame)):
df = df.to_dense()
@@ -230,7 +222,6 @@ def read_feather(source, columns=None, use_threads=True,
-------
df : pandas.DataFrame
"""
- _check_pandas_version()
return (read_table(
source, columns=columns, memory_map=memory_map,
use_threads=use_threads).to_pandas(use_threads=use_threads, **kwargs))
diff --git a/python/pyarrow/pandas-shim.pxi b/python/pyarrow/pandas-shim.pxi
index 0e7cfe9374..4f96943e1b 100644
--- a/python/pyarrow/pandas-shim.pxi
+++ b/python/pyarrow/pandas-shim.pxi
@@ -59,16 +59,16 @@ cdef class _PandasAPIShim(object):
self._version = pd.__version__
self._loose_version = Version(pd.__version__)
- if self._loose_version < Version('0.23.0'):
+ if self._loose_version < Version('1.0.0'):
self._have_pandas = False
if raise_:
raise ImportError(
- "pyarrow requires pandas 0.23.0 or above, pandas {} is "
+ "pyarrow requires pandas 1.0.0 or above, pandas {} is "
"installed".format(self._version)
)
else:
warnings.warn(
- "pyarrow requires pandas 0.23.0 or above, pandas {} is "
+ "pyarrow requires pandas 1.0.0 or above, pandas {} is "
"installed. Therefore, pandas-specific integration is not "
"used.".format(self._version), stacklevel=2)
return
@@ -83,22 +83,12 @@ cdef class _PandasAPIShim(object):
self._series, self._index, self._categorical_type,
self._extension_array)
self._extension_dtype = pd.api.extensions.ExtensionDtype
- if self._loose_version >= Version('0.24.0'):
- self._is_extension_array_dtype = \
- pd.api.types.is_extension_array_dtype
- else:
- self._is_extension_array_dtype = None
-
+ self._is_extension_array_dtype = (
+ pd.api.types.is_extension_array_dtype)
self._types_api = pd.api.types
self._datetimetz_type = pd.api.types.DatetimeTZDtype
self._have_pandas = True
-
- if self._loose_version > Version('0.25'):
- self.has_sparse = False
- else:
- self.has_sparse = True
-
- self._pd024 = self._loose_version >= Version('0.24')
+ self.has_sparse = False
cdef inline _check_import(self, bint raise_=True):
if self._tried_importing_pandas:
@@ -232,10 +222,7 @@ cdef class _PandasAPIShim(object):
self._check_import()
if isinstance(obj.dtype, (self.pd.api.types.IntervalDtype,
self.pd.api.types.PeriodDtype)):
- if self._pd024:
- # only since pandas 0.24, interval and period are stored as
- # such in Series
- return obj.array
+ return obj.array
return obj.values
def assert_frame_equal(self, *args, **kwargs):
diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py
index 9fa7a699ef..d624459ca4 100644
--- a/python/pyarrow/pandas_compat.py
+++ b/python/pyarrow/pandas_compat.py
@@ -1089,9 +1089,8 @@ def _pandas_type_to_numpy_type(pandas_type):
def _get_multiindex_codes(mi):
- # compat for pandas < 0.24 (MI labels renamed to codes).
if isinstance(mi, _pandas_api.pd.MultiIndex):
- return mi.codes if hasattr(mi, 'codes') else mi.labels
+ return mi.codes
else:
return None
diff --git a/python/pyarrow/tests/parquet/test_dataset.py b/python/pyarrow/tests/parquet/test_dataset.py
index 099a1eaf48..654fd4ddc1 100644
--- a/python/pyarrow/tests/parquet/test_dataset.py
+++ b/python/pyarrow/tests/parquet/test_dataset.py
@@ -250,13 +250,11 @@ def test_filters_equivalency(tempdir, use_legacy_dataset):
result_df = table.to_pandas().reset_index(drop=True)
# Check that all rows in the DF fulfill the filter
- # Pandas 0.23.x has problems with indexing constant memoryviews in
- # categoricals. Thus we need to make an explicit copy here with np.array.
- df_filter_1 = (np.array(result_df['integer']) == 1) \
- & (np.array(result_df['string']) != 'b') \
- & (np.array(result_df['boolean']) == 'True')
+ df_filter_1 = (result_df['integer'] == 1) \
+ & (result_df['string'] != 'b') \
+ & (result_df['boolean'] == 'True')
df_filter_2 = (np.array(result_df['integer']) == 0) \
- & (np.array(result_df['boolean']) == 'False')
+ & (result_df['boolean'] == 'False')
assert df_filter_1.sum() > 0
assert df_filter_2.sum() > 0
assert result_df.shape[0] == (df_filter_1.sum() + df_filter_2.sum())
diff --git a/python/pyarrow/tests/parquet/test_pandas.py b/python/pyarrow/tests/parquet/test_pandas.py
index 0d0658cc38..3bc204c978 100644
--- a/python/pyarrow/tests/parquet/test_pandas.py
+++ b/python/pyarrow/tests/parquet/test_pandas.py
@@ -26,7 +26,6 @@ from pyarrow.fs import LocalFileSystem, SubTreeFileSystem
from pyarrow.tests.parquet.common import (
parametrize_legacy_dataset, parametrize_legacy_dataset_not_supported)
from pyarrow.util import guid
-from pyarrow.vendored.version import Version
try:
import pyarrow.parquet as pq
@@ -561,10 +560,6 @@ def test_pandas_categorical_roundtrip(use_legacy_dataset):
def test_write_to_dataset_pandas_preserve_extensiondtypes(
tempdir, use_legacy_dataset
):
- # ARROW-8251 - preserve pandas extension dtypes in roundtrip
- if Version(pd.__version__) < Version("1.0.0"):
- pytest.skip("__arrow_array__ added to pandas in 1.0.0")
-
df = pd.DataFrame({'part': 'a', "col": [1, 2, 3]})
df['col'] = df['col'].astype("Int64")
table = pa.table(df)
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index 34dc3bf452..3d03c7d86a 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -1812,14 +1812,6 @@ def test_strptime():
@pytest.mark.skipif(sys.platform == 'win32',
reason="Timezone database is not available on Windows yet")
def test_strftime():
- from pyarrow.vendored.version import Version
-
- def _fix_timestamp(s):
- if Version(pd.__version__) < Version("1.0.0"):
- return s.to_series().replace("NaT", pd.NaT)
- else:
- return s
-
times = ["2018-03-10 09:00", "2038-01-31 12:23", None]
timezones = ["CET", "UTC", "Europe/Ljubljana"]
@@ -1834,7 +1826,7 @@ def test_strftime():
for fmt in formats:
options = pc.StrftimeOptions(fmt)
result = pc.strftime(tsa, options=options)
- expected = pa.array(_fix_timestamp(ts.strftime(fmt)))
+ expected = pa.array(ts.strftime(fmt))
assert result.equals(expected)
fmt = "%Y-%m-%dT%H:%M:%S"
@@ -1842,34 +1834,34 @@ def test_strftime():
# Default format
tsa = pa.array(ts, type=pa.timestamp("s", timezone))
result = pc.strftime(tsa, options=pc.StrftimeOptions())
- expected = pa.array(_fix_timestamp(ts.strftime(fmt)))
+ expected = pa.array(ts.strftime(fmt))
assert result.equals(expected)
# Default format plus timezone
tsa = pa.array(ts, type=pa.timestamp("s", timezone))
result = pc.strftime(tsa, options=pc.StrftimeOptions(fmt + "%Z"))
- expected = pa.array(_fix_timestamp(ts.strftime(fmt + "%Z")))
+ expected = pa.array(ts.strftime(fmt + "%Z"))
assert result.equals(expected)
# Pandas %S is equivalent to %S in arrow for unit="s"
tsa = pa.array(ts, type=pa.timestamp("s", timezone))
options = pc.StrftimeOptions("%S")
result = pc.strftime(tsa, options=options)
- expected = pa.array(_fix_timestamp(ts.strftime("%S")))
+ expected = pa.array(ts.strftime("%S"))
assert result.equals(expected)
# Pandas %S.%f is equivalent to %S in arrow for unit="us"
tsa = pa.array(ts, type=pa.timestamp("us", timezone))
options = pc.StrftimeOptions("%S")
result = pc.strftime(tsa, options=options)
- expected = pa.array(_fix_timestamp(ts.strftime("%S.%f")))
+ expected = pa.array(ts.strftime("%S.%f"))
assert result.equals(expected)
# Test setting locale
tsa = pa.array(ts, type=pa.timestamp("s", timezone))
options = pc.StrftimeOptions(fmt, locale="C")
result = pc.strftime(tsa, options=options)
- expected = pa.array(_fix_timestamp(ts.strftime(fmt)))
+ expected = pa.array(ts.strftime(fmt))
assert result.equals(expected)
# Test timestamps without timezone
@@ -1877,7 +1869,8 @@ def test_strftime():
ts = pd.to_datetime(times)
tsa = pa.array(ts, type=pa.timestamp("s"))
result = pc.strftime(tsa, options=pc.StrftimeOptions(fmt))
- expected = pa.array(_fix_timestamp(ts.strftime(fmt)))
+ expected = pa.array(ts.strftime(fmt))
+
# Positional format
assert pc.strftime(tsa, fmt) == result
@@ -1956,8 +1949,6 @@ def _check_datetime_components(timestamps, timezone=None):
@pytest.mark.pandas
def test_extract_datetime_components():
- from pyarrow.vendored.version import Version
-
timestamps = ["1970-01-01T00:00:59.123456789",
"2000-02-29T23:23:23.999999999",
"2033-05-18T03:33:20.000000000",
@@ -1983,8 +1974,6 @@ def test_extract_datetime_components():
if sys.platform == 'win32':
# TODO: We should test on windows once ARROW-13168 is resolved.
pytest.skip('Timezone database is not available on Windows yet')
- elif Version(pd.__version__) < Version('1.0.0'):
- pytest.skip('Pandas < 1.0 extracts time components incorrectly.')
else:
for timezone in timezones:
_check_datetime_components(timestamps, timezone)
@@ -1995,8 +1984,6 @@ def test_extract_datetime_components():
@pytest.mark.skipif(sys.platform == 'win32',
reason="Timezone database is not available on Windows yet")
def test_assume_timezone():
- from pyarrow.vendored.version import Version
-
ts_type = pa.timestamp("ns")
timestamps = pd.to_datetime(["1970-01-01T00:00:59.123456789",
"2000-02-29T23:23:23.999999999",
@@ -2040,31 +2027,29 @@ def test_assume_timezone():
timezone = "Europe/Brussels"
- # nonexistent parameter was introduced in Pandas 0.24.0
- if Version(pd.__version__) >= Version("0.24.0"):
- options_nonexistent_raise = pc.AssumeTimezoneOptions(timezone)
- options_nonexistent_earliest = pc.AssumeTimezoneOptions(
- timezone, ambiguous="raise", nonexistent="earliest")
- options_nonexistent_latest = pc.AssumeTimezoneOptions(
- timezone, ambiguous="raise", nonexistent="latest")
-
- with pytest.raises(ValueError,
- match="Timestamp doesn't exist in "
- f"timezone '{timezone}'"):
- pc.assume_timezone(nonexistent_array,
- options=options_nonexistent_raise)
-
- expected = pa.array(nonexistent.tz_localize(
- timezone, nonexistent="shift_forward"))
- result = pc.assume_timezone(
- nonexistent_array, options=options_nonexistent_latest)
- expected.equals(result)
-
- expected = pa.array(nonexistent.tz_localize(
- timezone, nonexistent="shift_backward"))
- result = pc.assume_timezone(
- nonexistent_array, options=options_nonexistent_earliest)
- expected.equals(result)
+ options_nonexistent_raise = pc.AssumeTimezoneOptions(timezone)
+ options_nonexistent_earliest = pc.AssumeTimezoneOptions(
+ timezone, ambiguous="raise", nonexistent="earliest")
+ options_nonexistent_latest = pc.AssumeTimezoneOptions(
+ timezone, ambiguous="raise", nonexistent="latest")
+
+ with pytest.raises(ValueError,
+ match="Timestamp doesn't exist in "
+ f"timezone '{timezone}'"):
+ pc.assume_timezone(nonexistent_array,
+ options=options_nonexistent_raise)
+
+ expected = pa.array(nonexistent.tz_localize(
+ timezone, nonexistent="shift_forward"))
+ result = pc.assume_timezone(
+ nonexistent_array, options=options_nonexistent_latest)
+ expected.equals(result)
+
+ expected = pa.array(nonexistent.tz_localize(
+ timezone, nonexistent="shift_backward"))
+ result = pc.assume_timezone(
+ nonexistent_array, options=options_nonexistent_earliest)
+ expected.equals(result)
options_ambiguous_raise = pc.AssumeTimezoneOptions(timezone)
options_ambiguous_latest = pc.AssumeTimezoneOptions(
@@ -2199,11 +2184,6 @@ def _check_temporal_rounding(ts, values, unit):
"second", "minute", "hour", "day"))
@pytest.mark.pandas
def test_round_temporal(unit):
- from pyarrow.vendored.version import Version
-
- if Version(pd.__version__) < Version('1.0.0'):
- pytest.skip('Pandas < 1.0 rounds differently.')
-
values = (1, 2, 3, 4, 5, 6, 7, 10, 15, 24, 60, 250, 500, 750)
timestamps = [
"1923-07-07 08:52:35.203790336",
diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
index 6ec0532a02..a1ab4d4388 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -1071,13 +1071,10 @@ class TestConvertDateTimeLikeTypes:
pytz = pytest.importorskip("pytz")
from datetime import timezone
- if Version(pd.__version__) > Version("0.25.0"):
- # older pandas versions fail on datetime.timezone.utc (as in input)
- # vs pytz.UTC (as in result)
- values = [datetime(2018, 1, 1, 12, 23, 45, tzinfo=timezone.utc)]
- # also test with index to ensure both paths roundtrip (ARROW-9962)
- df = pd.DataFrame({'datetime': values}, index=values)
- _check_pandas_roundtrip(df, preserve_index=True)
+ values = [datetime(2018, 1, 1, 12, 23, 45, tzinfo=timezone.utc)]
+ # also test with index to ensure both paths roundtrip (ARROW-9962)
+ df = pd.DataFrame({'datetime': values}, index=values)
+ _check_pandas_roundtrip(df, preserve_index=True)
# datetime.timezone is going to be pytz.FixedOffset
hours = 1
@@ -2857,13 +2854,9 @@ def _fully_loaded_dataframe_example():
6: [True, False] * 5,
7: np.random.randn(10),
8: np.random.randint(0, 100, size=10),
- 9: pd.period_range('2013', periods=10, freq='M')
+ 9: pd.period_range('2013', periods=10, freq='M'),
+ 10: pd.interval_range(start=1, freq=1, periods=10),
}
-
- if Version(pd.__version__) >= Version('0.21'):
- # There is an issue with pickling IntervalIndex in pandas 0.20.x
- data[10] = pd.interval_range(start=1, freq=1, periods=10)
-
return pd.DataFrame(data, index=index)
@@ -2941,16 +2934,6 @@ def test_convert_unsupported_type_error_message():
with pytest.raises(ValueError, match=msg):
pa.Table.from_pandas(df)
- # period unsupported for pandas <= 0.25
- if Version(pd.__version__) <= Version('0.25'):
- df = pd.DataFrame({
- 'a': pd.period_range('2000-01-01', periods=20),
- })
-
- msg = 'Conversion failed for column a with type (period|object)'
- with pytest.raises((TypeError, ValueError), match=msg):
- pa.Table.from_pandas(df)
-
# ----------------------------------------------------------------------
# Hypothesis tests
@@ -3852,40 +3835,32 @@ def test_dictionary_from_pandas_specified_type():
def test_array_protocol():
- if Version(pd.__version__) < Version('0.24.0'):
- pytest.skip('IntegerArray only introduced in 0.24')
-
df = pd.DataFrame({'a': pd.Series([1, 2, None], dtype='Int64')})
- if Version(pd.__version__) < Version('0.26.0.dev'):
- # with pandas<=0.25, trying to convert nullable integer errors
- with pytest.raises(TypeError):
- pa.table(df)
- else:
- # __arrow_array__ added to pandas IntegerArray in 0.26.0.dev
+ # __arrow_array__ added to pandas IntegerArray in 0.26.0.dev
- # default conversion
- result = pa.table(df)
- expected = pa.array([1, 2, None], pa.int64())
- assert result[0].chunk(0).equals(expected)
+ # default conversion
+ result = pa.table(df)
+ expected = pa.array([1, 2, None], pa.int64())
+ assert result[0].chunk(0).equals(expected)
- # with specifying schema
- schema = pa.schema([('a', pa.float64())])
- result = pa.table(df, schema=schema)
- expected2 = pa.array([1, 2, None], pa.float64())
- assert result[0].chunk(0).equals(expected2)
+ # with specifying schema
+ schema = pa.schema([('a', pa.float64())])
+ result = pa.table(df, schema=schema)
+ expected2 = pa.array([1, 2, None], pa.float64())
+ assert result[0].chunk(0).equals(expected2)
- # pass Series to pa.array
- result = pa.array(df['a'])
- assert result.equals(expected)
- result = pa.array(df['a'], type=pa.float64())
- assert result.equals(expected2)
+ # pass Series to pa.array
+ result = pa.array(df['a'])
+ assert result.equals(expected)
+ result = pa.array(df['a'], type=pa.float64())
+ assert result.equals(expected2)
- # pass actual ExtensionArray to pa.array
- result = pa.array(df['a'].values)
- assert result.equals(expected)
- result = pa.array(df['a'].values, type=pa.float64())
- assert result.equals(expected2)
+ # pass actual ExtensionArray to pa.array
+ result = pa.array(df['a'].values)
+ assert result.equals(expected)
+ result = pa.array(df['a'].values, type=pa.float64())
+ assert result.equals(expected2)
class DummyExtensionType(pa.PyExtensionType):
@@ -3907,9 +3882,6 @@ def PandasArray__arrow_array__(self, type=None):
def test_array_protocol_pandas_extension_types(monkeypatch):
# ARROW-7022 - ensure protocol works for Period / Interval extension dtypes
- if Version(pd.__version__) < Version('0.24.0'):
- pytest.skip('Period/IntervalArray only introduced in 0.24')
-
storage = pa.array([1, 2, 3], type=pa.int64())
expected = pa.ExtensionArray.from_storage(DummyExtensionType(), storage)
@@ -3956,9 +3928,6 @@ def _Int64Dtype__from_arrow__(self, array):
def test_convert_to_extension_array(monkeypatch):
- if Version(pd.__version__) < Version("0.26.0.dev"):
- pytest.skip("Conversion from IntegerArray to arrow not yet supported")
-
import pandas.core.internals as _int
# table converted from dataframe with extension types (so pandas_metadata
@@ -4012,19 +3981,10 @@ def test_conversion_extensiontype_to_extensionarray(monkeypatch):
# converting extension type to linked pandas ExtensionDtype/Array
import pandas.core.internals as _int
- if Version(pd.__version__) < Version("0.24.0"):
- pytest.skip("ExtensionDtype introduced in pandas 0.24")
-
storage = pa.array([1, 2, 3, 4], pa.int64())
arr = pa.ExtensionArray.from_storage(MyCustomIntegerType(), storage)
table = pa.table({'a': arr})
- if Version(pd.__version__) < Version("0.26.0.dev"):
- # ensure pandas Int64Dtype has the protocol method (for older pandas)
- monkeypatch.setattr(
- pd.Int64Dtype, '__from_arrow__', _Int64Dtype__from_arrow__,
- raising=False)
-
# extension type points to Int64Dtype, which knows how to create a
# pandas ExtensionArray
result = arr.to_pandas()
@@ -4039,9 +3999,7 @@ def test_conversion_extensiontype_to_extensionarray(monkeypatch):
# monkeypatch pandas Int64Dtype to *not* have the protocol method
# (remove the version added above and the actual version for recent pandas)
- if Version(pd.__version__) < Version("0.26.0.dev"):
- monkeypatch.delattr(pd.Int64Dtype, "__from_arrow__")
- elif Version(pd.__version__) < Version("1.3.0.dev"):
+ if Version(pd.__version__) < Version("1.3.0.dev"):
monkeypatch.delattr(
pd.core.arrays.integer._IntegerDtype, "__from_arrow__")
else:
@@ -4058,9 +4016,6 @@ def test_conversion_extensiontype_to_extensionarray(monkeypatch):
def test_to_pandas_extension_dtypes_mapping():
- if Version(pd.__version__) < Version("0.26.0.dev"):
- pytest.skip("Conversion to pandas IntegerArray not yet supported")
-
table = pa.table({'a': pa.array([1, 2, 3], pa.int64())})
# default use numpy dtype
@@ -4102,9 +4057,6 @@ def test_array_to_pandas():
def test_roundtrip_empty_table_with_extension_dtype_index():
- if Version(pd.__version__) < Version("1.0.0"):
- pytest.skip("ExtensionDtype to_pandas method missing")
-
df = pd.DataFrame(index=pd.interval_range(start=0, end=3))
table = pa.table(df)
table.to_pandas().index == pd.Index([{'left': 0, 'right': 1},
@@ -4116,7 +4068,7 @@ def test_roundtrip_empty_table_with_extension_dtype_index():
def test_array_to_pandas_types_mapper():
# https://issues.apache.org/jira/browse/ARROW-9664
if Version(pd.__version__) < Version("1.2.0"):
- pytest.skip("ExtensionDtype to_pandas method missing")
+ pytest.skip("Float64Dtype extension dtype missing")
data = pa.array([1, 2, 3], pa.int64())
@@ -4140,7 +4092,7 @@ def test_array_to_pandas_types_mapper():
def test_chunked_array_to_pandas_types_mapper():
# https://issues.apache.org/jira/browse/ARROW-9664
if Version(pd.__version__) < Version("1.2.0"):
- pytest.skip("ExtensionDtype to_pandas method missing")
+ pytest.skip("Float64Dtype extension dtype missing")
data = pa.chunked_array([pa.array([1, 2, 3], pa.int64())])
assert isinstance(data, pa.ChunkedArray)
diff --git a/python/pyarrow/tests/test_schema.py b/python/pyarrow/tests/test_schema.py
index 854300a474..0c4dea673b 100644
--- a/python/pyarrow/tests/test_schema.py
+++ b/python/pyarrow/tests/test_schema.py
@@ -25,7 +25,6 @@ import numpy as np
import pyarrow as pa
import pyarrow.tests.util as test_util
-from pyarrow.vendored.version import Version
def test_schema_constructor_errors():
@@ -659,9 +658,8 @@ def test_schema_from_pandas():
'2006-01-13T12:34:56.432539784',
'2010-08-13T05:46:57.437699912'
], dtype='datetime64[ns]'),
+ pd.array([1, 2, None], dtype=pd.Int32Dtype()),
]
- if Version(pd.__version__) >= Version('1.0.0'):
- inputs.append(pd.array([1, 2, None], dtype=pd.Int32Dtype()))
for data in inputs:
df = pd.DataFrame({'a': data}, index=data)
schema = pa.Schema.from_pandas(df)