You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by jo...@apache.org on 2022/11/22 10:54:49 UTC

[arrow] branch master updated: ARROW-18173: [Python] Drop older versions of Pandas (<1.0) (#14631)

This is an automated email from the ASF dual-hosted git repository.

jorisvandenbossche pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new f769f6b323 ARROW-18173: [Python] Drop older versions of Pandas (<1.0) (#14631)
f769f6b323 is described below

commit f769f6b32373fcf5fc2a7a51152b375127ca4af7
Author: Alenka Frim <Al...@users.noreply.github.com>
AuthorDate: Tue Nov 22 11:54:38 2022 +0100

    ARROW-18173: [Python] Drop older versions of Pandas (<1.0) (#14631)
    
    This PR drops support for pandas versions older than 1.0.0; pyarrow now requires pandas >= 1.0.0.
    
    The changes were made in:
    - [x] the official documentation (pandas version support)
    - [x] the CI jobs supporting older pandas versions
    - [x] https://github.com/apache/arrow/blob/master/python/pyarrow/pandas-shim.pxi
    - [x] tests that are specifically testing features on older versions of pandas
    
    Lead-authored-by: Alenka Frim <fr...@gmail.com>
    Co-authored-by: Alenka Frim <Al...@users.noreply.github.com>
    Co-authored-by: Joris Van den Bossche <jo...@gmail.com>
    Signed-off-by: Joris Van den Bossche <jo...@gmail.com>
---
 .github/workflows/python.yml                 |   8 +-
 docs/source/python/install.rst               |  15 ++++
 python/pyarrow/feather.py                    |   9 ---
 python/pyarrow/pandas-shim.pxi               |  27 ++-----
 python/pyarrow/pandas_compat.py              |   3 +-
 python/pyarrow/tests/parquet/test_dataset.py |  10 +--
 python/pyarrow/tests/parquet/test_pandas.py  |   5 --
 python/pyarrow/tests/test_compute.py         |  82 ++++++++-------------
 python/pyarrow/tests/test_pandas.py          | 106 ++++++++-------------------
 python/pyarrow/tests/test_schema.py          |   4 +-
 10 files changed, 92 insertions(+), 177 deletions(-)

diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index 3bc4a75b24..1fcf662ba7 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -54,7 +54,7 @@ jobs:
         name:
           - conda-python-docs
           - conda-python-3.8-nopandas
-          - conda-python-3.7-pandas-0.23
+          - conda-python-3.7-pandas-1.0
           - conda-python-3.9-pandas-latest
         include:
           - name: conda-python-docs
@@ -67,12 +67,12 @@ jobs:
             image: conda-python
             title: AMD64 Conda Python 3.8 Without Pandas
             python: 3.8
-          - name: conda-python-3.7-pandas-0.23
+          - name: conda-python-3.7-pandas-1.0
             cache: conda-python-3.7
             image: conda-python-pandas
-            title: AMD64 Conda Python 3.7 Pandas 0.23
+            title: AMD64 Conda Python 3.7 Pandas 1.0
             python: 3.7
-            pandas: 0.23
+            pandas: 1.0
             numpy: 1.16
           - name: conda-python-3.9-pandas-latest
             cache: conda-python-3.9
diff --git a/docs/source/python/install.rst b/docs/source/python/install.rst
index ec71388152..f85b7522eb 100644
--- a/docs/source/python/install.rst
+++ b/docs/source/python/install.rst
@@ -61,3 +61,18 @@ Installing from source
 ----------------------
 
 See :ref:`python-development`.
+
+Dependencies
+------------
+
+Required dependency
+
+* **NumPy 1.16.6** or higher.
+
+Optional dependencies
+
+* **pandas 1.0** or higher,
+* **cffi**.
+
+PyArrow is also compatible with :ref:`fsspec <filesystem-fsspec>` and, for
+timezone support, with the **pytz**, **dateutil** or **tzdata** packages.
diff --git a/python/pyarrow/feather.py b/python/pyarrow/feather.py
index a0547949c9..54a16a2f89 100644
--- a/python/pyarrow/feather.py
+++ b/python/pyarrow/feather.py
@@ -24,12 +24,6 @@ from pyarrow.lib import (Codec, Table,  # noqa
 import pyarrow.lib as ext
 from pyarrow import _feather
 from pyarrow._feather import FeatherError  # noqa: F401
-from pyarrow.vendored.version import Version
-
-
-def _check_pandas_version():
-    if _pandas_api.loose_version < Version('0.17.0'):
-        raise ImportError("feather requires pandas >= 0.17.0")
 
 
 class FeatherDataset:
@@ -96,7 +90,6 @@ class FeatherDataset:
         pandas.DataFrame
             Content of the file as a pandas DataFrame (of columns)
         """
-        _check_pandas_version()
         return self.read_table(columns=columns).to_pandas(
             use_threads=use_threads)
 
@@ -145,7 +138,6 @@ def write_feather(df, dest, compression=None, compression_level=None,
         limited legacy format
     """
     if _pandas_api.have_pandas:
-        _check_pandas_version()
         if (_pandas_api.has_sparse and
                 isinstance(df, _pandas_api.pd.SparseDataFrame)):
             df = df.to_dense()
@@ -230,7 +222,6 @@ def read_feather(source, columns=None, use_threads=True,
     -------
     df : pandas.DataFrame
     """
-    _check_pandas_version()
     return (read_table(
         source, columns=columns, memory_map=memory_map,
         use_threads=use_threads).to_pandas(use_threads=use_threads, **kwargs))
diff --git a/python/pyarrow/pandas-shim.pxi b/python/pyarrow/pandas-shim.pxi
index 0e7cfe9374..4f96943e1b 100644
--- a/python/pyarrow/pandas-shim.pxi
+++ b/python/pyarrow/pandas-shim.pxi
@@ -59,16 +59,16 @@ cdef class _PandasAPIShim(object):
         self._version = pd.__version__
         self._loose_version = Version(pd.__version__)
 
-        if self._loose_version < Version('0.23.0'):
+        if self._loose_version < Version('1.0.0'):
             self._have_pandas = False
             if raise_:
                 raise ImportError(
-                    "pyarrow requires pandas 0.23.0 or above, pandas {} is "
+                    "pyarrow requires pandas 1.0.0 or above, pandas {} is "
                     "installed".format(self._version)
                 )
             else:
                 warnings.warn(
-                    "pyarrow requires pandas 0.23.0 or above, pandas {} is "
+                    "pyarrow requires pandas 1.0.0 or above, pandas {} is "
                     "installed. Therefore, pandas-specific integration is not "
                     "used.".format(self._version), stacklevel=2)
                 return
@@ -83,22 +83,12 @@ cdef class _PandasAPIShim(object):
             self._series, self._index, self._categorical_type,
             self._extension_array)
         self._extension_dtype = pd.api.extensions.ExtensionDtype
-        if self._loose_version >= Version('0.24.0'):
-            self._is_extension_array_dtype = \
-                pd.api.types.is_extension_array_dtype
-        else:
-            self._is_extension_array_dtype = None
-
+        self._is_extension_array_dtype = (
+            pd.api.types.is_extension_array_dtype)
         self._types_api = pd.api.types
         self._datetimetz_type = pd.api.types.DatetimeTZDtype
         self._have_pandas = True
-
-        if self._loose_version > Version('0.25'):
-            self.has_sparse = False
-        else:
-            self.has_sparse = True
-
-        self._pd024 = self._loose_version >= Version('0.24')
+        self.has_sparse = False
 
     cdef inline _check_import(self, bint raise_=True):
         if self._tried_importing_pandas:
@@ -232,10 +222,7 @@ cdef class _PandasAPIShim(object):
         self._check_import()
         if isinstance(obj.dtype, (self.pd.api.types.IntervalDtype,
                                   self.pd.api.types.PeriodDtype)):
-            if self._pd024:
-                # only since pandas 0.24, interval and period are stored as
-                # such in Series
-                return obj.array
+            return obj.array
         return obj.values
 
     def assert_frame_equal(self, *args, **kwargs):
diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py
index 9fa7a699ef..d624459ca4 100644
--- a/python/pyarrow/pandas_compat.py
+++ b/python/pyarrow/pandas_compat.py
@@ -1089,9 +1089,8 @@ def _pandas_type_to_numpy_type(pandas_type):
 
 
 def _get_multiindex_codes(mi):
-    # compat for pandas < 0.24 (MI labels renamed to codes).
     if isinstance(mi, _pandas_api.pd.MultiIndex):
-        return mi.codes if hasattr(mi, 'codes') else mi.labels
+        return mi.codes
     else:
         return None
 
diff --git a/python/pyarrow/tests/parquet/test_dataset.py b/python/pyarrow/tests/parquet/test_dataset.py
index 099a1eaf48..654fd4ddc1 100644
--- a/python/pyarrow/tests/parquet/test_dataset.py
+++ b/python/pyarrow/tests/parquet/test_dataset.py
@@ -250,13 +250,11 @@ def test_filters_equivalency(tempdir, use_legacy_dataset):
     result_df = table.to_pandas().reset_index(drop=True)
 
     # Check that all rows in the DF fulfill the filter
-    # Pandas 0.23.x has problems with indexing constant memoryviews in
-    # categoricals. Thus we need to make an explicit copy here with np.array.
-    df_filter_1 = (np.array(result_df['integer']) == 1) \
-        & (np.array(result_df['string']) != 'b') \
-        & (np.array(result_df['boolean']) == 'True')
+    df_filter_1 = (result_df['integer'] == 1) \
+        & (result_df['string'] != 'b') \
+        & (result_df['boolean'] == 'True')
     df_filter_2 = (np.array(result_df['integer']) == 0) \
-        & (np.array(result_df['boolean']) == 'False')
+        & (result_df['boolean'] == 'False')
     assert df_filter_1.sum() > 0
     assert df_filter_2.sum() > 0
     assert result_df.shape[0] == (df_filter_1.sum() + df_filter_2.sum())
diff --git a/python/pyarrow/tests/parquet/test_pandas.py b/python/pyarrow/tests/parquet/test_pandas.py
index 0d0658cc38..3bc204c978 100644
--- a/python/pyarrow/tests/parquet/test_pandas.py
+++ b/python/pyarrow/tests/parquet/test_pandas.py
@@ -26,7 +26,6 @@ from pyarrow.fs import LocalFileSystem, SubTreeFileSystem
 from pyarrow.tests.parquet.common import (
     parametrize_legacy_dataset, parametrize_legacy_dataset_not_supported)
 from pyarrow.util import guid
-from pyarrow.vendored.version import Version
 
 try:
     import pyarrow.parquet as pq
@@ -561,10 +560,6 @@ def test_pandas_categorical_roundtrip(use_legacy_dataset):
 def test_write_to_dataset_pandas_preserve_extensiondtypes(
     tempdir, use_legacy_dataset
 ):
-    # ARROW-8251 - preserve pandas extension dtypes in roundtrip
-    if Version(pd.__version__) < Version("1.0.0"):
-        pytest.skip("__arrow_array__ added to pandas in 1.0.0")
-
     df = pd.DataFrame({'part': 'a', "col": [1, 2, 3]})
     df['col'] = df['col'].astype("Int64")
     table = pa.table(df)
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index 34dc3bf452..3d03c7d86a 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -1812,14 +1812,6 @@ def test_strptime():
 @pytest.mark.skipif(sys.platform == 'win32',
                     reason="Timezone database is not available on Windows yet")
 def test_strftime():
-    from pyarrow.vendored.version import Version
-
-    def _fix_timestamp(s):
-        if Version(pd.__version__) < Version("1.0.0"):
-            return s.to_series().replace("NaT", pd.NaT)
-        else:
-            return s
-
     times = ["2018-03-10 09:00", "2038-01-31 12:23", None]
     timezones = ["CET", "UTC", "Europe/Ljubljana"]
 
@@ -1834,7 +1826,7 @@ def test_strftime():
             for fmt in formats:
                 options = pc.StrftimeOptions(fmt)
                 result = pc.strftime(tsa, options=options)
-                expected = pa.array(_fix_timestamp(ts.strftime(fmt)))
+                expected = pa.array(ts.strftime(fmt))
                 assert result.equals(expected)
 
         fmt = "%Y-%m-%dT%H:%M:%S"
@@ -1842,34 +1834,34 @@ def test_strftime():
         # Default format
         tsa = pa.array(ts, type=pa.timestamp("s", timezone))
         result = pc.strftime(tsa, options=pc.StrftimeOptions())
-        expected = pa.array(_fix_timestamp(ts.strftime(fmt)))
+        expected = pa.array(ts.strftime(fmt))
         assert result.equals(expected)
 
         # Default format plus timezone
         tsa = pa.array(ts, type=pa.timestamp("s", timezone))
         result = pc.strftime(tsa, options=pc.StrftimeOptions(fmt + "%Z"))
-        expected = pa.array(_fix_timestamp(ts.strftime(fmt + "%Z")))
+        expected = pa.array(ts.strftime(fmt + "%Z"))
         assert result.equals(expected)
 
         # Pandas %S is equivalent to %S in arrow for unit="s"
         tsa = pa.array(ts, type=pa.timestamp("s", timezone))
         options = pc.StrftimeOptions("%S")
         result = pc.strftime(tsa, options=options)
-        expected = pa.array(_fix_timestamp(ts.strftime("%S")))
+        expected = pa.array(ts.strftime("%S"))
         assert result.equals(expected)
 
         # Pandas %S.%f is equivalent to %S in arrow for unit="us"
         tsa = pa.array(ts, type=pa.timestamp("us", timezone))
         options = pc.StrftimeOptions("%S")
         result = pc.strftime(tsa, options=options)
-        expected = pa.array(_fix_timestamp(ts.strftime("%S.%f")))
+        expected = pa.array(ts.strftime("%S.%f"))
         assert result.equals(expected)
 
         # Test setting locale
         tsa = pa.array(ts, type=pa.timestamp("s", timezone))
         options = pc.StrftimeOptions(fmt, locale="C")
         result = pc.strftime(tsa, options=options)
-        expected = pa.array(_fix_timestamp(ts.strftime(fmt)))
+        expected = pa.array(ts.strftime(fmt))
         assert result.equals(expected)
 
     # Test timestamps without timezone
@@ -1877,7 +1869,8 @@ def test_strftime():
     ts = pd.to_datetime(times)
     tsa = pa.array(ts, type=pa.timestamp("s"))
     result = pc.strftime(tsa, options=pc.StrftimeOptions(fmt))
-    expected = pa.array(_fix_timestamp(ts.strftime(fmt)))
+    expected = pa.array(ts.strftime(fmt))
+
     # Positional format
     assert pc.strftime(tsa, fmt) == result
 
@@ -1956,8 +1949,6 @@ def _check_datetime_components(timestamps, timezone=None):
 
 @pytest.mark.pandas
 def test_extract_datetime_components():
-    from pyarrow.vendored.version import Version
-
     timestamps = ["1970-01-01T00:00:59.123456789",
                   "2000-02-29T23:23:23.999999999",
                   "2033-05-18T03:33:20.000000000",
@@ -1983,8 +1974,6 @@ def test_extract_datetime_components():
     if sys.platform == 'win32':
         # TODO: We should test on windows once ARROW-13168 is resolved.
         pytest.skip('Timezone database is not available on Windows yet')
-    elif Version(pd.__version__) < Version('1.0.0'):
-        pytest.skip('Pandas < 1.0 extracts time components incorrectly.')
     else:
         for timezone in timezones:
             _check_datetime_components(timestamps, timezone)
@@ -1995,8 +1984,6 @@ def test_extract_datetime_components():
 @pytest.mark.skipif(sys.platform == 'win32',
                     reason="Timezone database is not available on Windows yet")
 def test_assume_timezone():
-    from pyarrow.vendored.version import Version
-
     ts_type = pa.timestamp("ns")
     timestamps = pd.to_datetime(["1970-01-01T00:00:59.123456789",
                                  "2000-02-29T23:23:23.999999999",
@@ -2040,31 +2027,29 @@ def test_assume_timezone():
 
     timezone = "Europe/Brussels"
 
-    # nonexistent parameter was introduced in Pandas 0.24.0
-    if Version(pd.__version__) >= Version("0.24.0"):
-        options_nonexistent_raise = pc.AssumeTimezoneOptions(timezone)
-        options_nonexistent_earliest = pc.AssumeTimezoneOptions(
-            timezone, ambiguous="raise", nonexistent="earliest")
-        options_nonexistent_latest = pc.AssumeTimezoneOptions(
-            timezone, ambiguous="raise", nonexistent="latest")
-
-        with pytest.raises(ValueError,
-                           match="Timestamp doesn't exist in "
-                                 f"timezone '{timezone}'"):
-            pc.assume_timezone(nonexistent_array,
-                               options=options_nonexistent_raise)
-
-        expected = pa.array(nonexistent.tz_localize(
-            timezone, nonexistent="shift_forward"))
-        result = pc.assume_timezone(
-            nonexistent_array, options=options_nonexistent_latest)
-        expected.equals(result)
-
-        expected = pa.array(nonexistent.tz_localize(
-            timezone, nonexistent="shift_backward"))
-        result = pc.assume_timezone(
-            nonexistent_array, options=options_nonexistent_earliest)
-        expected.equals(result)
+    options_nonexistent_raise = pc.AssumeTimezoneOptions(timezone)
+    options_nonexistent_earliest = pc.AssumeTimezoneOptions(
+        timezone, ambiguous="raise", nonexistent="earliest")
+    options_nonexistent_latest = pc.AssumeTimezoneOptions(
+        timezone, ambiguous="raise", nonexistent="latest")
+
+    with pytest.raises(ValueError,
+                       match="Timestamp doesn't exist in "
+                       f"timezone '{timezone}'"):
+        pc.assume_timezone(nonexistent_array,
+                           options=options_nonexistent_raise)
+
+    expected = pa.array(nonexistent.tz_localize(
+        timezone, nonexistent="shift_forward"))
+    result = pc.assume_timezone(
+        nonexistent_array, options=options_nonexistent_latest)
+    expected.equals(result)
+
+    expected = pa.array(nonexistent.tz_localize(
+        timezone, nonexistent="shift_backward"))
+    result = pc.assume_timezone(
+        nonexistent_array, options=options_nonexistent_earliest)
+    expected.equals(result)
 
     options_ambiguous_raise = pc.AssumeTimezoneOptions(timezone)
     options_ambiguous_latest = pc.AssumeTimezoneOptions(
@@ -2199,11 +2184,6 @@ def _check_temporal_rounding(ts, values, unit):
                                   "second", "minute", "hour", "day"))
 @pytest.mark.pandas
 def test_round_temporal(unit):
-    from pyarrow.vendored.version import Version
-
-    if Version(pd.__version__) < Version('1.0.0'):
-        pytest.skip('Pandas < 1.0 rounds differently.')
-
     values = (1, 2, 3, 4, 5, 6, 7, 10, 15, 24, 60, 250, 500, 750)
     timestamps = [
         "1923-07-07 08:52:35.203790336",
diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
index 6ec0532a02..a1ab4d4388 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -1071,13 +1071,10 @@ class TestConvertDateTimeLikeTypes:
         pytz = pytest.importorskip("pytz")
         from datetime import timezone
 
-        if Version(pd.__version__) > Version("0.25.0"):
-            # older pandas versions fail on datetime.timezone.utc (as in input)
-            # vs pytz.UTC (as in result)
-            values = [datetime(2018, 1, 1, 12, 23, 45, tzinfo=timezone.utc)]
-            # also test with index to ensure both paths roundtrip (ARROW-9962)
-            df = pd.DataFrame({'datetime': values}, index=values)
-            _check_pandas_roundtrip(df, preserve_index=True)
+        values = [datetime(2018, 1, 1, 12, 23, 45, tzinfo=timezone.utc)]
+        # also test with index to ensure both paths roundtrip (ARROW-9962)
+        df = pd.DataFrame({'datetime': values}, index=values)
+        _check_pandas_roundtrip(df, preserve_index=True)
 
         # datetime.timezone is going to be pytz.FixedOffset
         hours = 1
@@ -2857,13 +2854,9 @@ def _fully_loaded_dataframe_example():
         6: [True, False] * 5,
         7: np.random.randn(10),
         8: np.random.randint(0, 100, size=10),
-        9: pd.period_range('2013', periods=10, freq='M')
+        9: pd.period_range('2013', periods=10, freq='M'),
+        10: pd.interval_range(start=1, freq=1, periods=10),
     }
-
-    if Version(pd.__version__) >= Version('0.21'):
-        # There is an issue with pickling IntervalIndex in pandas 0.20.x
-        data[10] = pd.interval_range(start=1, freq=1, periods=10)
-
     return pd.DataFrame(data, index=index)
 
 
@@ -2941,16 +2934,6 @@ def test_convert_unsupported_type_error_message():
     with pytest.raises(ValueError, match=msg):
         pa.Table.from_pandas(df)
 
-    # period unsupported for pandas <= 0.25
-    if Version(pd.__version__) <= Version('0.25'):
-        df = pd.DataFrame({
-            'a': pd.period_range('2000-01-01', periods=20),
-        })
-
-        msg = 'Conversion failed for column a with type (period|object)'
-        with pytest.raises((TypeError, ValueError), match=msg):
-            pa.Table.from_pandas(df)
-
 
 # ----------------------------------------------------------------------
 # Hypothesis tests
@@ -3852,40 +3835,32 @@ def test_dictionary_from_pandas_specified_type():
 
 
 def test_array_protocol():
-    if Version(pd.__version__) < Version('0.24.0'):
-        pytest.skip('IntegerArray only introduced in 0.24')
-
     df = pd.DataFrame({'a': pd.Series([1, 2, None], dtype='Int64')})
 
-    if Version(pd.__version__) < Version('0.26.0.dev'):
-        # with pandas<=0.25, trying to convert nullable integer errors
-        with pytest.raises(TypeError):
-            pa.table(df)
-    else:
-        # __arrow_array__ added to pandas IntegerArray in 0.26.0.dev
+    # __arrow_array__ added to pandas IntegerArray in 0.26.0.dev
 
-        # default conversion
-        result = pa.table(df)
-        expected = pa.array([1, 2, None], pa.int64())
-        assert result[0].chunk(0).equals(expected)
+    # default conversion
+    result = pa.table(df)
+    expected = pa.array([1, 2, None], pa.int64())
+    assert result[0].chunk(0).equals(expected)
 
-        # with specifying schema
-        schema = pa.schema([('a', pa.float64())])
-        result = pa.table(df, schema=schema)
-        expected2 = pa.array([1, 2, None], pa.float64())
-        assert result[0].chunk(0).equals(expected2)
+    # with specifying schema
+    schema = pa.schema([('a', pa.float64())])
+    result = pa.table(df, schema=schema)
+    expected2 = pa.array([1, 2, None], pa.float64())
+    assert result[0].chunk(0).equals(expected2)
 
-        # pass Series to pa.array
-        result = pa.array(df['a'])
-        assert result.equals(expected)
-        result = pa.array(df['a'], type=pa.float64())
-        assert result.equals(expected2)
+    # pass Series to pa.array
+    result = pa.array(df['a'])
+    assert result.equals(expected)
+    result = pa.array(df['a'], type=pa.float64())
+    assert result.equals(expected2)
 
-        # pass actual ExtensionArray to pa.array
-        result = pa.array(df['a'].values)
-        assert result.equals(expected)
-        result = pa.array(df['a'].values, type=pa.float64())
-        assert result.equals(expected2)
+    # pass actual ExtensionArray to pa.array
+    result = pa.array(df['a'].values)
+    assert result.equals(expected)
+    result = pa.array(df['a'].values, type=pa.float64())
+    assert result.equals(expected2)
 
 
 class DummyExtensionType(pa.PyExtensionType):
@@ -3907,9 +3882,6 @@ def PandasArray__arrow_array__(self, type=None):
 def test_array_protocol_pandas_extension_types(monkeypatch):
     # ARROW-7022 - ensure protocol works for Period / Interval extension dtypes
 
-    if Version(pd.__version__) < Version('0.24.0'):
-        pytest.skip('Period/IntervalArray only introduced in 0.24')
-
     storage = pa.array([1, 2, 3], type=pa.int64())
     expected = pa.ExtensionArray.from_storage(DummyExtensionType(), storage)
 
@@ -3956,9 +3928,6 @@ def _Int64Dtype__from_arrow__(self, array):
 
 
 def test_convert_to_extension_array(monkeypatch):
-    if Version(pd.__version__) < Version("0.26.0.dev"):
-        pytest.skip("Conversion from IntegerArray to arrow not yet supported")
-
     import pandas.core.internals as _int
 
     # table converted from dataframe with extension types (so pandas_metadata
@@ -4012,19 +3981,10 @@ def test_conversion_extensiontype_to_extensionarray(monkeypatch):
     # converting extension type to linked pandas ExtensionDtype/Array
     import pandas.core.internals as _int
 
-    if Version(pd.__version__) < Version("0.24.0"):
-        pytest.skip("ExtensionDtype introduced in pandas 0.24")
-
     storage = pa.array([1, 2, 3, 4], pa.int64())
     arr = pa.ExtensionArray.from_storage(MyCustomIntegerType(), storage)
     table = pa.table({'a': arr})
 
-    if Version(pd.__version__) < Version("0.26.0.dev"):
-        # ensure pandas Int64Dtype has the protocol method (for older pandas)
-        monkeypatch.setattr(
-            pd.Int64Dtype, '__from_arrow__', _Int64Dtype__from_arrow__,
-            raising=False)
-
     # extension type points to Int64Dtype, which knows how to create a
     # pandas ExtensionArray
     result = arr.to_pandas()
@@ -4039,9 +3999,7 @@ def test_conversion_extensiontype_to_extensionarray(monkeypatch):
 
     # monkeypatch pandas Int64Dtype to *not* have the protocol method
     # (remove the version added above and the actual version for recent pandas)
-    if Version(pd.__version__) < Version("0.26.0.dev"):
-        monkeypatch.delattr(pd.Int64Dtype, "__from_arrow__")
-    elif Version(pd.__version__) < Version("1.3.0.dev"):
+    if Version(pd.__version__) < Version("1.3.0.dev"):
         monkeypatch.delattr(
             pd.core.arrays.integer._IntegerDtype, "__from_arrow__")
     else:
@@ -4058,9 +4016,6 @@ def test_conversion_extensiontype_to_extensionarray(monkeypatch):
 
 
 def test_to_pandas_extension_dtypes_mapping():
-    if Version(pd.__version__) < Version("0.26.0.dev"):
-        pytest.skip("Conversion to pandas IntegerArray not yet supported")
-
     table = pa.table({'a': pa.array([1, 2, 3], pa.int64())})
 
     # default use numpy dtype
@@ -4102,9 +4057,6 @@ def test_array_to_pandas():
 
 
 def test_roundtrip_empty_table_with_extension_dtype_index():
-    if Version(pd.__version__) < Version("1.0.0"):
-        pytest.skip("ExtensionDtype to_pandas method missing")
-
     df = pd.DataFrame(index=pd.interval_range(start=0, end=3))
     table = pa.table(df)
     table.to_pandas().index == pd.Index([{'left': 0, 'right': 1},
@@ -4116,7 +4068,7 @@ def test_roundtrip_empty_table_with_extension_dtype_index():
 def test_array_to_pandas_types_mapper():
     # https://issues.apache.org/jira/browse/ARROW-9664
     if Version(pd.__version__) < Version("1.2.0"):
-        pytest.skip("ExtensionDtype to_pandas method missing")
+        pytest.skip("Float64Dtype extension dtype missing")
 
     data = pa.array([1, 2, 3], pa.int64())
 
@@ -4140,7 +4092,7 @@ def test_array_to_pandas_types_mapper():
 def test_chunked_array_to_pandas_types_mapper():
     # https://issues.apache.org/jira/browse/ARROW-9664
     if Version(pd.__version__) < Version("1.2.0"):
-        pytest.skip("ExtensionDtype to_pandas method missing")
+        pytest.skip("Float64Dtype extension dtype missing")
 
     data = pa.chunked_array([pa.array([1, 2, 3], pa.int64())])
     assert isinstance(data, pa.ChunkedArray)
diff --git a/python/pyarrow/tests/test_schema.py b/python/pyarrow/tests/test_schema.py
index 854300a474..0c4dea673b 100644
--- a/python/pyarrow/tests/test_schema.py
+++ b/python/pyarrow/tests/test_schema.py
@@ -25,7 +25,6 @@ import numpy as np
 import pyarrow as pa
 
 import pyarrow.tests.util as test_util
-from pyarrow.vendored.version import Version
 
 
 def test_schema_constructor_errors():
@@ -659,9 +658,8 @@ def test_schema_from_pandas():
             '2006-01-13T12:34:56.432539784',
             '2010-08-13T05:46:57.437699912'
         ], dtype='datetime64[ns]'),
+        pd.array([1, 2, None], dtype=pd.Int32Dtype()),
     ]
-    if Version(pd.__version__) >= Version('1.0.0'):
-        inputs.append(pd.array([1, 2, None], dtype=pd.Int32Dtype()))
     for data in inputs:
         df = pd.DataFrame({'a': data}, index=data)
         schema = pa.Schema.from_pandas(df)