You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2023/06/16 20:18:17 UTC

[spark] branch master updated: [SPARK-42618][PYTHON][PS] Warning for the pandas-related behavior changes in next major release

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new d73eafcf580 [SPARK-42618][PYTHON][PS] Warning for the pandas-related behavior changes in next major release
d73eafcf580 is described below

commit d73eafcf580655ece01c349dd0b7b2aa2d7d0604
Author: itholic <ha...@databricks.com>
AuthorDate: Fri Jun 16 13:18:03 2023 -0700

    [SPARK-42618][PYTHON][PS] Warning for the pandas-related behavior changes in next major release
    
    ### What changes were proposed in this pull request?
    
    This PR proposes to add warnings for the pandas-related behavior changes in the next major release. Furthermore, it improves the existing warning messages to provide more specific information.
    
    ### Why are the changes needed?
    
    In the upcoming major release of Apache Spark, 4.0.0, there are expected to be numerous breaking changes to officially support pandas 2.0.0. To ensure that our users are well aware of these changes in advance and to minimize confusion, it is necessary to provide appropriate warning messages for each feature that will undergo behavioral changes.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Users may see warning messages when they try to use features that will be removed in the next major release.
    
    ### How was this patch tested?
    
    The existing CI should be passed.
    
    Closes #41612 from itholic/deprecate_pandas_2.0.
    
    Authored-by: itholic <ha...@databricks.com>
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
 python/pyspark/pandas/base.py              | 13 +++++-
 python/pyspark/pandas/frame.py             | 29 ++++++++++++--
 python/pyspark/pandas/generic.py           |  5 +++
 python/pyspark/pandas/groupby.py           | 26 ++++++++++++
 python/pyspark/pandas/indexes/base.py      |  6 +--
 python/pyspark/pandas/indexes/category.py  | 33 ++++++++++++++++
 python/pyspark/pandas/indexes/datetimes.py | 63 ++++++++++++++++++++++++++++++
 python/pyspark/pandas/indexes/numeric.py   |  9 +++++
 python/pyspark/pandas/namespace.py         |  5 +++
 python/pyspark/pandas/plot/matplotlib.py   |  5 +++
 python/pyspark/pandas/series.py            | 18 +++++++--
 python/pyspark/pandas/strings.py           |  5 +++
 12 files changed, 207 insertions(+), 10 deletions(-)

diff --git a/python/pyspark/pandas/base.py b/python/pyspark/pandas/base.py
index 01a84b77f40..e005fd19b30 100644
--- a/python/pyspark/pandas/base.py
+++ b/python/pyspark/pandas/base.py
@@ -1431,7 +1431,13 @@ class IndexOpsMixin(object, metaclass=ABCMeta):
         3    1
         Name: pandas-on-Spark, dtype: int64
         """
-        from pyspark.pandas.series import first_series
+        from pyspark.pandas.series import first_series, Series
+
+        if isinstance(self, Series):
+            warnings.warn(
+                "The resulting Series will have a fixed name of 'count' from 4.0.0.",
+                FutureWarning,
+            )
 
         if bins is not None:
             raise NotImplementedError("value_counts currently does not support bins")
@@ -1688,6 +1694,11 @@ class IndexOpsMixin(object, metaclass=ABCMeta):
         assert (na_sentinel is None) or isinstance(na_sentinel, int)
         assert sort is True
 
+        warnings.warn(
+            "Argument `na_sentinel` will be removed in 4.0.0.",
+            FutureWarning,
+        )
+
         if isinstance(self.dtype, CategoricalDtype):
             categories = self.dtype.categories
             if len(categories) == 0:
diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py
index 33bd0dd1e8f..faf5cd028bc 100644
--- a/python/pyspark/pandas/frame.py
+++ b/python/pyspark/pandas/frame.py
@@ -2070,7 +2070,10 @@ class DataFrame(Frame, Generic[T]):
             iteritems is deprecated and will be removed in a future version.
             Use .items instead.
         """
-        warnings.warn("Deprecated in 3.4.0, Use DataFrame.items instead.", FutureWarning)
+        warnings.warn(
+            "Deprecated in 3.4.0, and will be removed in 4.0.0. Use DataFrame.items instead.",
+            FutureWarning,
+        )
         return self.items()
 
     def to_clipboard(self, excel: bool = True, sep: Optional[str] = None, **kwargs: Any) -> None:
@@ -2605,6 +2608,10 @@ defaultdict(<class 'list'>, {'col..., 'col...})]
         \bottomrule
         \end{tabular}
         """
+        warnings.warn(
+            "Argument `col_space` will be removed in 4.0.0.",
+            FutureWarning,
+        )
 
         args = locals()
         psdf = self
@@ -8897,7 +8904,7 @@ defaultdict(<class 'list'>, {'col..., 'col...})]
         """
         warnings.warn(
             "The DataFrame.append method is deprecated "
-            "and will be removed in a future version. "
+            "and will be removed in 4.0.0. "
             "Use pyspark.pandas.concat instead.",
             FutureWarning,
         )
@@ -11218,6 +11225,11 @@ defaultdict(<class 'list'>, {'col..., 'col...})]
         2  2.5
         3  4.0
         """
+        warnings.warn(
+            "Default value of `numeric_only` will be changed to `False` "
+            "instead of `None` in 4.0.0.",
+            FutureWarning,
+        )
         if numeric_only:
             numeric_col_names = []
             for label in self._internal.column_labels:
@@ -12228,6 +12240,11 @@ defaultdict(<class 'list'>, {'col..., 'col...})]
         0.50  3.0  7.0
         0.75  4.0  8.0
         """
+        warnings.warn(
+            "Default value of `numeric_only` will be changed to `False` "
+            "instead of `True` in 4.0.0.",
+            FutureWarning,
+        )
         axis = validate_axis(axis)
         if axis != 0:
             raise NotImplementedError('axis should be either 0 or "index" currently.')
@@ -12746,7 +12763,7 @@ defaultdict(<class 'list'>, {'col..., 'col...})]
         dtype: float64
         """
         warnings.warn(
-            "The 'mad' method is deprecated and will be removed in a future version. "
+            "The 'mad' method is deprecated and will be removed in 4.0.0. "
             "To compute the same result, you may do `(df - df.mean()).abs().mean()`.",
             FutureWarning,
         )
@@ -12891,6 +12908,12 @@ defaultdict(<class 'list'>, {'col..., 'col...})]
         if numeric_only is None and axis == 0:
             numeric_only = True
 
+        warnings.warn(
+            "Default value of `numeric_only` will be changed to `False` "
+            "instead of `True` in 4.0.0.",
+            FutureWarning,
+        )
+
         mode_scols: List[PySparkColumn] = []
         mode_col_names: List[str] = []
         mode_labels: List[Label] = []
diff --git a/python/pyspark/pandas/generic.py b/python/pyspark/pandas/generic.py
index b540045f88f..c346889863b 100644
--- a/python/pyspark/pandas/generic.py
+++ b/python/pyspark/pandas/generic.py
@@ -1419,6 +1419,11 @@ class Frame(object, metaclass=ABCMeta):
         nan
         """
         axis = validate_axis(axis)
+        warnings.warn(
+            "Default value of `numeric_only` will be changed to `False` "
+            "instead of `None` in 4.0.0.",
+            FutureWarning,
+        )
 
         if numeric_only is None and axis == 0:
             numeric_only = True
diff --git a/python/pyspark/pandas/groupby.py b/python/pyspark/pandas/groupby.py
index da04e4d217e..663a635668e 100644
--- a/python/pyspark/pandas/groupby.py
+++ b/python/pyspark/pandas/groupby.py
@@ -646,6 +646,11 @@ class GroupBy(Generic[FrameLike], metaclass=ABCMeta):
         2  4.0  1.500000  1.000000
         """
         self._validate_agg_columns(numeric_only=numeric_only, function_name="median")
+        warnings.warn(
+            "Default value of `numeric_only` will be changed to `False` "
+            "instead of `True` in 4.0.0.",
+            FutureWarning,
+        )
 
         return self._reduce_for_stat_function(
             F.mean, accepted_spark_types=(NumericType,), bool_to_numeric=True
@@ -885,6 +890,11 @@ class GroupBy(Generic[FrameLike], metaclass=ABCMeta):
         pyspark.pandas.Series.groupby
         pyspark.pandas.DataFrame.groupby
         """
+        warnings.warn(
+            "Default value of `numeric_only` will be changed to `False` "
+            "instead of `True` in 4.0.0.",
+            FutureWarning,
+        )
         if numeric_only is not None and not isinstance(numeric_only, bool):
             raise TypeError("numeric_only must be None or bool")
         if not isinstance(min_count, int):
@@ -1278,6 +1288,12 @@ class GroupBy(Generic[FrameLike], metaclass=ABCMeta):
         if not isinstance(min_count, int):
             raise TypeError("min_count must be integer")
 
+        warnings.warn(
+            "Default value of `numeric_only` will be changed to `False` "
+            "instead of `True` in 4.0.0.",
+            FutureWarning,
+        )
+
         self._validate_agg_columns(numeric_only=numeric_only, function_name="prod")
 
         return self._reduce_for_stat_function(
@@ -3530,6 +3546,12 @@ class GroupBy(Generic[FrameLike], metaclass=ABCMeta):
 
         self._validate_agg_columns(numeric_only=numeric_only, function_name="median")
 
+        warnings.warn(
+            "Default value of `numeric_only` will be changed to `False` "
+            "instead of `True` in 4.0.0.",
+            FutureWarning,
+        )
+
         def stat_function(col: Column) -> Column:
             return F.percentile_approx(col, 0.5, accuracy)
 
@@ -4256,6 +4278,10 @@ class SeriesGroupBy(GroupBy[Series]):
            NaN    1
         Name: B, dtype: int64
         """
+        warnings.warn(
+            "The resulting Series will have a fixed name of 'count' from 4.0.0.",
+            FutureWarning,
+        )
         groupkeys = self._groupkeys + self._agg_columns
         groupkey_names = [SPARK_INDEX_NAME_FORMAT(i) for i in range(len(groupkeys))]
         groupkey_cols = [s.spark.column.alias(name) for s, name in zip(groupkeys, groupkey_names)]
diff --git a/python/pyspark/pandas/indexes/base.py b/python/pyspark/pandas/indexes/base.py
index 146c1f2d4cc..35f52012944 100644
--- a/python/pyspark/pandas/indexes/base.py
+++ b/python/pyspark/pandas/indexes/base.py
@@ -664,8 +664,7 @@ class Index(IndexOpsMixin):
         True
         """
         warnings.warn(
-            "Index.asi8 is deprecated and will be removed in a future version. "
-            "We recommend using `{}.to_numpy()` instead.".format(type(self).__name__),
+            "Index.asi8 is deprecated and will be removed in 4.0.0. " "Use Index.astype instead.",
             FutureWarning,
         )
         if isinstance(self.spark.data_type, IntegralType):
@@ -1150,7 +1149,8 @@ class Index(IndexOpsMixin):
         True
         """
         warnings.warn(
-            "Index.is_type_compatible is deprecated and will be removed in a " "future version",
+            "Index.is_type_compatible is deprecated and will be removed in 4.0.0. "
+            "Use Index.isin instead.",
             FutureWarning,
         )
         return kind == self.inferred_type
diff --git a/python/pyspark/pandas/indexes/category.py b/python/pyspark/pandas/indexes/category.py
index 79645622d3f..7bc87805e15 100644
--- a/python/pyspark/pandas/indexes/category.py
+++ b/python/pyspark/pandas/indexes/category.py
@@ -14,6 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+import warnings
 from typing import Any, Callable, List, Optional, Union, cast, no_type_check
 
 import pandas as pd
@@ -252,6 +253,10 @@ class CategoricalIndex(Index):
                          categories=['a', 'b', 'c', 'x'], ordered=False, dtype='category')
         """
         if inplace:
+            warnings.warn(
+                "Argument `inplace` will be removed in 4.0.0.",
+                FutureWarning,
+            )
             raise ValueError("cannot use inplace with CategoricalIndex")
 
         return CategoricalIndex(
@@ -285,6 +290,10 @@ class CategoricalIndex(Index):
                          categories=['a', 'b', 'c'], ordered=True, dtype='category')
         """
         if inplace:
+            warnings.warn(
+                "Argument `inplace` will be removed in 4.0.0.",
+                FutureWarning,
+            )
             raise ValueError("cannot use inplace with CategoricalIndex")
 
         return CategoricalIndex(self.to_series().cat.as_ordered()).rename(self.name)
@@ -316,6 +325,10 @@ class CategoricalIndex(Index):
                          categories=['a', 'b', 'c'], ordered=False, dtype='category')
         """
         if inplace:
+            warnings.warn(
+                "Argument `inplace` will be removed in 4.0.0.",
+                FutureWarning,
+            )
             raise ValueError("cannot use inplace with CategoricalIndex")
 
         return CategoricalIndex(self.to_series().cat.as_unordered()).rename(self.name)
@@ -369,6 +382,10 @@ class CategoricalIndex(Index):
                          categories=['a', 'c'], ordered=False, dtype='category')
         """
         if inplace:
+            warnings.warn(
+                "Argument `inplace` will be removed in 4.0.0.",
+                FutureWarning,
+            )
             raise ValueError("cannot use inplace with CategoricalIndex")
 
         return CategoricalIndex(self.to_series().cat.remove_categories(removals)).rename(self.name)
@@ -410,6 +427,10 @@ class CategoricalIndex(Index):
                          categories=['a', 'b', 'c'], ordered=False, dtype='category')
         """
         if inplace:
+            warnings.warn(
+                "Argument `inplace` will be removed in 4.0.0.",
+                FutureWarning,
+            )
             raise ValueError("cannot use inplace with CategoricalIndex")
 
         return CategoricalIndex(self.to_series().cat.remove_unused_categories()).rename(self.name)
@@ -480,6 +501,10 @@ class CategoricalIndex(Index):
         CategoricalIndex(['A', 'A', 'B'], categories=['A', 'B'], ordered=False, dtype='category')
         """
         if inplace:
+            warnings.warn(
+                "Argument `inplace` will be removed in 4.0.0.",
+                FutureWarning,
+            )
             raise ValueError("cannot use inplace with CategoricalIndex")
 
         return CategoricalIndex(self.to_series().cat.rename_categories(new_categories)).rename(
@@ -542,6 +567,10 @@ class CategoricalIndex(Index):
                          categories=['c', 'b', 'a'], ordered=False, dtype='category')
         """
         if inplace:
+            warnings.warn(
+                "Argument `inplace` will be removed in 4.0.0.",
+                FutureWarning,
+            )
             raise ValueError("cannot use inplace with CategoricalIndex")
 
         return CategoricalIndex(
@@ -625,6 +654,10 @@ class CategoricalIndex(Index):
         CategoricalIndex([1, 2, 2, 3, 3, 3], categories=[1, 2, 3], ordered=True, dtype='category')
         """
         if inplace:
+            warnings.warn(
+                "Argument `inplace` will be removed in 4.0.0.",
+                FutureWarning,
+            )
             raise ValueError("cannot use inplace with CategoricalIndex")
 
         return CategoricalIndex(
diff --git a/python/pyspark/pandas/indexes/datetimes.py b/python/pyspark/pandas/indexes/datetimes.py
index 8cd316ae074..9adef61087a 100644
--- a/python/pyspark/pandas/indexes/datetimes.py
+++ b/python/pyspark/pandas/indexes/datetimes.py
@@ -15,6 +15,7 @@
 # limitations under the License.
 #
 import datetime
+import warnings
 from functools import partial
 from typing import Any, Optional, Union, cast, no_type_check
 
@@ -163,6 +164,10 @@ class DatetimeIndex(Index):
         """
         The days of the datetime.
         """
+        warnings.warn(
+            "`day` will return int32 index instead of int 64 index in 4.0.0.",
+            FutureWarning,
+        )
         return Index(self.to_series().dt.day)
 
     @property
@@ -170,6 +175,10 @@ class DatetimeIndex(Index):
         """
         The hours of the datetime.
         """
+        warnings.warn(
+            "`hour` will return int32 index instead of int 64 index in 4.0.0.",
+            FutureWarning,
+        )
         return Index(self.to_series().dt.hour)
 
     @property
@@ -177,6 +186,10 @@ class DatetimeIndex(Index):
         """
         The minutes of the datetime.
         """
+        warnings.warn(
+            "`minute` will return int32 index instead of int 64 index in 4.0.0.",
+            FutureWarning,
+        )
         return Index(self.to_series().dt.minute)
 
     @property
@@ -184,6 +197,10 @@ class DatetimeIndex(Index):
         """
         The seconds of the datetime.
         """
+        warnings.warn(
+            "`second` will return int32 index instead of int 64 index in 4.0.0.",
+            FutureWarning,
+        )
         return Index(self.to_series().dt.second)
 
     @property
@@ -191,17 +208,31 @@ class DatetimeIndex(Index):
         """
         The microseconds of the datetime.
         """
+        warnings.warn(
+            "`microsecond` will return int32 index instead of int 64 index in 4.0.0.",
+            FutureWarning,
+        )
         return Index(self.to_series().dt.microsecond)
 
     @property
     def week(self) -> Index:
         """
         The week ordinal of the year.
+
+        .. deprecated:: 3.5.0
         """
+        warnings.warn(
+            "`week` is deprecated in 3.5.0 and will be removed in 4.0.0.",
+            FutureWarning,
+        )
         return Index(self.to_series().dt.week)
 
     @property
     def weekofyear(self) -> Index:
+        warnings.warn(
+            "`weekofyear` is deprecated in 3.5.0 and will be removed in 4.0.0.",
+            FutureWarning,
+        )
         return Index(self.to_series().dt.weekofyear)
 
     weekofyear.__doc__ = week.__doc__
@@ -232,16 +263,28 @@ class DatetimeIndex(Index):
         >>> idx.dayofweek  # doctest: +SKIP
         Int64Index([5, 6, 0, 1, 2, 3, 4, 5, 6], dtype='int64')
         """
+        warnings.warn(
+            "`dayofweek` will return int32 index instead of int 64 index in 4.0.0.",
+            FutureWarning,
+        )
         return Index(self.to_series().dt.dayofweek)
 
     @property
     def day_of_week(self) -> Index:
+        warnings.warn(
+            "`day_of_week` will return int32 index instead of int 64 index in 4.0.0.",
+            FutureWarning,
+        )
         return self.dayofweek
 
     day_of_week.__doc__ = dayofweek.__doc__
 
     @property
     def weekday(self) -> Index:
+        warnings.warn(
+            "`weekday` will return int32 index instead of int 64 index in 4.0.0.",
+            FutureWarning,
+        )
         return Index(self.to_series().dt.weekday)
 
     weekday.__doc__ = dayofweek.__doc__
@@ -251,10 +294,18 @@ class DatetimeIndex(Index):
         """
         The ordinal day of the year.
         """
+        warnings.warn(
+            "`dayofyear` will return int32 index instead of int 64 index in 4.0.0.",
+            FutureWarning,
+        )
         return Index(self.to_series().dt.dayofyear)
 
     @property
     def day_of_year(self) -> Index:
+        warnings.warn(
+            "`day_of_year` will return int32 index instead of int 64 index in 4.0.0.",
+            FutureWarning,
+        )
         return self.dayofyear
 
     day_of_year.__doc__ = dayofyear.__doc__
@@ -264,6 +315,10 @@ class DatetimeIndex(Index):
         """
         The quarter of the date.
         """
+        warnings.warn(
+            "`quarter` will return int32 index instead of int 64 index in 4.0.0.",
+            FutureWarning,
+        )
         return Index(self.to_series().dt.quarter)
 
     @property
@@ -430,10 +485,18 @@ class DatetimeIndex(Index):
         """
         The number of days in the month.
         """
+        warnings.warn(
+            "`daysinmonth` will return int32 index instead of int 64 index in 4.0.0.",
+            FutureWarning,
+        )
         return Index(self.to_series().dt.daysinmonth)
 
     @property
     def days_in_month(self) -> Index:
+        warnings.warn(
+            "`days_in_month` will return int32 index instead of int 64 index in 4.0.0.",
+            FutureWarning,
+        )
         return Index(self.to_series().dt.days_in_month)
 
     days_in_month.__doc__ = daysinmonth.__doc__
diff --git a/python/pyspark/pandas/indexes/numeric.py b/python/pyspark/pandas/indexes/numeric.py
index 4c378b535ff..d0b5bc5d159 100644
--- a/python/pyspark/pandas/indexes/numeric.py
+++ b/python/pyspark/pandas/indexes/numeric.py
@@ -14,6 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+import warnings
 from typing import Any, Optional, Union, cast
 
 import pandas as pd
@@ -93,6 +94,10 @@ class Int64Index(IntegerIndex):
         copy: bool = False,
         name: Optional[Name] = None,
     ) -> "Int64Index":
+        warnings.warn(
+            "Int64Index is deprecated in 3.4.0, and will be removed in 4.0.0. Use Index instead.",
+            FutureWarning,
+        )
         if not is_hashable(name):
             raise TypeError("Index.name must be a hashable type")
 
@@ -157,6 +162,10 @@ class Float64Index(NumericIndex):
         copy: bool = False,
         name: Optional[Name] = None,
     ) -> "Float64Index":
+        warnings.warn(
+            "Float64Index is deprecated in 3.4.0, and will be removed in 4.0.0. Use Index instead.",
+            FutureWarning,
+        )
         if not is_hashable(name):
             raise TypeError("Index.name must be a hashable type")
 
diff --git a/python/pyspark/pandas/namespace.py b/python/pyspark/pandas/namespace.py
index 4a8fcb181e1..e90a4fafae9 100644
--- a/python/pyspark/pandas/namespace.py
+++ b/python/pyspark/pandas/namespace.py
@@ -1900,6 +1900,11 @@ def date_range(
     """
     assert freq not in ["N", "ns"], "nanoseconds is not supported"
     assert tz is None, "Localized DatetimeIndex is not supported"
+    if closed is not None:
+        warnings.warn(
+            "Argument `closed` is deprecated in 3.4.0 and will be removed in 4.0.0.",
+            FutureWarning,
+        )
 
     return cast(
         DatetimeIndex,
diff --git a/python/pyspark/pandas/plot/matplotlib.py b/python/pyspark/pandas/plot/matplotlib.py
index b64586ae85b..39e862bbae8 100644
--- a/python/pyspark/pandas/plot/matplotlib.py
+++ b/python/pyspark/pandas/plot/matplotlib.py
@@ -15,6 +15,7 @@
 # limitations under the License.
 #
 
+import warnings
 from distutils.version import LooseVersion
 
 import matplotlib as mat
@@ -855,6 +856,10 @@ def plot_frame(
       for bar plot layout by `position` keyword.
       From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 (center)
     """
+    warnings.warn(
+        "Argument `sort_columns` will be removed in 4.0.0.",
+        FutureWarning,
+    )
 
     return _plot(
         data,
diff --git a/python/pyspark/pandas/series.py b/python/pyspark/pandas/series.py
index 3792fd7f7f8..ca9d39bb695 100644
--- a/python/pyspark/pandas/series.py
+++ b/python/pyspark/pandas/series.py
@@ -1682,6 +1682,10 @@ class Series(Frame, IndexOpsMixin, Generic[T]):
         multicolumn_format: Optional[str] = None,
         multirow: Optional[bool] = None,
     ) -> Optional[str]:
+        warnings.warn(
+            "Argument `col_space` will be removed in 4.0.0.",
+            FutureWarning,
+        )
 
         args = locals()
         psseries = self
@@ -3638,7 +3642,7 @@ class Series(Frame, IndexOpsMixin, Generic[T]):
         """
         warnings.warn(
             "The Series.append method is deprecated "
-            "and will be removed in a future version. "
+            "and will be removed in 4.0.0. "
             "Use pyspark.pandas.concat instead.",
             FutureWarning,
         )
@@ -4193,6 +4197,11 @@ class Series(Frame, IndexOpsMixin, Generic[T]):
         >>> s.rank(numeric_only=True)
         Series([], Name: A, dtype: float64)
         """
+        warnings.warn(
+            "Default value of `numeric_only` will be changed to `False` "
+            "instead of `None` in 4.0.0.",
+            FutureWarning,
+        )
         is_numeric = isinstance(self.spark.data_type, (NumericType, BooleanType))
         if numeric_only and not is_numeric:
             return ps.Series([], dtype="float64", name=self.name)
@@ -5956,7 +5965,7 @@ class Series(Frame, IndexOpsMixin, Generic[T]):
         1.0
         """
         warnings.warn(
-            "The 'mad' method is deprecated and will be removed in a future version. "
+            "The 'mad' method is deprecated and will be removed in 4.0.0. "
             "To compute the same result, you may do `(series - series.mean()).abs().mean()`.",
             FutureWarning,
         )
@@ -6128,7 +6137,10 @@ class Series(Frame, IndexOpsMixin, Generic[T]):
             iteritems is deprecated and will be removed in a future version.
             Use .items instead.
         """
-        warnings.warn("Deprecated in 3.4, Use Series.items instead.", FutureWarning)
+        warnings.warn(
+            "Deprecated in 3.4, and will be removed in 4.0.0. Use Series.items instead.",
+            FutureWarning,
+        )
         return self.items()
 
     def droplevel(self, level: Union[int, Name, List[Union[int, Name]]]) -> "Series":
diff --git a/python/pyspark/pandas/strings.py b/python/pyspark/pandas/strings.py
index d93f08c0196..7c489dea2e3 100644
--- a/python/pyspark/pandas/strings.py
+++ b/python/pyspark/pandas/strings.py
@@ -18,6 +18,7 @@
 """
 String functions on pandas-on-Spark Series
 """
+import warnings
 from typing import (
     Any,
     Callable,
@@ -1604,6 +1605,10 @@ class StringMethods:
         2    None
         dtype: object
         """
+        warnings.warn(
+            "Default value of `regex` will be changed to `False` instead of `True` in 4.0.0.",
+            FutureWarning,
+        )
 
         def pandas_replace(s) -> ps.Series[str]:  # type: ignore[no-untyped-def]
             return s.str.replace(pat, repl, n=n, case=case, flags=flags, regex=regex)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org