You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2022/09/21 02:07:07 UTC
[spark] branch master updated: [SPARK-40500][PS] Deprecate `iteritems` in DataFrame and Series
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 44573a26baf [SPARK-40500][PS] Deprecate `iteritems` in DataFrame and Series
44573a26baf is described below
commit 44573a26bafb487f0e0c854b141eebf9990b22c1
Author: Ruifeng Zheng <ru...@apache.org>
AuthorDate: Wed Sep 21 11:06:50 2022 +0900
[SPARK-40500][PS] Deprecate `iteritems` in DataFrame and Series
### What changes were proposed in this pull request?
1. Use `pd.items` instead of `pd.iteritems`
2. Deprecate `ps.iteritems`
### Why are the changes needed?
`pd.iteritems` is deprecated in 1.5
before:
```
In [4]: import pyspark.pandas as ps
In [5]: ps.Series([3, 4, 1, 1, 5])
/Users/ruifeng.zheng/Dev/spark/python/pyspark/pandas/internal.py:1573: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
fields = [
/Users/ruifeng.zheng/Dev/spark/python/pyspark/sql/pandas/conversion.py:486: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
for column, series in pdf.iteritems():
0 3
1 4
2 1
3 1
4 5
dtype: int64
```
after:
```
In [1]: import pyspark.pandas as ps
In [2]: ps.Series([3, 4, 1, 1, 5])
0 3
1 4
2 1
3 1
4 5
dtype: int64
```
### Does this PR introduce _any_ user-facing change?
Eliminate `iteritems` warnings
### How was this patch tested?
existing UT
Closes #37947 from zhengruifeng/ps_iteritems_to_items.
Lead-authored-by: Ruifeng Zheng <ru...@apache.org>
Co-authored-by: Hyukjin Kwon <gu...@gmail.com>
Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
python/pyspark/pandas/frame.py | 15 +++++++++++----
python/pyspark/pandas/internal.py | 2 +-
python/pyspark/pandas/namespace.py | 2 +-
python/pyspark/pandas/series.py | 15 +++++++++++----
python/pyspark/sql/pandas/conversion.py | 6 +++---
python/pyspark/sql/tests/test_pandas_grouped_map.py | 2 +-
6 files changed, 28 insertions(+), 14 deletions(-)
diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py
index 014fc175315..8061ea8257f 100644
--- a/python/pyspark/pandas/frame.py
+++ b/python/pyspark/pandas/frame.py
@@ -1850,7 +1850,7 @@ class DataFrame(Frame, Generic[T]):
sser.name = None
return sser
- def iteritems(self) -> Iterator[Tuple[Name, "Series"]]:
+ def items(self) -> Iterator[Tuple[Name, "Series"]]:
"""
Iterator over (column name, Series) pairs.
@@ -2054,9 +2054,16 @@ class DataFrame(Frame, Generic[T]):
):
yield tuple(([k] if index else []) + list(v))
- def items(self) -> Iterator[Tuple[Name, "Series"]]:
- """This is an alias of ``iteritems``."""
- return self.iteritems()
+ def iteritems(self) -> Iterator[Tuple[Name, "Series"]]:
+ """
+ This is an alias of ``items``.
+
+ .. deprecated:: 3.4.0
+ iteritems is deprecated and will be removed in a future version.
+ Use .items instead.
+ """
+ warnings.warn("Deprecated in 3.4.0, Use DataFrame.items instead.", FutureWarning)
+ return self.items()
def to_clipboard(self, excel: bool = True, sep: Optional[str] = None, **kwargs: Any) -> None:
"""
diff --git a/python/pyspark/pandas/internal.py b/python/pyspark/pandas/internal.py
index b2e67492829..debc68dbd06 100644
--- a/python/pyspark/pandas/internal.py
+++ b/python/pyspark/pandas/internal.py
@@ -1579,7 +1579,7 @@ class InternalFrame:
nullable=bool(col.isnull().any()),
),
)
- for (name, col), dtype in zip(reset_index.iteritems(), index_dtypes + data_dtypes)
+ for (name, col), dtype in zip(reset_index.items(), index_dtypes + data_dtypes)
]
return (
diff --git a/python/pyspark/pandas/namespace.py b/python/pyspark/pandas/namespace.py
index 67e1af38c43..21468855858 100644
--- a/python/pyspark/pandas/namespace.py
+++ b/python/pyspark/pandas/namespace.py
@@ -1191,7 +1191,7 @@ def read_excel(
)
reset_index = pdf.reset_index()
- for name, col in reset_index.iteritems():
+ for name, col in reset_index.items():
dt = col.dtype
if is_datetime64_dtype(dt) or is_datetime64tz_dtype(dt):
continue
diff --git a/python/pyspark/pandas/series.py b/python/pyspark/pandas/series.py
index 5222198ec5e..ba954b161c4 100644
--- a/python/pyspark/pandas/series.py
+++ b/python/pyspark/pandas/series.py
@@ -5951,7 +5951,7 @@ class Series(Frame, IndexOpsMixin, Generic[T]):
"""
return self.head(2)._to_internal_pandas().item()
- def iteritems(self) -> Iterable[Tuple[Name, Any]]:
+ def items(self) -> Iterable[Tuple[Name, Any]]:
"""
Lazily iterate over (index, value) tuples.
@@ -5998,9 +5998,16 @@ class Series(Frame, IndexOpsMixin, Generic[T]):
):
yield k, v
- def items(self) -> Iterable[Tuple[Name, Any]]:
- """This is an alias of ``iteritems``."""
- return self.iteritems()
+ def iteritems(self) -> Iterable[Tuple[Name, Any]]:
+ """
+ This is an alias of ``items``.
+
+ .. deprecated:: 3.4.0
+ iteritems is deprecated and will be removed in a future version.
+ Use .items instead.
+ """
+ warnings.warn("Deprecated in 3.4, Use Series.items instead.", FutureWarning)
+ return self.items()
def droplevel(self, level: Union[int, Name, List[Union[int, Name]]]) -> "Series":
"""
diff --git a/python/pyspark/sql/pandas/conversion.py b/python/pyspark/sql/pandas/conversion.py
index d7f1ee5b8da..afbb4b6788b 100644
--- a/python/pyspark/sql/pandas/conversion.py
+++ b/python/pyspark/sql/pandas/conversion.py
@@ -471,7 +471,7 @@ class SparkConversionMixin:
pdf[field.name] = s
else:
should_localize = not is_timestamp_ntz_preferred()
- for column, series in pdf.iteritems():
+ for column, series in pdf.items():
s = series
if should_localize and is_datetime64tz_dtype(s.dtype) and s.dt.tz is not None:
s = _check_series_convert_timestamps_tz_local(series, timezone)
@@ -483,7 +483,7 @@ class SparkConversionMixin:
copied = True
pdf[column] = s
- for column, series in pdf.iteritems():
+ for column, series in pdf.items():
if is_timedelta64_dtype(series):
if not copied:
pdf = pdf.copy()
@@ -601,7 +601,7 @@ class SparkConversionMixin:
# Create list of Arrow (columns, type) for serializer dump_stream
arrow_data = [
- [(c, t) for (_, c), t in zip(pdf_slice.iteritems(), arrow_types)]
+ [(c, t) for (_, c), t in zip(pdf_slice.items(), arrow_types)]
for pdf_slice in pdf_slices
]
diff --git a/python/pyspark/sql/tests/test_pandas_grouped_map.py b/python/pyspark/sql/tests/test_pandas_grouped_map.py
index 4fd5207f73a..b05c8fd86a9 100644
--- a/python/pyspark/sql/tests/test_pandas_grouped_map.py
+++ b/python/pyspark/sql/tests/test_pandas_grouped_map.py
@@ -708,7 +708,7 @@ class GroupedMapInPandasTests(ReusedSQLTestCase):
window_range = key[1]
# Make sure the key with group and window values are correct
- for _, i in pdf.id.iteritems():
+ for _, i in pdf.id.items():
assert expected_key[i][0] == group, "{} != {}".format(expected_key[i][0], group)
assert expected_key[i][1] == window_range, "{} != {}".format(
expected_key[i][1], window_range
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org