You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ue...@apache.org on 2021/08/18 18:18:06 UTC
[spark] branch master updated: [SPARK-36388][SPARK-36386][PYTHON][FOLLOWUP] Fix DataFrame groupby-rolling and groupby-expanding to follow pandas 1.3

This is an automated email from the ASF dual-hosted git repository.

ueshin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new c91ae54  [SPARK-36388][SPARK-36386][PYTHON][FOLLOWUP] Fix DataFrame groupby-rolling and groupby-expanding to follow pandas 1.3
c91ae54 is described below

commit c91ae544fdd44c67fe1e4c73825570dbe71a3206
Author: itholic <ha...@databricks.com>
AuthorDate: Wed Aug 18 11:17:01 2021 -0700

    [SPARK-36388][SPARK-36386][PYTHON][FOLLOWUP] Fix DataFrame groupby-rolling and groupby-expanding to follow pandas 1.3
    
    ### What changes were proposed in this pull request?
    
    This PR is a follow-up to https://github.com/apache/spark/pull/33646 that adds the missing tests.
    
    ### Why are the changes needed?
    
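    The `test_ops_on_diff_frames_groupby_rolling` and `test_ops_on_diff_frames_groupby_expanding` tests skipped the DataFrame groupby comparison on pandas >= 1.3 (it was left as a TODO), so that case was untested.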
    
    ### Does this PR introduce _any_ user-facing change?
    
    No
    
    ### How was this patch tested?
    
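    Unit tests (the updated `test_ops_on_diff_frames_groupby_rolling` and `test_ops_on_diff_frames_groupby_expanding`).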
    
    Closes #33776 from itholic/SPARK-36388-followup.
    
    Authored-by: itholic <ha...@databricks.com>
    Signed-off-by: Takuya UESHIN <ue...@databricks.com>
---
 .../pandas/tests/test_ops_on_diff_frames_groupby_expanding.py    | 9 ++++++---
 .../pandas/tests/test_ops_on_diff_frames_groupby_rolling.py      | 9 ++++++---
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py b/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py
index 223adea..634cbd7 100644
--- a/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py
+++ b/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_expanding.py
@@ -52,14 +52,17 @@ class OpsOnDiffFramesGroupByExpandingTest(PandasOnSparkTestCase, TestUtils):
         psdf = ps.from_pandas(pdf)
         kkey = ps.from_pandas(pkey)
 
+        # The behavior of GroupBy.expanding is changed from pandas 1.3.
         if LooseVersion(pd.__version__) >= LooseVersion("1.3"):
-            # TODO(SPARK-36367): Fix the behavior to follow pandas >= 1.3
-            pass
-        else:
             self.assert_eq(
                 getattr(psdf.groupby(kkey).expanding(2), f)().sort_index(),
                 getattr(pdf.groupby(pkey).expanding(2), f)().sort_index(),
             )
+        else:
+            self.assert_eq(
+                getattr(psdf.groupby(kkey).expanding(2), f)().sort_index(),
+                getattr(pdf.groupby(pkey).expanding(2), f)().drop("a", axis=1).sort_index(),
+            )
 
         self.assert_eq(
             getattr(psdf.groupby(kkey)["b"].expanding(2), f)().sort_index(),
diff --git a/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py b/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py
index 4f97769..04ea448 100644
--- a/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py
+++ b/python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby_rolling.py
@@ -50,14 +50,17 @@ class OpsOnDiffFramesGroupByRollingTest(PandasOnSparkTestCase, TestUtils):
         psdf = ps.from_pandas(pdf)
         kkey = ps.from_pandas(pkey)
 
+        # The behavior of GroupBy.rolling is changed from pandas 1.3.
         if LooseVersion(pd.__version__) >= LooseVersion("1.3"):
-            # TODO(SPARK-36367): Fix the behavior to follow pandas >= 1.3
-            pass
-        else:
             self.assert_eq(
                 getattr(psdf.groupby(kkey).rolling(2), f)().sort_index(),
                 getattr(pdf.groupby(pkey).rolling(2), f)().sort_index(),
             )
+        else:
+            self.assert_eq(
+                getattr(psdf.groupby(kkey).rolling(2), f)().sort_index(),
+                getattr(pdf.groupby(pkey).rolling(2), f)().drop("a", axis=1).sort_index(),
+            )
 
         self.assert_eq(
             getattr(psdf.groupby(kkey)["b"].rolling(2), f)().sort_index(),

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org