You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain text copy.
Posted to commits@beam.apache.org by bh...@apache.org on 2022/02/25 21:24:04 UTC

[beam] branch master updated: [BEAM-12563] swaplevel general function for dataframe and series (#15944)

This is an automated email from the ASF dual-hosted git repository.

bhulette pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new ba2aef5  [BEAM-12563] swaplevel general function for dataframe and series (#15944)
ba2aef5 is described below

commit ba2aef5972eca7322ae68f81bafd52d894e43edf
Author: AlikRodriguez <74...@users.noreply.github.com>
AuthorDate: Fri Feb 25 15:22:12 2022 -0600

    [BEAM-12563] swaplevel general function for dataframe and series (#15944)
    
    * swaplevel general function for dataframe and series
    
    * pandas doctest
    
    * doctest
    
    * Update frames.py
    
    * lint error
---
 sdks/python/apache_beam/dataframe/frames.py               | 10 ++++++++++
 sdks/python/apache_beam/dataframe/frames_test.py          | 10 ++++++++++
 sdks/python/apache_beam/dataframe/pandas_doctests_test.py |  2 --
 3 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/sdks/python/apache_beam/dataframe/frames.py b/sdks/python/apache_beam/dataframe/frames.py
index 3cbf613..00961a6 100644
--- a/sdks/python/apache_beam/dataframe/frames.py
+++ b/sdks/python/apache_beam/dataframe/frames.py
@@ -231,6 +231,16 @@ class DeferredDataFrameOrSeries(frame_base.DeferredFrame):
 
   @frame_base.with_docs_from(pd.DataFrame)
   @frame_base.args_to_kwargs(pd.DataFrame)
+  def swaplevel(self, **kwargs):
+    return frame_base.DeferredFrame.wrap(
+        expressions.ComputedExpression(
+            'swaplevel',
+            lambda df: df.swaplevel(**kwargs), [self._expr],
+            requires_partition_by=partitionings.Arbitrary(),
+            preserves_partition_by=partitionings.Arbitrary()))
+
+  @frame_base.with_docs_from(pd.DataFrame)
+  @frame_base.args_to_kwargs(pd.DataFrame)
   @frame_base.populate_defaults(pd.DataFrame)
   @frame_base.maybe_inplace
   def fillna(self, value, method, axis, limit, **kwargs):
diff --git a/sdks/python/apache_beam/dataframe/frames_test.py b/sdks/python/apache_beam/dataframe/frames_test.py
index c493db5..ca4f561 100644
--- a/sdks/python/apache_beam/dataframe/frames_test.py
+++ b/sdks/python/apache_beam/dataframe/frames_test.py
@@ -624,6 +624,16 @@ class DeferredFrameTest(_AbstractFrameTest):
         nonparallel=True,
         check_proxy=False)
 
+  def test_swaplevel(self):
+    df = pd.DataFrame(
+        {"Grade": ["A", "B", "A", "C"]},
+        index=[
+            ["Final exam", "Final exam", "Coursework", "Coursework"],
+            ["History", "Geography", "History", "Geography"],
+            ["January", "February", "March", "April"],
+        ])
+    self._run_test(lambda df: df.swaplevel(), df)
+
   def test_value_counts_with_nans(self):
     # similar to doctests that verify value_counts, but include nan values to
     # make sure we handle them correctly.
diff --git a/sdks/python/apache_beam/dataframe/pandas_doctests_test.py b/sdks/python/apache_beam/dataframe/pandas_doctests_test.py
index 1a30aa8..19113af 100644
--- a/sdks/python/apache_beam/dataframe/pandas_doctests_test.py
+++ b/sdks/python/apache_beam/dataframe/pandas_doctests_test.py
@@ -269,7 +269,6 @@ class DoctestTest(unittest.TestCase):
                 # frames_test.py::DeferredFrameTest::test_groupby_transform_sum
                 "df.groupby('Date')['Data'].transform('sum')",
             ],
-            'pandas.core.frame.DataFrame.swaplevel': ['*'],
             'pandas.core.frame.DataFrame.melt': ['*'],
             'pandas.core.frame.DataFrame.reindex_axis': ['*'],
             'pandas.core.frame.DataFrame.round': [
@@ -511,7 +510,6 @@ class DoctestTest(unittest.TestCase):
                 'ser.groupby(["a", "b", "a", np.nan]).mean()',
                 'ser.groupby(["a", "b", "a", np.nan], dropna=False).mean()',
             ],
-            'pandas.core.series.Series.swaplevel' :['*']
         },
         skip={
             # Relies on setting values with iloc