You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by mi...@apache.org on 2024/03/19 13:09:54 UTC

(superset) 01/06: fix(postprocessing): resample with holes (#27487)

This is an automated email from the ASF dual-hosted git repository.

michaelsmolina pushed a commit to branch 4.0
in repository https://gitbox.apache.org/repos/asf/superset.git

commit 1016fd92f665919ae27d3f9dfd143ffb235a9489
Author: Ville Brofeldt <33...@users.noreply.github.com>
AuthorDate: Thu Mar 14 12:02:01 2024 -0700

    fix(postprocessing): resample with holes (#27487)
    
    (cherry picked from commit 7f19d296b16d8463931b42c8258600b210b56475)
---
 superset/utils/pandas_postprocessing/resample.py   |  5 +-
 .../pandas_postprocessing/test_resample.py         | 54 +++++++++++++++++++++-
 2 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/superset/utils/pandas_postprocessing/resample.py b/superset/utils/pandas_postprocessing/resample.py
index a82d7031e9..a689895bd6 100644
--- a/superset/utils/pandas_postprocessing/resample.py
+++ b/superset/utils/pandas_postprocessing/resample.py
@@ -43,13 +43,16 @@ def resample(
         raise InvalidPostProcessingError(_("Resample operation requires DatetimeIndex"))
     if method not in RESAMPLE_METHOD:
         raise InvalidPostProcessingError(
-            _("Resample method should in ") + ", ".join(RESAMPLE_METHOD) + "."
+            _("Resample method should be in ") + ", ".join(RESAMPLE_METHOD) + "."
         )
 
     if method == "asfreq" and fill_value is not None:
         _df = df.resample(rule).asfreq(fill_value=fill_value)
+        _df = _df.fillna(fill_value)
     elif method == "linear":
         _df = df.resample(rule).interpolate()
     else:
         _df = getattr(df.resample(rule), method)()
+        if method in ("ffill", "bfill"):
+            _df = _df.fillna(method=method)
     return _df
diff --git a/tests/unit_tests/pandas_postprocessing/test_resample.py b/tests/unit_tests/pandas_postprocessing/test_resample.py
index b1414c5fe8..207863ab87 100644
--- a/tests/unit_tests/pandas_postprocessing/test_resample.py
+++ b/tests/unit_tests/pandas_postprocessing/test_resample.py
@@ -21,7 +21,11 @@ from pandas import to_datetime
 
 from superset.exceptions import InvalidPostProcessingError
 from superset.utils import pandas_postprocessing as pp
-from tests.unit_tests.fixtures.dataframes import categories_df, timeseries_df
+from tests.unit_tests.fixtures.dataframes import (
+    categories_df,
+    timeseries_df,
+    timeseries_with_gap_df,
+)
 
 
 def test_resample_should_not_side_effect():
@@ -63,6 +67,29 @@ def test_resample():
     )
 
 
+def test_resample_ffill_with_gaps():
+    post_df = pp.resample(df=timeseries_with_gap_df, rule="1D", method="ffill")
+    assert post_df.equals(
+        pd.DataFrame(
+            index=pd.to_datetime(
+                [
+                    "2019-01-01",
+                    "2019-01-02",
+                    "2019-01-03",
+                    "2019-01-04",
+                    "2019-01-05",
+                    "2019-01-06",
+                    "2019-01-07",
+                ]
+            ),
+            data={
+                "label": ["x", "y", "y", "y", "z", "z", "q"],
+                "y": [1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 4.0],
+            },
+        )
+    )
+
+
 def test_resample_zero_fill():
     post_df = pp.resample(df=timeseries_df, rule="1D", method="asfreq", fill_value=0)
     assert post_df.equals(
@@ -86,6 +113,31 @@ def test_resample_zero_fill():
     )
 
 
+def test_resample_zero_fill_with_gaps():
+    post_df = pp.resample(
+        df=timeseries_with_gap_df, rule="1D", method="asfreq", fill_value=0
+    )
+    assert post_df.equals(
+        pd.DataFrame(
+            index=pd.to_datetime(
+                [
+                    "2019-01-01",
+                    "2019-01-02",
+                    "2019-01-03",
+                    "2019-01-04",
+                    "2019-01-05",
+                    "2019-01-06",
+                    "2019-01-07",
+                ]
+            ),
+            data={
+                "label": ["x", "y", 0, 0, "z", 0, "q"],
+                "y": [1.0, 2.0, 0, 0, 0, 0, 4.0],
+            },
+        )
+    )
+
+
 def test_resample_after_pivot():
     df = pd.DataFrame(
         data={