You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain text copy.
Posted to commits@superset.apache.org by mi...@apache.org on 2024/03/19 11:54:35 UTC
(superset) 06/09: fix(postprocessing): resample with holes (#27487)
This is an automated email from the ASF dual-hosted git repository.
michaelsmolina pushed a commit to branch 3.1
in repository https://gitbox.apache.org/repos/asf/superset.git
commit f1f20e436c1cfc305c7c1e5e4295ce448d5a1149
Author: Ville Brofeldt <33...@users.noreply.github.com>
AuthorDate: Thu Mar 14 12:02:01 2024 -0700
fix(postprocessing): resample with holes (#27487)
(cherry picked from commit 7f19d296b16d8463931b42c8258600b210b56475)
---
superset/utils/pandas_postprocessing/resample.py | 5 +-
.../pandas_postprocessing/test_resample.py | 54 +++++++++++++++++++++-
2 files changed, 57 insertions(+), 2 deletions(-)
diff --git a/superset/utils/pandas_postprocessing/resample.py b/superset/utils/pandas_postprocessing/resample.py
index a82d7031e9..a689895bd6 100644
--- a/superset/utils/pandas_postprocessing/resample.py
+++ b/superset/utils/pandas_postprocessing/resample.py
@@ -43,13 +43,16 @@ def resample(
raise InvalidPostProcessingError(_("Resample operation requires DatetimeIndex"))
if method not in RESAMPLE_METHOD:
raise InvalidPostProcessingError(
- _("Resample method should in ") + ", ".join(RESAMPLE_METHOD) + "."
+ _("Resample method should be in ") + ", ".join(RESAMPLE_METHOD) + "."
)
if method == "asfreq" and fill_value is not None:
_df = df.resample(rule).asfreq(fill_value=fill_value)
+ _df = _df.fillna(fill_value)
elif method == "linear":
_df = df.resample(rule).interpolate()
else:
_df = getattr(df.resample(rule), method)()
+ if method in ("ffill", "bfill"):
+ _df = _df.fillna(method=method)
return _df
diff --git a/tests/unit_tests/pandas_postprocessing/test_resample.py b/tests/unit_tests/pandas_postprocessing/test_resample.py
index b1414c5fe8..207863ab87 100644
--- a/tests/unit_tests/pandas_postprocessing/test_resample.py
+++ b/tests/unit_tests/pandas_postprocessing/test_resample.py
@@ -21,7 +21,11 @@ from pandas import to_datetime
from superset.exceptions import InvalidPostProcessingError
from superset.utils import pandas_postprocessing as pp
-from tests.unit_tests.fixtures.dataframes import categories_df, timeseries_df
+from tests.unit_tests.fixtures.dataframes import (
+ categories_df,
+ timeseries_df,
+ timeseries_with_gap_df,
+)
def test_resample_should_not_side_effect():
@@ -63,6 +67,29 @@ def test_resample():
)
+def test_resample_ffill_with_gaps():
+ post_df = pp.resample(df=timeseries_with_gap_df, rule="1D", method="ffill")
+ assert post_df.equals(
+ pd.DataFrame(
+ index=pd.to_datetime(
+ [
+ "2019-01-01",
+ "2019-01-02",
+ "2019-01-03",
+ "2019-01-04",
+ "2019-01-05",
+ "2019-01-06",
+ "2019-01-07",
+ ]
+ ),
+ data={
+ "label": ["x", "y", "y", "y", "z", "z", "q"],
+ "y": [1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 4.0],
+ },
+ )
+ )
+
+
def test_resample_zero_fill():
post_df = pp.resample(df=timeseries_df, rule="1D", method="asfreq", fill_value=0)
assert post_df.equals(
@@ -86,6 +113,31 @@ def test_resample_zero_fill():
)
+def test_resample_zero_fill_with_gaps():
+ post_df = pp.resample(
+ df=timeseries_with_gap_df, rule="1D", method="asfreq", fill_value=0
+ )
+ assert post_df.equals(
+ pd.DataFrame(
+ index=pd.to_datetime(
+ [
+ "2019-01-01",
+ "2019-01-02",
+ "2019-01-03",
+ "2019-01-04",
+ "2019-01-05",
+ "2019-01-06",
+ "2019-01-07",
+ ]
+ ),
+ data={
+ "label": ["x", "y", 0, 0, "z", 0, "q"],
+ "y": [1.0, 2.0, 0, 0, 0, 0, 4.0],
+ },
+ )
+ )
+
+
def test_resample_after_pivot():
df = pd.DataFrame(
data={