You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by mi...@apache.org on 2022/08/30 12:41:42 UTC
[superset] 04/13: fix: Box Plot Chart throws an error when the average (AVG) / SUM is being calculated on the Metrics (#20235)
This is an automated email from the ASF dual-hosted git repository.
michaelsmolina pushed a commit to branch 1.5
in repository https://gitbox.apache.org/repos/asf/superset.git
commit 4073b58aa2123991734872c364b344261e51b22f
Author: Diego Medina <di...@gmail.com>
AuthorDate: Wed Jun 1 22:00:04 2022 -0400
fix: Box Plot Chart throws an error when the average (AVG) / SUM is being calculated on the Metrics (#20235)
* fix: Box Plot Chart throws an error when the average (AVG) / SUM is being calculated on the Metrics
* add test
(cherry picked from commit 8638f59b4c7ebe954afe46bbfbd5880f1ae6afda)
---
superset/utils/pandas_postprocessing/boxplot.py | 9 +++++++-
.../pandas_postprocessing/test_boxplot.py | 25 ++++++++++++++++++++++
2 files changed, 33 insertions(+), 1 deletion(-)
diff --git a/superset/utils/pandas_postprocessing/boxplot.py b/superset/utils/pandas_postprocessing/boxplot.py
index 4436af9182..40ce9200d3 100644
--- a/superset/utils/pandas_postprocessing/boxplot.py
+++ b/superset/utils/pandas_postprocessing/boxplot.py
@@ -18,7 +18,7 @@ from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
import numpy as np
from flask_babel import gettext as _
-from pandas import DataFrame, Series
+from pandas import DataFrame, Series, to_numeric
from superset.exceptions import InvalidPostProcessingError
from superset.utils.core import PostProcessingBoxplotWhiskerType
@@ -122,4 +122,11 @@ def boxplot(
for operator_name, operator in operators.items()
for metric in metrics
}
+
+ # nanpercentile needs numeric values, otherwise the isnan function
+ # that's used in the underlying function will fail
+ for column in metrics:
+ if df.dtypes[column] == np.object:
+ df[column] = to_numeric(df[column], errors="coerce")
+
return aggregate(df, groupby=groupby, aggregates=aggregates)
diff --git a/tests/unit_tests/pandas_postprocessing/test_boxplot.py b/tests/unit_tests/pandas_postprocessing/test_boxplot.py
index 9252b0da78..27dff0adeb 100644
--- a/tests/unit_tests/pandas_postprocessing/test_boxplot.py
+++ b/tests/unit_tests/pandas_postprocessing/test_boxplot.py
@@ -124,3 +124,28 @@ def test_boxplot_percentile_incorrect_params():
metrics=["cars"],
percentiles=[10, 90, 10],
)
+
+
+def test_boxplot_type_coercion():
+    # Copy first: the str cast below must not leak into the module-level names_df.
+    df = names_df.copy()
+    df["cars"] = df["cars"].astype(str)
+    df = boxplot(
+        df=df,
+        groupby=["region"],
+        whisker_type=PostProcessingBoxplotWhiskerType.TUKEY,
+        metrics=["cars"],
+    )
+
+    assert set(df.columns) == {
+        "cars__mean",
+        "cars__median",
+        "cars__q1",
+        "cars__q3",
+        "cars__max",
+        "cars__min",
+        "cars__count",
+        "cars__outliers",
+        "region",
+    }
+    assert len(df) == 4