You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by mi...@apache.org on 2022/08/30 12:41:42 UTC

[superset] 04/13: fix: Box Plot Chart throws an error when the average (AVG) / SUM is being calculated on the Metrics (#20235)

This is an automated email from the ASF dual-hosted git repository.

michaelsmolina pushed a commit to branch 1.5
in repository https://gitbox.apache.org/repos/asf/superset.git

commit 4073b58aa2123991734872c364b344261e51b22f
Author: Diego Medina <di...@gmail.com>
AuthorDate: Wed Jun 1 22:00:04 2022 -0400

    fix: Box Plot Chart throws an error when the average (AVG) / SUM is being calculated on the Metrics (#20235)
    
    * fix: Box Plot Chart throws an error when the average (AVG) / SUM is being calculated on the Metrics
    
    * add test
    
    (cherry picked from commit 8638f59b4c7ebe954afe46bbfbd5880f1ae6afda)
---
 superset/utils/pandas_postprocessing/boxplot.py    |  9 +++++++-
 .../pandas_postprocessing/test_boxplot.py          | 25 ++++++++++++++++++++++
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/superset/utils/pandas_postprocessing/boxplot.py b/superset/utils/pandas_postprocessing/boxplot.py
index 4436af9182..40ce9200d3 100644
--- a/superset/utils/pandas_postprocessing/boxplot.py
+++ b/superset/utils/pandas_postprocessing/boxplot.py
@@ -18,7 +18,7 @@ from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
 
 import numpy as np
 from flask_babel import gettext as _
-from pandas import DataFrame, Series
+from pandas import DataFrame, Series, to_numeric
 
 from superset.exceptions import InvalidPostProcessingError
 from superset.utils.core import PostProcessingBoxplotWhiskerType
@@ -122,4 +122,11 @@ def boxplot(
         for operator_name, operator in operators.items()
         for metric in metrics
     }
+
+    # nanpercentile needs numeric values, otherwise the isnan function
+    # that's used in the underlying function will fail
+    for column in metrics:
+        if df.dtypes[column] == np.object:
+            df[column] = to_numeric(df[column], errors="coerce")
+
     return aggregate(df, groupby=groupby, aggregates=aggregates)
diff --git a/tests/unit_tests/pandas_postprocessing/test_boxplot.py b/tests/unit_tests/pandas_postprocessing/test_boxplot.py
index 9252b0da78..27dff0adeb 100644
--- a/tests/unit_tests/pandas_postprocessing/test_boxplot.py
+++ b/tests/unit_tests/pandas_postprocessing/test_boxplot.py
@@ -124,3 +124,28 @@ def test_boxplot_percentile_incorrect_params():
             metrics=["cars"],
             percentiles=[10, 90, 10],
         )
+
+
+def test_boxplot_type_coercion():
+    """boxplot coerces object-dtype (string) metric columns to numeric."""
+    df = names_df.copy()  # copy so the shared fixture is not mutated
+    df["cars"] = df["cars"].astype(str)
+    df = boxplot(
+        df=df,
+        groupby=["region"],
+        whisker_type=PostProcessingBoxplotWhiskerType.TUKEY,
+        metrics=["cars"],
+    )
+
+    assert set(df.columns) == {
+        "cars__mean",
+        "cars__median",
+        "cars__q1",
+        "cars__q3",
+        "cars__max",
+        "cars__min",
+        "cars__count",
+        "cars__outliers",
+        "region",
+    }
+    assert len(df) == 4