You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by yo...@apache.org on 2022/02/18 15:30:47 UTC

[superset] branch master updated: fix: contribution operator meets nan value (#18782)

This is an automated email from the ASF dual-hosted git repository.

yongjiezhao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git


The following commit(s) were added to refs/heads/master by this push:
     new 987740a  fix: contribution operator meets nan value (#18782)
987740a is described below

commit 987740aa8dfff4bf771b587a40f1e12811453660
Author: Yongjie Zhao <yo...@gmail.com>
AuthorDate: Fri Feb 18 23:28:28 2022 +0800

    fix: contribution operator meets nan value (#18782)
---
 .../src/Timeseries/Area/controlPanel.tsx           |  4 +--
 .../src/Timeseries/Regular/Bar/controlPanel.tsx    |  4 +--
 .../src/Timeseries/Regular/controlPanel.tsx        |  4 +--
 .../src/Timeseries/Step/controlPanel.tsx           |  4 +--
 .../src/Timeseries/controlPanel.tsx                |  4 +--
 superset/common/query_object.py                    |  2 ++
 .../utils/pandas_postprocessing/contribution.py    |  1 +
 .../pandas_postprocessing/test_contribution.py     | 36 ++++++++++++++--------
 8 files changed, 36 insertions(+), 23 deletions(-)

diff --git a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Area/controlPanel.tsx b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Area/controlPanel.tsx
index 720eb94..bae735b 100644
--- a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Area/controlPanel.tsx
+++ b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Area/controlPanel.tsx
@@ -72,10 +72,10 @@ const config: ControlPanelConfig = {
               default: contributionMode,
               choices: [
                 [null, 'None'],
-                [EchartsTimeseriesContributionType.Row, 'Total'],
+                [EchartsTimeseriesContributionType.Row, 'Row'],
                 [EchartsTimeseriesContributionType.Column, 'Series'],
               ],
-              description: t('Calculate contribution per series or total'),
+              description: t('Calculate contribution per series or row'),
             },
           },
         ],
diff --git a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Regular/Bar/controlPanel.tsx b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Regular/Bar/controlPanel.tsx
index 96cb0b6..08d09a0 100644
--- a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Regular/Bar/controlPanel.tsx
+++ b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Regular/Bar/controlPanel.tsx
@@ -69,10 +69,10 @@ const config: ControlPanelConfig = {
               default: contributionMode,
               choices: [
                 [null, 'None'],
-                [EchartsTimeseriesContributionType.Row, 'Total'],
+                [EchartsTimeseriesContributionType.Row, 'Row'],
                 [EchartsTimeseriesContributionType.Column, 'Series'],
               ],
-              description: t('Calculate contribution per series or total'),
+              description: t('Calculate contribution per series or row'),
             },
           },
         ],
diff --git a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Regular/controlPanel.tsx b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Regular/controlPanel.tsx
index 14edc34..701c3a5 100644
--- a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Regular/controlPanel.tsx
+++ b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Regular/controlPanel.tsx
@@ -66,10 +66,10 @@ const config: ControlPanelConfig = {
               default: contributionMode,
               choices: [
                 [null, 'None'],
-                [EchartsTimeseriesContributionType.Row, 'Total'],
+                [EchartsTimeseriesContributionType.Row, 'Row'],
                 [EchartsTimeseriesContributionType.Column, 'Series'],
               ],
-              description: t('Calculate contribution per series or total'),
+              description: t('Calculate contribution per series or row'),
             },
           },
         ],
diff --git a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Step/controlPanel.tsx b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Step/controlPanel.tsx
index 8178e49..8b40330 100644
--- a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Step/controlPanel.tsx
+++ b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Step/controlPanel.tsx
@@ -72,10 +72,10 @@ const config: ControlPanelConfig = {
               default: contributionMode,
               choices: [
                 [null, 'None'],
-                [EchartsTimeseriesContributionType.Row, 'Total'],
+                [EchartsTimeseriesContributionType.Row, 'Row'],
                 [EchartsTimeseriesContributionType.Column, 'Series'],
               ],
-              description: t('Calculate contribution per series or total'),
+              description: t('Calculate contribution per series or row'),
             },
           },
         ],
diff --git a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/controlPanel.tsx b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/controlPanel.tsx
index 1012b2f..f7fd56b 100644
--- a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/controlPanel.tsx
+++ b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/controlPanel.tsx
@@ -73,10 +73,10 @@ const config: ControlPanelConfig = {
               default: contributionMode,
               choices: [
                 [null, 'None'],
-                [EchartsTimeseriesContributionType.Row, 'Total'],
+                [EchartsTimeseriesContributionType.Row, 'Row'],
                 [EchartsTimeseriesContributionType.Column, 'Series'],
               ],
-              description: t('Calculate contribution per series or total'),
+              description: t('Calculate contribution per series or row'),
             },
           },
         ],
diff --git a/superset/common/query_object.py b/superset/common/query_object.py
index 03ee9cb..2a40155 100644
--- a/superset/common/query_object.py
+++ b/superset/common/query_object.py
@@ -19,6 +19,7 @@ from __future__ import annotations
 
 import logging
 from datetime import datetime, timedelta
+from pprint import pformat
 from typing import Any, Dict, List, NamedTuple, Optional, TYPE_CHECKING
 
 from flask_babel import gettext as _
@@ -395,6 +396,7 @@ class QueryObject:  # pylint: disable=too-many-instance-attributes
         :raises QueryObjectValidationError: If the post processing operation
                  is incorrect
         """
+        logger.debug("post_processing: %s", pformat(self.post_processing))
         for post_process in self.post_processing:
             operation = post_process.get("operation")
             if not operation:
diff --git a/superset/utils/pandas_postprocessing/contribution.py b/superset/utils/pandas_postprocessing/contribution.py
index d813961..7097ea3 100644
--- a/superset/utils/pandas_postprocessing/contribution.py
+++ b/superset/utils/pandas_postprocessing/contribution.py
@@ -49,6 +49,7 @@ def contribution(
     """
     contribution_df = df.copy()
     numeric_df = contribution_df.select_dtypes(include=["number", Decimal])
+    numeric_df.fillna(0, inplace=True)
     # verify column selections
     if columns:
         numeric_columns = numeric_df.columns.tolist()
diff --git a/tests/unit_tests/pandas_postprocessing/test_contribution.py b/tests/unit_tests/pandas_postprocessing/test_contribution.py
index 78212cb..9d2df76 100644
--- a/tests/unit_tests/pandas_postprocessing/test_contribution.py
+++ b/tests/unit_tests/pandas_postprocessing/test_contribution.py
@@ -18,6 +18,8 @@
 from datetime import datetime
 
 import pytest
+from numpy import nan
+from numpy.testing import assert_array_equal
 from pandas import DataFrame
 
 from superset.exceptions import QueryObjectValidationError
@@ -28,9 +30,14 @@ from superset.utils.pandas_postprocessing import contribution
 def test_contribution():
     df = DataFrame(
         {
-            DTTM_ALIAS: [datetime(2020, 7, 16, 14, 49), datetime(2020, 7, 16, 14, 50),],
-            "a": [1, 3],
-            "b": [1, 9],
+            DTTM_ALIAS: [
+                datetime(2020, 7, 16, 14, 49),
+                datetime(2020, 7, 16, 14, 50),
+                datetime(2020, 7, 16, 14, 51),
+            ],
+            "a": [1, 3, nan],
+            "b": [1, 9, nan],
+            "c": [nan, nan, nan],
         }
     )
     with pytest.raises(QueryObjectValidationError, match="not numeric"):
@@ -43,18 +50,20 @@ def test_contribution():
     processed_df = contribution(
         df, orientation=PostProcessingContributionOrientation.ROW,
     )
-    assert processed_df.columns.tolist() == [DTTM_ALIAS, "a", "b"]
-    assert processed_df["a"].tolist() == [0.5, 0.25]
-    assert processed_df["b"].tolist() == [0.5, 0.75]
+    assert processed_df.columns.tolist() == [DTTM_ALIAS, "a", "b", "c"]
+    assert_array_equal(processed_df["a"].tolist(), [0.5, 0.25, nan])
+    assert_array_equal(processed_df["b"].tolist(), [0.5, 0.75, nan])
+    assert_array_equal(processed_df["c"].tolist(), [0, 0, nan])
 
     # cell contribution across column without temporal column
     df.pop(DTTM_ALIAS)
     processed_df = contribution(
         df, orientation=PostProcessingContributionOrientation.COLUMN
     )
-    assert processed_df.columns.tolist() == ["a", "b"]
-    assert processed_df["a"].tolist() == [0.25, 0.75]
-    assert processed_df["b"].tolist() == [0.1, 0.9]
+    assert processed_df.columns.tolist() == ["a", "b", "c"]
+    assert_array_equal(processed_df["a"].tolist(), [0.25, 0.75, 0])
+    assert_array_equal(processed_df["b"].tolist(), [0.1, 0.9, 0])
+    assert_array_equal(processed_df["c"].tolist(), [nan, nan, nan])
 
     # contribution only on selected columns
     processed_df = contribution(
@@ -63,7 +72,8 @@ def test_contribution():
         columns=["a"],
         rename_columns=["pct_a"],
     )
-    assert processed_df.columns.tolist() == ["a", "b", "pct_a"]
-    assert processed_df["a"].tolist() == [1, 3]
-    assert processed_df["b"].tolist() == [1, 9]
-    assert processed_df["pct_a"].tolist() == [0.25, 0.75]
+    assert processed_df.columns.tolist() == ["a", "b", "c", "pct_a"]
+    assert_array_equal(processed_df["a"].tolist(), [1, 3, nan])
+    assert_array_equal(processed_df["b"].tolist(), [1, 9, nan])
+    assert_array_equal(processed_df["c"].tolist(), [nan, nan, nan])
+    assert processed_df["pct_a"].tolist() == [0.25, 0.75, 0]