You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by yo...@apache.org on 2022/02/18 15:30:47 UTC
[superset] branch master updated: fix: contribution operator meets nan value (#18782)
This is an automated email from the ASF dual-hosted git repository.
yongjiezhao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git
The following commit(s) were added to refs/heads/master by this push:
new 987740a fix: contribution operator meets nan value (#18782)
987740a is described below
commit 987740aa8dfff4bf771b587a40f1e12811453660
Author: Yongjie Zhao <yo...@gmail.com>
AuthorDate: Fri Feb 18 23:28:28 2022 +0800
fix: contribution operator meets nan value (#18782)
---
.../src/Timeseries/Area/controlPanel.tsx | 4 +--
.../src/Timeseries/Regular/Bar/controlPanel.tsx | 4 +--
.../src/Timeseries/Regular/controlPanel.tsx | 4 +--
.../src/Timeseries/Step/controlPanel.tsx | 4 +--
.../src/Timeseries/controlPanel.tsx | 4 +--
superset/common/query_object.py | 2 ++
.../utils/pandas_postprocessing/contribution.py | 1 +
.../pandas_postprocessing/test_contribution.py | 36 ++++++++++++++--------
8 files changed, 36 insertions(+), 23 deletions(-)
diff --git a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Area/controlPanel.tsx b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Area/controlPanel.tsx
index 720eb94..bae735b 100644
--- a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Area/controlPanel.tsx
+++ b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Area/controlPanel.tsx
@@ -72,10 +72,10 @@ const config: ControlPanelConfig = {
default: contributionMode,
choices: [
[null, 'None'],
- [EchartsTimeseriesContributionType.Row, 'Total'],
+ [EchartsTimeseriesContributionType.Row, 'Row'],
[EchartsTimeseriesContributionType.Column, 'Series'],
],
- description: t('Calculate contribution per series or total'),
+ description: t('Calculate contribution per series or row'),
},
},
],
diff --git a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Regular/Bar/controlPanel.tsx b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Regular/Bar/controlPanel.tsx
index 96cb0b6..08d09a0 100644
--- a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Regular/Bar/controlPanel.tsx
+++ b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Regular/Bar/controlPanel.tsx
@@ -69,10 +69,10 @@ const config: ControlPanelConfig = {
default: contributionMode,
choices: [
[null, 'None'],
- [EchartsTimeseriesContributionType.Row, 'Total'],
+ [EchartsTimeseriesContributionType.Row, 'Row'],
[EchartsTimeseriesContributionType.Column, 'Series'],
],
- description: t('Calculate contribution per series or total'),
+ description: t('Calculate contribution per series or row'),
},
},
],
diff --git a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Regular/controlPanel.tsx b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Regular/controlPanel.tsx
index 14edc34..701c3a5 100644
--- a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Regular/controlPanel.tsx
+++ b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Regular/controlPanel.tsx
@@ -66,10 +66,10 @@ const config: ControlPanelConfig = {
default: contributionMode,
choices: [
[null, 'None'],
- [EchartsTimeseriesContributionType.Row, 'Total'],
+ [EchartsTimeseriesContributionType.Row, 'Row'],
[EchartsTimeseriesContributionType.Column, 'Series'],
],
- description: t('Calculate contribution per series or total'),
+ description: t('Calculate contribution per series or row'),
},
},
],
diff --git a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Step/controlPanel.tsx b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Step/controlPanel.tsx
index 8178e49..8b40330 100644
--- a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Step/controlPanel.tsx
+++ b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/Step/controlPanel.tsx
@@ -72,10 +72,10 @@ const config: ControlPanelConfig = {
default: contributionMode,
choices: [
[null, 'None'],
- [EchartsTimeseriesContributionType.Row, 'Total'],
+ [EchartsTimeseriesContributionType.Row, 'Row'],
[EchartsTimeseriesContributionType.Column, 'Series'],
],
- description: t('Calculate contribution per series or total'),
+ description: t('Calculate contribution per series or row'),
},
},
],
diff --git a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/controlPanel.tsx b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/controlPanel.tsx
index 1012b2f..f7fd56b 100644
--- a/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/controlPanel.tsx
+++ b/superset-frontend/plugins/plugin-chart-echarts/src/Timeseries/controlPanel.tsx
@@ -73,10 +73,10 @@ const config: ControlPanelConfig = {
default: contributionMode,
choices: [
[null, 'None'],
- [EchartsTimeseriesContributionType.Row, 'Total'],
+ [EchartsTimeseriesContributionType.Row, 'Row'],
[EchartsTimeseriesContributionType.Column, 'Series'],
],
- description: t('Calculate contribution per series or total'),
+ description: t('Calculate contribution per series or row'),
},
},
],
diff --git a/superset/common/query_object.py b/superset/common/query_object.py
index 03ee9cb..2a40155 100644
--- a/superset/common/query_object.py
+++ b/superset/common/query_object.py
@@ -19,6 +19,7 @@ from __future__ import annotations
import logging
from datetime import datetime, timedelta
+from pprint import pformat
from typing import Any, Dict, List, NamedTuple, Optional, TYPE_CHECKING
from flask_babel import gettext as _
@@ -395,6 +396,7 @@ class QueryObject: # pylint: disable=too-many-instance-attributes
:raises QueryObjectValidationError: If the post processing operation
is incorrect
"""
+ logger.debug("post_processing: %s", pformat(self.post_processing))
for post_process in self.post_processing:
operation = post_process.get("operation")
if not operation:
diff --git a/superset/utils/pandas_postprocessing/contribution.py b/superset/utils/pandas_postprocessing/contribution.py
index d813961..7097ea3 100644
--- a/superset/utils/pandas_postprocessing/contribution.py
+++ b/superset/utils/pandas_postprocessing/contribution.py
@@ -49,6 +49,7 @@ def contribution(
"""
contribution_df = df.copy()
numeric_df = contribution_df.select_dtypes(include=["number", Decimal])
+ numeric_df.fillna(0, inplace=True)
# verify column selections
if columns:
numeric_columns = numeric_df.columns.tolist()
diff --git a/tests/unit_tests/pandas_postprocessing/test_contribution.py b/tests/unit_tests/pandas_postprocessing/test_contribution.py
index 78212cb..9d2df76 100644
--- a/tests/unit_tests/pandas_postprocessing/test_contribution.py
+++ b/tests/unit_tests/pandas_postprocessing/test_contribution.py
@@ -18,6 +18,8 @@
from datetime import datetime
import pytest
+from numpy import nan
+from numpy.testing import assert_array_equal
from pandas import DataFrame
from superset.exceptions import QueryObjectValidationError
@@ -28,9 +30,14 @@ from superset.utils.pandas_postprocessing import contribution
def test_contribution():
df = DataFrame(
{
- DTTM_ALIAS: [datetime(2020, 7, 16, 14, 49), datetime(2020, 7, 16, 14, 50),],
- "a": [1, 3],
- "b": [1, 9],
+ DTTM_ALIAS: [
+ datetime(2020, 7, 16, 14, 49),
+ datetime(2020, 7, 16, 14, 50),
+ datetime(2020, 7, 16, 14, 51),
+ ],
+ "a": [1, 3, nan],
+ "b": [1, 9, nan],
+ "c": [nan, nan, nan],
}
)
with pytest.raises(QueryObjectValidationError, match="not numeric"):
@@ -43,18 +50,20 @@ def test_contribution():
processed_df = contribution(
df, orientation=PostProcessingContributionOrientation.ROW,
)
- assert processed_df.columns.tolist() == [DTTM_ALIAS, "a", "b"]
- assert processed_df["a"].tolist() == [0.5, 0.25]
- assert processed_df["b"].tolist() == [0.5, 0.75]
+ assert processed_df.columns.tolist() == [DTTM_ALIAS, "a", "b", "c"]
+ assert_array_equal(processed_df["a"].tolist(), [0.5, 0.25, nan])
+ assert_array_equal(processed_df["b"].tolist(), [0.5, 0.75, nan])
+ assert_array_equal(processed_df["c"].tolist(), [0, 0, nan])
# cell contribution across column without temporal column
df.pop(DTTM_ALIAS)
processed_df = contribution(
df, orientation=PostProcessingContributionOrientation.COLUMN
)
- assert processed_df.columns.tolist() == ["a", "b"]
- assert processed_df["a"].tolist() == [0.25, 0.75]
- assert processed_df["b"].tolist() == [0.1, 0.9]
+ assert processed_df.columns.tolist() == ["a", "b", "c"]
+ assert_array_equal(processed_df["a"].tolist(), [0.25, 0.75, 0])
+ assert_array_equal(processed_df["b"].tolist(), [0.1, 0.9, 0])
+ assert_array_equal(processed_df["c"].tolist(), [nan, nan, nan])
# contribution only on selected columns
processed_df = contribution(
@@ -63,7 +72,8 @@ def test_contribution():
columns=["a"],
rename_columns=["pct_a"],
)
- assert processed_df.columns.tolist() == ["a", "b", "pct_a"]
- assert processed_df["a"].tolist() == [1, 3]
- assert processed_df["b"].tolist() == [1, 9]
- assert processed_df["pct_a"].tolist() == [0.25, 0.75]
+ assert processed_df.columns.tolist() == ["a", "b", "c", "pct_a"]
+ assert_array_equal(processed_df["a"].tolist(), [1, 3, nan])
+ assert_array_equal(processed_df["b"].tolist(), [1, 9, nan])
+ assert_array_equal(processed_df["c"].tolist(), [nan, nan, nan])
+ assert processed_df["pct_a"].tolist() == [0.25, 0.75, 0]