You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by vi...@apache.org on 2020/09/16 06:35:21 UTC

[incubator-superset] 25/28: fix: pivot table timestamp grouping (#10774)

This is an automated email from the ASF dual-hosted git repository.

villebro pushed a commit to branch 0.37
in repository https://gitbox.apache.org/repos/asf/incubator-superset.git

commit 8ecfb18ac7cdb50e903061e591dd54f5ccea5d51
Author: Ville Brofeldt <33...@users.noreply.github.com>
AuthorDate: Thu Sep 3 19:49:54 2020 +0300

    fix: pivot table timestamp grouping (#10774)
    
    * fix: pivot table timestamp grouping
    
    * address comments
---
 superset/viz.py    | 46 +++++++++++++++++++++++++++++++++-------------
 tests/viz_tests.py | 34 +++++++++++++++++++++++++++++++++-
 2 files changed, 66 insertions(+), 14 deletions(-)

diff --git a/superset/viz.py b/superset/viz.py
index bdf4c53..df00d43 100644
--- a/superset/viz.py
+++ b/superset/viz.py
@@ -27,7 +27,7 @@ import logging
 import math
 import re
 from collections import defaultdict, OrderedDict
-from datetime import datetime, timedelta
+from datetime import date, datetime, timedelta
 from itertools import product
 from typing import (
     Any,
@@ -602,11 +602,11 @@ class TableViz(BaseViz):
 
     def process_metrics(self) -> None:
         """Process form data and store parsed column configs.
-           1. Determine query mode based on form_data params.
-                - Use `query_mode` if it has a valid value
-                - Set as RAW mode if `all_columns` is set
-                - Otherwise defaults to AGG mode
-           2. Determine output columns based on query mode.
+        1. Determine query mode based on form_data params.
+             - Use `query_mode` if it has a valid value
+             - Set as RAW mode if `all_columns` is set
+             - Otherwise defaults to AGG mode
+        2. Determine output columns based on query mode.
         """
         # Verify form data first: if not specifying query mode, then cannot have both
         # GROUP BY and RAW COLUMNS.
@@ -813,6 +813,31 @@ class PivotTableViz(BaseViz):
         # only min and max work properly for non-numerics
         return aggfunc if aggfunc in ("min", "max") else "max"
 
+    @staticmethod
+    def _format_datetime(value: Union[pd.Timestamp, datetime, date, str]) -> str:
+        """
+        Format a timestamp in such a way that the viz will be able to apply
+        the correct formatting in the frontend.
+
+        :param value: the value of a temporal column
+        :return: formatted timestamp if it is a valid timestamp, otherwise
+                 the original value
+        """
+        tstamp: Optional[pd.Timestamp] = None
+        if isinstance(value, pd.Timestamp):
+            tstamp = value
+        if isinstance(value, datetime) or isinstance(value, date):
+            tstamp = pd.Timestamp(value)
+        if isinstance(value, str):
+            try:
+                tstamp = pd.Timestamp(value)
+            except ValueError:
+                pass
+        if tstamp:
+            return f"__timestamp:{datetime_to_epoch(tstamp)}"
+        # fallback in case something incompatible is returned
+        return cast(str, value)
+
     def get_data(self, df: pd.DataFrame) -> VizData:
         if df.empty:
             return None
@@ -828,15 +853,10 @@ class PivotTableViz(BaseViz):
         groupby = self.form_data.get("groupby") or []
         columns = self.form_data.get("columns") or []
 
-        def _format_datetime(value: Any) -> Optional[str]:
-            if isinstance(value, str):
-                return f"__timestamp:{datetime_to_epoch(pd.Timestamp(value))}"
-            return None
-
         for column_name in groupby + columns:
             column = self.datasource.get_column(column_name)
-            if column and column.type in ("DATE", "DATETIME", "TIMESTAMP"):
-                ts = df[column_name].apply(_format_datetime)
+            if column and column.is_temporal:
+                ts = df[column_name].apply(self._format_datetime)
                 df[column_name] = ts
 
         if self.form_data.get("transpose_pivot"):
diff --git a/tests/viz_tests.py b/tests/viz_tests.py
index b52ed7f..6490a4f 100644
--- a/tests/viz_tests.py
+++ b/tests/viz_tests.py
@@ -16,7 +16,7 @@
 # under the License.
 # isort:skip_file
 import uuid
-from datetime import datetime
+from datetime import date, datetime, timezone
 import logging
 from math import nan
 from unittest.mock import Mock, patch
@@ -1345,6 +1345,38 @@ class TestPivotTableViz(SupersetTestCase):
             == "min"
         )
 
+    def test_format_datetime_from_pd_timestamp(self):
+        tstamp = pd.Timestamp(datetime(2020, 9, 3, tzinfo=timezone.utc))
+        assert (
+            viz.PivotTableViz._format_datetime(tstamp) == "__timestamp:1599091200000.0"
+        )
+
+    def test_format_datetime_from_datetime(self):
+        tstamp = datetime(2020, 9, 3, tzinfo=timezone.utc)
+        assert (
+            viz.PivotTableViz._format_datetime(tstamp) == "__timestamp:1599091200000.0"
+        )
+
+    def test_format_datetime_from_date(self):
+        tstamp = date(2020, 9, 3)
+        assert (
+            viz.PivotTableViz._format_datetime(tstamp) == "__timestamp:1599091200000.0"
+        )
+
+    def test_format_datetime_from_string(self):
+        tstamp = "2020-09-03T00:00:00"
+        assert (
+            viz.PivotTableViz._format_datetime(tstamp) == "__timestamp:1599091200000.0"
+        )
+
+    def test_format_datetime_from_invalid_string(self):
+        tstamp = "abracadabra"
+        assert viz.PivotTableViz._format_datetime(tstamp) == tstamp
+
+    def test_format_datetime_from_int(self):
+        assert viz.PivotTableViz._format_datetime(123) == 123
+        assert viz.PivotTableViz._format_datetime(123.0) == 123.0
+
 
 class TestDistributionPieViz(SupersetTestCase):
     base_df = pd.DataFrame(