You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by yo...@apache.org on 2023/08/27 16:46:47 UTC

[superset] branch master updated: refactor(pinot): The `python_date_format` for a temporal column was not being passed to `get_timestamp_expr` (#24942)

This is an automated email from the ASF dual-hosted git repository.

yongjiezhao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git


The following commit(s) were added to refs/heads/master by this push:
     new c2a21d2da0 refactor(pinot): The `python_date_format` for a temporal column was not being passed to `get_timestamp_expr` (#24942)
c2a21d2da0 is described below

commit c2a21d2da09dd8e2b8c9c811092007c9fa3ea564
Author: Erich <13...@users.noreply.github.com>
AuthorDate: Sun Aug 27 12:46:39 2023 -0400

    refactor(pinot): The `python_date_format` for a temporal column was not being passed to `get_timestamp_expr` (#24942)
---
 superset/db_engine_specs/pinot.py                  | 134 +++++++--------------
 .../db_engine_specs/pinot_tests.py                 |  39 +++---
 tests/unit_tests/db_engine_specs/test_pinot.py     |  57 +++++++++
 3 files changed, 125 insertions(+), 105 deletions(-)

diff --git a/superset/db_engine_specs/pinot.py b/superset/db_engine_specs/pinot.py
index a0662366d1..2cafd5ecb0 100644
--- a/superset/db_engine_specs/pinot.py
+++ b/superset/db_engine_specs/pinot.py
@@ -14,15 +14,15 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-from typing import Optional
-
-from sqlalchemy.sql.expression import ColumnClause
+from sqlalchemy import types
+from sqlalchemy.engine.interfaces import Dialect
+from sqlalchemy.types import TypeEngine
 
 from superset.constants import TimeGrain
-from superset.db_engine_specs.base import BaseEngineSpec, TimestampExpression
+from superset.db_engine_specs.base import BaseEngineSpec
 
 
-class PinotEngineSpec(BaseEngineSpec):  # pylint: disable=abstract-method
+class PinotEngineSpec(BaseEngineSpec):
     engine = "pinot"
     engine_name = "Apache Pinot"
     allows_subqueries = False
@@ -30,93 +30,51 @@ class PinotEngineSpec(BaseEngineSpec):  # pylint: disable=abstract-method
     allows_alias_in_select = False
     allows_alias_in_orderby = False
 
-    # Pinot does its own conversion below
+    # https://docs.pinot.apache.org/users/user-guide-query/supported-transformations#datetime-functions
     _time_grain_expressions = {
-        TimeGrain.SECOND: "1:SECONDS",
-        TimeGrain.MINUTE: "1:MINUTES",
-        TimeGrain.FIVE_MINUTES: "5:MINUTES",
-        TimeGrain.TEN_MINUTES: "10:MINUTES",
-        TimeGrain.FIFTEEN_MINUTES: "15:MINUTES",
-        TimeGrain.THIRTY_MINUTES: "30:MINUTES",
-        TimeGrain.HOUR: "1:HOURS",
-        TimeGrain.DAY: "1:DAYS",
-        TimeGrain.WEEK: "week",
-        TimeGrain.MONTH: "month",
-        TimeGrain.QUARTER: "quarter",
-        TimeGrain.YEAR: "year",
-    }
-
-    _python_to_java_time_patterns: dict[str, str] = {
-        "%Y": "yyyy",
-        "%m": "MM",
-        "%d": "dd",
-        "%H": "HH",
-        "%M": "mm",
-        "%S": "ss",
-    }
-
-    _use_date_trunc_function: dict[str, bool] = {
-        TimeGrain.SECOND: False,
-        TimeGrain.MINUTE: False,
-        TimeGrain.FIVE_MINUTES: False,
-        TimeGrain.TEN_MINUTES: False,
-        TimeGrain.FIFTEEN_MINUTES: False,
-        TimeGrain.THIRTY_MINUTES: False,
-        TimeGrain.HOUR: False,
-        TimeGrain.DAY: False,
-        TimeGrain.WEEK: True,
-        TimeGrain.MONTH: True,
-        TimeGrain.QUARTER: True,
-        TimeGrain.YEAR: True,
+        None: "{col}",
+        TimeGrain.SECOND: "CAST(DATE_TRUNC('second', "
+        + "CAST({col} AS TIMESTAMP)) AS TIMESTAMP)",
+        TimeGrain.MINUTE: "CAST(DATE_TRUNC('minute', "
+        + "CAST({col} AS TIMESTAMP)) AS TIMESTAMP)",
+        TimeGrain.FIVE_MINUTES: "CAST(ROUND(DATE_TRUNC('minute', "
+        + "CAST({col} AS TIMESTAMP)), 300000) AS TIMESTAMP)",
+        TimeGrain.TEN_MINUTES: "CAST(ROUND(DATE_TRUNC('minute', "
+        + "CAST({col} AS TIMESTAMP)), 600000) AS TIMESTAMP)",
+        TimeGrain.FIFTEEN_MINUTES: "CAST(ROUND(DATE_TRUNC('minute', "
+        + "CAST({col} AS TIMESTAMP)), 900000) AS TIMESTAMP)",
+        TimeGrain.THIRTY_MINUTES: "CAST(ROUND(DATE_TRUNC('minute', "
+        + "CAST({col} AS TIMESTAMP)), 1800000) AS TIMESTAMP)",
+        TimeGrain.HOUR: "CAST(DATE_TRUNC('hour', CAST({col} AS TIMESTAMP)) AS TIMESTAMP)",
+        TimeGrain.DAY: "CAST(DATE_TRUNC('day', CAST({col} AS TIMESTAMP)) AS TIMESTAMP)",
+        TimeGrain.WEEK: "CAST(DATE_TRUNC('week', CAST({col} AS TIMESTAMP)) AS TIMESTAMP)",
+        TimeGrain.MONTH: "CAST(DATE_TRUNC('month', "
+        + "CAST({col} AS TIMESTAMP)) AS TIMESTAMP)",
+        TimeGrain.QUARTER: "CAST(DATE_TRUNC('quarter', "
+        + "CAST({col} AS TIMESTAMP)) AS TIMESTAMP)",
+        TimeGrain.YEAR: "CAST(DATE_TRUNC('year', CAST({col} AS TIMESTAMP)) AS TIMESTAMP)",
     }
 
     @classmethod
-    def get_timestamp_expr(
-        cls,
-        col: ColumnClause,
-        pdf: Optional[str],
-        time_grain: Optional[str],
-    ) -> TimestampExpression:
-        if not pdf:
-            raise NotImplementedError(f"Empty date format for '{col}'")
-        is_epoch = pdf in ("epoch_s", "epoch_ms")
+    def epoch_to_dttm(cls) -> str:
+        return (
+            "DATETIMECONVERT({col}, '1:SECONDS:EPOCH', '1:SECONDS:EPOCH', '1:SECONDS')"
+        )
 
-        # The DATETIMECONVERT pinot udf is documented at
-        # Per https://github.com/apache/incubator-pinot/wiki/dateTimeConvert-UDF
-        # We are not really converting any time units, just bucketing them.
-        tf = ""
-        java_date_format = ""
-        if not is_epoch:
-            java_date_format = pdf
-            for (
-                python_pattern,
-                java_pattern,
-            ) in cls._python_to_java_time_patterns.items():
-                java_date_format = java_date_format.replace(
-                    python_pattern, java_pattern
-                )
-            tf = f"1:SECONDS:SIMPLE_DATE_FORMAT:{java_date_format}"
-        else:
-            seconds_or_ms = "MILLISECONDS" if pdf == "epoch_ms" else "SECONDS"
-            tf = f"1:{seconds_or_ms}:EPOCH"
-        if time_grain:
-            granularity = cls.get_time_grain_expressions().get(time_grain)
-            if not granularity:
-                raise NotImplementedError(f"No pinot grain spec for '{time_grain}'")
-        else:
-            return TimestampExpression("{{col}}", col)
+    @classmethod
+    def epoch_ms_to_dttm_(cls) -> str:
+        return (
+            "DATETIMECONVERT({col}, '1:MILLISECONDS:EPOCH', "
+            + "'1:MILLISECONDS:EPOCH', '1:MILLISECONDS')"
+        )
 
-        # In pinot the output is a string since there is no timestamp column like pg
-        if cls._use_date_trunc_function.get(time_grain):
-            if is_epoch:
-                time_expr = f"DATETRUNC('{granularity}', {{col}}, '{seconds_or_ms}')"
-            else:
-                time_expr = (
-                    f"ToDateTime(DATETRUNC('{granularity}', "
-                    + f"FromDateTime({{col}}, '{java_date_format}'), "
-                    + f"'MILLISECONDS'), '{java_date_format}')"
-                )
-        else:
-            time_expr = f"DATETIMECONVERT({{col}}, '{tf}', '{tf}', '{granularity}')"
+    @classmethod
+    def column_datatype_to_string(
+        cls, sqla_column_type: TypeEngine, dialect: Dialect
+    ) -> str:
+        # Pinot driver infers TIMESTAMP column as LONG, so make the quick fix.
+        # When the Pinot driver fix this bug, current method could be removed.
+        if isinstance(sqla_column_type, types.TIMESTAMP):
+            return sqla_column_type.compile().upper()
 
-        return TimestampExpression(time_expr, col)
+        return super().column_datatype_to_string(sqla_column_type, dialect)
diff --git a/tests/integration_tests/db_engine_specs/pinot_tests.py b/tests/integration_tests/db_engine_specs/pinot_tests.py
old mode 100644
new mode 100755
index c6e364a8ea..3998d20940
--- a/tests/integration_tests/db_engine_specs/pinot_tests.py
+++ b/tests/integration_tests/db_engine_specs/pinot_tests.py
@@ -27,61 +27,66 @@ class TestPinotDbEngineSpec(TestDbEngineSpec):
         col = column("tstamp")
         expr = PinotEngineSpec.get_timestamp_expr(col, "epoch_s", "P1D")
         result = str(expr.compile())
+        expected = (
+            "CAST(DATE_TRUNC('day', CAST("
+            + "DATETIMECONVERT(tstamp, '1:SECONDS:EPOCH', "
+            + "'1:SECONDS:EPOCH', '1:SECONDS') AS TIMESTAMP)) AS TIMESTAMP)"
+        )
         self.assertEqual(
             result,
-            "DATETIMECONVERT(tstamp, '1:SECONDS:EPOCH', '1:SECONDS:EPOCH', '1:DAYS')",
+            expected,
         )
 
     def test_pinot_time_expression_simple_date_format_1d_grain(self):
         col = column("tstamp")
         expr = PinotEngineSpec.get_timestamp_expr(col, "%Y-%m-%d %H:%M:%S", "P1D")
         result = str(expr.compile())
+        expected = "CAST(DATE_TRUNC('day', CAST(tstamp AS TIMESTAMP)) AS TIMESTAMP)"
         self.assertEqual(
             result,
-            (
-                "DATETIMECONVERT(tstamp, "
-                + "'1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss', "
-                + "'1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss', '1:DAYS')"
-            ),
+            expected,
         )
 
     def test_pinot_time_expression_simple_date_format_10m_grain(self):
         col = column("tstamp")
         expr = PinotEngineSpec.get_timestamp_expr(col, "%Y-%m-%d %H:%M:%S", "PT10M")
         result = str(expr.compile())
+        expected = (
+            "CAST(ROUND(DATE_TRUNC('minute', CAST(tstamp AS "
+            + "TIMESTAMP)), 600000) AS TIMESTAMP)"
+        )
         self.assertEqual(
             result,
-            (
-                "DATETIMECONVERT(tstamp, "
-                + "'1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss', "
-                + "'1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss', '10:MINUTES')"
-            ),
+            expected,
         )
 
     def test_pinot_time_expression_simple_date_format_1w_grain(self):
         col = column("tstamp")
         expr = PinotEngineSpec.get_timestamp_expr(col, "%Y-%m-%d %H:%M:%S", "P1W")
         result = str(expr.compile())
+        expected = "CAST(DATE_TRUNC('week', CAST(tstamp AS TIMESTAMP)) AS TIMESTAMP)"
         self.assertEqual(
             result,
-            (
-                "ToDateTime(DATETRUNC('week', FromDateTime(tstamp, "
-                + "'yyyy-MM-dd HH:mm:ss'), 'MILLISECONDS'), 'yyyy-MM-dd HH:mm:ss')"
-            ),
+            expected,
         )
 
     def test_pinot_time_expression_sec_one_1m_grain(self):
         col = column("tstamp")
         expr = PinotEngineSpec.get_timestamp_expr(col, "epoch_s", "P1M")
         result = str(expr.compile())
+        expected = (
+            "CAST(DATE_TRUNC('month', CAST("
+            + "DATETIMECONVERT(tstamp, '1:SECONDS:EPOCH', "
+            + "'1:SECONDS:EPOCH', '1:SECONDS') AS TIMESTAMP)) AS TIMESTAMP)"
+        )
         self.assertEqual(
             result,
-            "DATETRUNC('month', tstamp, 'SECONDS')",
+            expected,
         )
 
     def test_invalid_get_time_expression_arguments(self):
         with self.assertRaises(NotImplementedError):
-            PinotEngineSpec.get_timestamp_expr(column("tstamp"), None, "P1M")
+            PinotEngineSpec.get_timestamp_expr(column("tstamp"), None, "P0.25Y")
 
         with self.assertRaises(NotImplementedError):
             PinotEngineSpec.get_timestamp_expr(
diff --git a/tests/unit_tests/db_engine_specs/test_pinot.py b/tests/unit_tests/db_engine_specs/test_pinot.py
new file mode 100644
index 0000000000..a1648f5f60
--- /dev/null
+++ b/tests/unit_tests/db_engine_specs/test_pinot.py
@@ -0,0 +1,57 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from unittest import mock
+
+import pytest
+from sqlalchemy import column
+
+
+@pytest.mark.parametrize(
+    "time_grain,expected_result",
+    [
+        ("PT1S", "CAST(DATE_TRUNC('second', CAST(col AS TIMESTAMP)) AS TIMESTAMP)"),
+        (
+            "PT5M",
+            "CAST(ROUND(DATE_TRUNC('minute', CAST(col AS TIMESTAMP)), 300000) AS TIMESTAMP)",
+        ),
+        ("P1W", "CAST(DATE_TRUNC('week', CAST(col AS TIMESTAMP)) AS TIMESTAMP)"),
+        ("P1M", "CAST(DATE_TRUNC('month', CAST(col AS TIMESTAMP)) AS TIMESTAMP)"),
+        ("P3M", "CAST(DATE_TRUNC('quarter', CAST(col AS TIMESTAMP)) AS TIMESTAMP)"),
+        ("P1Y", "CAST(DATE_TRUNC('year', CAST(col AS TIMESTAMP)) AS TIMESTAMP)"),
+    ],
+)
+def test_timegrain_expressions(time_grain: str, expected_result: str) -> None:
+    """
+    DB Eng Specs (pinot): Test time grain expressions
+    """
+    from superset.db_engine_specs.pinot import PinotEngineSpec as spec
+
+    actual = str(
+        spec.get_timestamp_expr(col=column("col"), pdf=None, time_grain=time_grain)
+    )
+    assert actual == expected_result
+
+
+def test_extras_without_ssl() -> None:
+    from superset.db_engine_specs.pinot import PinotEngineSpec as spec
+    from tests.integration_tests.fixtures.database import default_db_extra
+
+    db = mock.Mock()
+    db.extra = default_db_extra
+    db.server_cert = None
+    extras = spec.get_extra_params(db)
+    assert "connect_args" not in extras["engine_params"]