Posted to commits@superset.apache.org by be...@apache.org on 2023/08/07 23:00:03 UTC

[superset] branch master updated: feat: improve SQLite DB engine spec (#24909)

This is an automated email from the ASF dual-hosted git repository.

beto pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git


The following commit(s) were added to refs/heads/master by this push:
     new 85a7d5cb3e feat: improve SQLite DB engine spec (#24909)
85a7d5cb3e is described below

commit 85a7d5cb3ebe833cfc2980f0846f15bb7ce1dd01
Author: Beto Dealmeida <ro...@dealmeida.net>
AuthorDate: Mon Aug 7 15:59:56 2023 -0700

    feat: improve SQLite DB engine spec (#24909)
---
 superset/db_engine_specs/gsheets.py             |   7 +-
 superset/db_engine_specs/shillelagh.py          |  18 +++
 superset/db_engine_specs/sqlite.py              | 175 +++++++++++++++++++++++-
 tests/integration_tests/databases/api_tests.py  | 132 +++++++++++++++++-
 tests/unit_tests/db_engine_specs/test_sqlite.py |  88 ++++++++----
 5 files changed, 383 insertions(+), 37 deletions(-)

diff --git a/superset/db_engine_specs/gsheets.py b/superset/db_engine_specs/gsheets.py
index abf5bac48f..777499a8f9 100644
--- a/superset/db_engine_specs/gsheets.py
+++ b/superset/db_engine_specs/gsheets.py
@@ -32,7 +32,7 @@ from typing_extensions import TypedDict
 from superset import security_manager
 from superset.constants import PASSWORD_MASK
 from superset.databases.schemas import encrypted_field_properties, EncryptedString
-from superset.db_engine_specs.sqlite import SqliteEngineSpec
+from superset.db_engine_specs.shillelagh import ShillelaghEngineSpec
 from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
 
 if TYPE_CHECKING:
@@ -65,14 +65,13 @@ class GSheetsPropertiesType(TypedDict):
     catalog: dict[str, str]
 
 
-class GSheetsEngineSpec(SqliteEngineSpec):
+class GSheetsEngineSpec(ShillelaghEngineSpec):
     """Engine for Google spreadsheets"""
 
-    engine = "gsheets"
     engine_name = "Google Sheets"
+    engine = "gsheets"
     allows_joins = True
     allows_subqueries = True
-    disable_ssh_tunneling = True
 
     parameters_schema = GSheetsParametersSchema()
     default_driver = "apsw"
diff --git a/superset/db_engine_specs/shillelagh.py b/superset/db_engine_specs/shillelagh.py
index 3730122448..61820824b0 100644
--- a/superset/db_engine_specs/shillelagh.py
+++ b/superset/db_engine_specs/shillelagh.py
@@ -14,8 +14,15 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
 from superset.db_engine_specs.sqlite import SqliteEngineSpec
 
+if TYPE_CHECKING:
+    from superset.models.core import Database
+
 
 class ShillelaghEngineSpec(SqliteEngineSpec):
     """Engine for shillelagh"""
@@ -28,3 +35,14 @@ class ShillelaghEngineSpec(SqliteEngineSpec):
 
     allows_joins = True
     allows_subqueries = True
+
+    @classmethod
+    def get_function_names(
+        cls,
+        database: Database,
+    ) -> list[str]:
+        return super().get_function_names(database) + [
+            "sleep",
+            "version",
+            "get_metadata",
+        ]
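
The override above simply extends the parent spec: SqliteEngineSpec.get_function_names (see the sqlite.py diff below) returns a hard-coded list and never reads its database argument, and ShillelaghEngineSpec appends the three shillelagh-specific functions on top of it. A minimal sketch of what a caller would see; passing None for the unused database argument is an assumption made only for illustration, since Superset itself passes a real Database model:

    from superset.db_engine_specs.shillelagh import ShillelaghEngineSpec

    # The SQLite base list is static and never touches `database`, so None is
    # enough for a quick check outside a running Superset instance.
    names = ShillelaghEngineSpec.get_function_names(database=None)  # type: ignore[arg-type]

    assert "sleep" in names          # shillelagh-specific
    assert "get_metadata" in names   # shillelagh-specific
    assert "json_extract" in names   # inherited from SqliteEngineSpec
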
diff --git a/superset/db_engine_specs/sqlite.py b/superset/db_engine_specs/sqlite.py
index 06d5537509..c06660abbd 100644
--- a/superset/db_engine_specs/sqlite.py
+++ b/superset/db_engine_specs/sqlite.py
@@ -14,10 +14,13 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
+
+from __future__ import annotations
+
 import re
 from datetime import datetime
 from re import Pattern
-from typing import Any, Optional, TYPE_CHECKING
+from typing import Any, TYPE_CHECKING
 
 from flask_babel import gettext as __
 from sqlalchemy import types
@@ -39,11 +42,41 @@ class SqliteEngineSpec(BaseEngineSpec):
     engine = "sqlite"
     engine_name = "SQLite"
 
+    disable_ssh_tunneling = True
+
     _time_grain_expressions = {
         None: "{col}",
         TimeGrain.SECOND: "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:%S', {col}))",
+        TimeGrain.FIVE_SECONDS: (
+            "DATETIME({col}, printf('-%d seconds', "
+            "CAST(strftime('%S', {col}) AS INT) % 5))"
+        ),
+        TimeGrain.THIRTY_SECONDS: (
+            "DATETIME({col}, printf('-%d seconds', "
+            "CAST(strftime('%S', {col}) AS INT) % 30))"
+        ),
         TimeGrain.MINUTE: "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}))",
+        TimeGrain.FIVE_MINUTES: (
+            "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}), printf('-%d minutes', "
+            "CAST(strftime('%M', {col}) AS INT) % 5))"
+        ),
+        TimeGrain.TEN_MINUTES: (
+            "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}), printf('-%d minutes', "
+            "CAST(strftime('%M', {col}) AS INT) % 10))"
+        ),
+        TimeGrain.FIFTEEN_MINUTES: (
+            "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}), printf('-%d minutes', "
+            "CAST(strftime('%M', {col}) AS INT) % 15))"
+        ),
+        TimeGrain.THIRTY_MINUTES: (
+            "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}), printf('-%d minutes', "
+            "CAST(strftime('%M', {col}) AS INT) % 30))"
+        ),
         TimeGrain.HOUR: "DATETIME(STRFTIME('%Y-%m-%dT%H:00:00', {col}))",
+        TimeGrain.SIX_HOURS: (
+            "DATETIME(STRFTIME('%Y-%m-%dT%H:00:00', {col}), printf('-%d hours', "
+            "CAST(strftime('%H', {col}) AS INT) % 6))"
+        ),
         TimeGrain.DAY: "DATETIME({col}, 'start of day')",
         TimeGrain.WEEK: "DATETIME({col}, 'start of day', \
             -strftime('%w', {col}) || ' days')",
@@ -62,6 +95,13 @@ class SqliteEngineSpec(BaseEngineSpec):
             "DATETIME({col}, 'start of day', 'weekday 1', '-7 days')"
         ),
     }
+    # not sure why these exist separately; they just alias the two grains above
+    _time_grain_expressions.update(
+        {
+            TimeGrain.HALF_HOUR: _time_grain_expressions[TimeGrain.THIRTY_MINUTES],
+            TimeGrain.QUARTER_YEAR: _time_grain_expressions[TimeGrain.QUARTER],
+        }
+    )
 
     custom_errors: dict[Pattern[str], tuple[str, SupersetErrorType, dict[str, Any]]] = {
         COLUMN_DOES_NOT_EXIST_REGEX: (
@@ -77,8 +117,8 @@ class SqliteEngineSpec(BaseEngineSpec):
 
     @classmethod
     def convert_dttm(
-        cls, target_type: str, dttm: datetime, db_extra: Optional[dict[str, Any]] = None
-    ) -> Optional[str]:
+        cls, target_type: str, dttm: datetime, db_extra: dict[str, Any] | None = None
+    ) -> str | None:
         sqla_type = cls.get_sqla_column_type(target_type)
         if isinstance(sqla_type, (types.String, types.DateTime)):
             return f"""'{dttm.isoformat(sep=" ", timespec="seconds")}'"""
@@ -86,7 +126,134 @@ class SqliteEngineSpec(BaseEngineSpec):
 
     @classmethod
     def get_table_names(
-        cls, database: "Database", inspector: Inspector, schema: Optional[str]
+        cls,
+        database: Database,
+        inspector: Inspector,
+        schema: str | None,
     ) -> set[str]:
         """Need to disregard the schema for Sqlite"""
         return set(inspector.get_table_names())
+
+    @classmethod
+    def get_function_names(
+        cls,
+        database: Database,
+    ) -> list[str]:
+        """
+        Return function names.
+        """
+        return [
+            "abs",
+            "acos",
+            "acosh",
+            "asin",
+            "asinh",
+            "atan",
+            "atan2",
+            "atanh",
+            "avg",
+            "ceil",
+            "ceiling",
+            "changes",
+            "char",
+            "coalesce",
+            "cos",
+            "cosh",
+            "count",
+            "cume_dist",
+            "date",
+            "datetime",
+            "degrees",
+            "dense_rank",
+            "exp",
+            "first_value",
+            "floor",
+            "format",
+            "glob",
+            "group_concat",
+            "hex",
+            "ifnull",
+            "iif",
+            "instr",
+            "json",
+            "json_array",
+            "json_array_length",
+            "json_each",
+            "json_error_position",
+            "json_extract",
+            "json_group_array",
+            "json_group_object",
+            "json_insert",
+            "json_object",
+            "json_patch",
+            "json_quote",
+            "json_remove",
+            "json_replace",
+            "json_set",
+            "json_tree",
+            "json_type",
+            "json_valid",
+            "julianday",
+            "lag",
+            "last_insert_rowid",
+            "last_value",
+            "lead",
+            "length",
+            "like",
+            "likelihood",
+            "likely",
+            "ln",
+            "load_extension",
+            "log",
+            "log10",
+            "log2",
+            "lower",
+            "ltrim",
+            "max",
+            "min",
+            "mod",
+            "nth_value",
+            "ntile",
+            "nullif",
+            "percent_rank",
+            "pi",
+            "pow",
+            "power",
+            "printf",
+            "quote",
+            "radians",
+            "random",
+            "randomblob",
+            "rank",
+            "replace",
+            "round",
+            "row_number",
+            "rtrim",
+            "sign",
+            "sin",
+            "sinh",
+            "soundex",
+            "sqlite_compileoption_get",
+            "sqlite_compileoption_used",
+            "sqlite_offset",
+            "sqlite_source_id",
+            "sqlite_version",
+            "sqrt",
+            "strftime",
+            "substr",
+            "substring",
+            "sum",
+            "tan",
+            "tanh",
+            "time",
+            "total_changes",
+            "trim",
+            "trunc",
+            "typeof",
+            "unhex",
+            "unicode",
+            "unixepoch",
+            "unlikely",
+            "upper",
+            "zeroblob",
+        ]
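
Every new grain added above follows the same pattern: truncate the timestamp to the next coarser unit with STRFTIME, then subtract the remainder of the relevant field modulo the bucket size via printf. A minimal sketch that renders one of the new templates and evaluates it against an in-memory SQLite connection; the column name "ts" and the sample timestamp are illustrative only:

    import sqlite3

    from superset.constants import TimeGrain
    from superset.db_engine_specs.sqlite import SqliteEngineSpec

    # Render the FIVE_MINUTES template for a hypothetical column named "ts".
    expr = SqliteEngineSpec._time_grain_expressions[TimeGrain.FIVE_MINUTES].format(col="ts")

    with sqlite3.connect(":memory:") as conn:
        row = conn.execute(
            f"SELECT {expr} FROM (SELECT '2023-08-07 15:59:56' AS ts)"
        ).fetchone()

    print(row[0])  # 15:59:56 falls in the 15:55-16:00 bucket -> '2023-08-07 15:55:00'

The HALF_HOUR and QUARTER_YEAR entries reuse the THIRTY_MINUTES and QUARTER expressions, so they bucket identically.
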
diff --git a/tests/integration_tests/databases/api_tests.py b/tests/integration_tests/databases/api_tests.py
index bd39d96574..8bf4867d01 100644
--- a/tests/integration_tests/databases/api_tests.py
+++ b/tests/integration_tests/databases/api_tests.py
@@ -2836,7 +2836,7 @@ class TestDatabaseApi(SupersetTestCase):
     )
     def test_function_names(self, mock_get_function_names):
         example_db = get_example_database()
-        if example_db.backend in {"hive", "presto"}:
+        if example_db.backend in {"hive", "presto", "sqlite"}:
             return
 
         mock_get_function_names.return_value = ["AVG", "MAX", "SUM"]
@@ -2850,6 +2850,136 @@ class TestDatabaseApi(SupersetTestCase):
         assert rv.status_code == 200
         assert response == {"function_names": ["AVG", "MAX", "SUM"]}
 
+    def test_function_names_sqlite(self):
+        example_db = get_example_database()
+        if example_db.backend != "sqlite":
+            return
+
+        self.login(username="admin")
+        uri = "api/v1/database/1/function_names/"
+
+        rv = self.client.get(uri)
+        response = json.loads(rv.data.decode("utf-8"))
+
+        assert rv.status_code == 200
+        assert response == {
+            "function_names": [
+                "abs",
+                "acos",
+                "acosh",
+                "asin",
+                "asinh",
+                "atan",
+                "atan2",
+                "atanh",
+                "avg",
+                "ceil",
+                "ceiling",
+                "changes",
+                "char",
+                "coalesce",
+                "cos",
+                "cosh",
+                "count",
+                "cume_dist",
+                "date",
+                "datetime",
+                "degrees",
+                "dense_rank",
+                "exp",
+                "first_value",
+                "floor",
+                "format",
+                "glob",
+                "group_concat",
+                "hex",
+                "ifnull",
+                "iif",
+                "instr",
+                "json",
+                "json_array",
+                "json_array_length",
+                "json_each",
+                "json_error_position",
+                "json_extract",
+                "json_group_array",
+                "json_group_object",
+                "json_insert",
+                "json_object",
+                "json_patch",
+                "json_quote",
+                "json_remove",
+                "json_replace",
+                "json_set",
+                "json_tree",
+                "json_type",
+                "json_valid",
+                "julianday",
+                "lag",
+                "last_insert_rowid",
+                "last_value",
+                "lead",
+                "length",
+                "like",
+                "likelihood",
+                "likely",
+                "ln",
+                "load_extension",
+                "log",
+                "log10",
+                "log2",
+                "lower",
+                "ltrim",
+                "max",
+                "min",
+                "mod",
+                "nth_value",
+                "ntile",
+                "nullif",
+                "percent_rank",
+                "pi",
+                "pow",
+                "power",
+                "printf",
+                "quote",
+                "radians",
+                "random",
+                "randomblob",
+                "rank",
+                "replace",
+                "round",
+                "row_number",
+                "rtrim",
+                "sign",
+                "sin",
+                "sinh",
+                "soundex",
+                "sqlite_compileoption_get",
+                "sqlite_compileoption_used",
+                "sqlite_offset",
+                "sqlite_source_id",
+                "sqlite_version",
+                "sqrt",
+                "strftime",
+                "substr",
+                "substring",
+                "sum",
+                "tan",
+                "tanh",
+                "time",
+                "total_changes",
+                "trim",
+                "trunc",
+                "typeof",
+                "unhex",
+                "unicode",
+                "unixepoch",
+                "unlikely",
+                "upper",
+                "zeroblob",
+            ]
+        }
+
     @mock.patch("superset.databases.api.get_available_engine_specs")
     @mock.patch("superset.databases.api.app")
     def test_available(self, app, get_available_engine_specs):
diff --git a/tests/unit_tests/db_engine_specs/test_sqlite.py b/tests/unit_tests/db_engine_specs/test_sqlite.py
index 11ce174c0f..a31992afcf 100644
--- a/tests/unit_tests/db_engine_specs/test_sqlite.py
+++ b/tests/unit_tests/db_engine_specs/test_sqlite.py
@@ -21,6 +21,7 @@ from typing import Optional
 import pytest
 from sqlalchemy.engine import create_engine
 
+from superset.constants import TimeGrain
 from tests.unit_tests.db_engine_specs.utils import assert_convert_dttm
 from tests.unit_tests.fixtures.common import dttm
 
@@ -47,13 +48,21 @@ def test_convert_dttm(
 @pytest.mark.parametrize(
     "dttm,grain,expected",
     [
-        ("2022-05-04T05:06:07.89Z", "PT1S", "2022-05-04 05:06:07"),
-        ("2022-05-04T05:06:07.89Z", "PT1M", "2022-05-04 05:06:00"),
-        ("2022-05-04T05:06:07.89Z", "PT1H", "2022-05-04 05:00:00"),
-        ("2022-05-04T05:06:07.89Z", "P1D", "2022-05-04 00:00:00"),
-        ("2022-05-04T05:06:07.89Z", "P1W", "2022-05-01 00:00:00"),
-        ("2022-05-04T05:06:07.89Z", "P1M", "2022-05-01 00:00:00"),
-        ("2022-05-04T05:06:07.89Z", "P1Y", "2022-01-01 00:00:00"),
+        ("2022-05-04T05:06:07.89Z", TimeGrain.SECOND, "2022-05-04 05:06:07"),
+        ("2022-05-04T05:06:07.89Z", TimeGrain.FIVE_SECONDS, "2022-05-04 05:06:05"),
+        ("2022-05-04T05:06:37.89Z", TimeGrain.THIRTY_SECONDS, "2022-05-04 05:06:30"),
+        ("2022-05-04T05:06:07.89Z", TimeGrain.MINUTE, "2022-05-04 05:06:00"),
+        ("2022-05-04T05:06:07.89Z", TimeGrain.FIVE_MINUTES, "2022-05-04 05:05:00"),
+        ("2022-05-04T05:36:07.89Z", TimeGrain.TEN_MINUTES, "2022-05-04 05:30:00"),
+        ("2022-05-04T05:46:07.89Z", TimeGrain.FIFTEEN_MINUTES, "2022-05-04 05:45:00"),
+        ("2022-05-04T05:36:07.89Z", TimeGrain.THIRTY_MINUTES, "2022-05-04 05:30:00"),
+        ("2022-05-04T05:36:07.89Z", TimeGrain.HALF_HOUR, "2022-05-04 05:30:00"),
+        ("2022-05-04T05:06:07.89Z", TimeGrain.HOUR, "2022-05-04 05:00:00"),
+        ("2022-05-04T07:06:07.89Z", TimeGrain.SIX_HOURS, "2022-05-04 06:00:00"),
+        ("2022-05-04T05:06:07.89Z", TimeGrain.DAY, "2022-05-04 00:00:00"),
+        ("2022-05-04T05:06:07.89Z", TimeGrain.WEEK, "2022-05-01 00:00:00"),
+        ("2022-05-04T05:06:07.89Z", TimeGrain.MONTH, "2022-05-01 00:00:00"),
+        ("2022-05-04T05:06:07.89Z", TimeGrain.YEAR, "2022-01-01 00:00:00"),
         #  ___________________________
         # |         May 2022          |
         # |---------------------------|
@@ -61,27 +70,50 @@ def test_convert_dttm(
         # |---+---+---+---+---+---+---|
         # | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
         #  ---------------------------
-        # week ending Saturday
-        ("2022-05-04T05:06:07.89Z", "P1W/1970-01-03T00:00:00Z", "2022-05-07 00:00:00"),
-        # week ending Sunday
-        ("2022-05-04T05:06:07.89Z", "P1W/1970-01-04T00:00:00Z", "2022-05-08 00:00:00"),
-        # week starting Sunday
-        ("2022-05-04T05:06:07.89Z", "1969-12-28T00:00:00Z/P1W", "2022-05-01 00:00:00"),
-        # week starting Monday
-        ("2022-05-04T05:06:07.89Z", "1969-12-29T00:00:00Z/P1W", "2022-05-02 00:00:00"),
-        # tests for quarter
-        ("2022-01-04T05:06:07.89Z", "P3M", "2022-01-01 00:00:00"),
-        ("2022-02-04T05:06:07.89Z", "P3M", "2022-01-01 00:00:00"),
-        ("2022-03-04T05:06:07.89Z", "P3M", "2022-01-01 00:00:00"),
-        ("2022-04-04T05:06:07.89Z", "P3M", "2022-04-01 00:00:00"),
-        ("2022-05-04T05:06:07.89Z", "P3M", "2022-04-01 00:00:00"),
-        ("2022-06-04T05:06:07.89Z", "P3M", "2022-04-01 00:00:00"),
-        ("2022-07-04T05:06:07.89Z", "P3M", "2022-07-01 00:00:00"),
-        ("2022-08-04T05:06:07.89Z", "P3M", "2022-07-01 00:00:00"),
-        ("2022-09-04T05:06:07.89Z", "P3M", "2022-07-01 00:00:00"),
-        ("2022-10-04T05:06:07.89Z", "P3M", "2022-10-01 00:00:00"),
-        ("2022-11-04T05:06:07.89Z", "P3M", "2022-10-01 00:00:00"),
-        ("2022-12-04T05:06:07.89Z", "P3M", "2022-10-01 00:00:00"),
+        (
+            "2022-05-04T05:06:07.89Z",
+            TimeGrain.WEEK_ENDING_SATURDAY,
+            "2022-05-07 00:00:00",
+        ),
+        (
+            "2022-05-04T05:06:07.89Z",
+            TimeGrain.WEEK_ENDING_SUNDAY,
+            "2022-05-08 00:00:00",
+        ),
+        (
+            "2022-05-04T05:06:07.89Z",
+            TimeGrain.WEEK_STARTING_SUNDAY,
+            "2022-05-01 00:00:00",
+        ),
+        (
+            "2022-05-04T05:06:07.89Z",
+            TimeGrain.WEEK_STARTING_MONDAY,
+            "2022-05-02 00:00:00",
+        ),
+        ("2022-01-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-01-01 00:00:00"),
+        ("2022-02-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-01-01 00:00:00"),
+        ("2022-03-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-01-01 00:00:00"),
+        ("2022-04-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-04-01 00:00:00"),
+        ("2022-05-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-04-01 00:00:00"),
+        ("2022-06-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-04-01 00:00:00"),
+        ("2022-07-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-07-01 00:00:00"),
+        ("2022-08-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-07-01 00:00:00"),
+        ("2022-09-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-07-01 00:00:00"),
+        ("2022-10-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-10-01 00:00:00"),
+        ("2022-11-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-10-01 00:00:00"),
+        ("2022-12-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-10-01 00:00:00"),
+        ("2022-01-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-01-01 00:00:00"),
+        ("2022-02-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-01-01 00:00:00"),
+        ("2022-03-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-01-01 00:00:00"),
+        ("2022-04-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-04-01 00:00:00"),
+        ("2022-05-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-04-01 00:00:00"),
+        ("2022-06-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-04-01 00:00:00"),
+        ("2022-07-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-07-01 00:00:00"),
+        ("2022-08-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-07-01 00:00:00"),
+        ("2022-09-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-07-01 00:00:00"),
+        ("2022-10-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-10-01 00:00:00"),
+        ("2022-11-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-10-01 00:00:00"),
+        ("2022-12-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-10-01 00:00:00"),
     ],
 )
 def test_time_grain_expressions(dttm: str, grain: str, expected: str) -> None: