You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by be...@apache.org on 2023/08/07 23:00:03 UTC
[superset] branch master updated: feat: improve SQLite DB engine spec (#24909)
This is an automated email from the ASF dual-hosted git repository.
beto pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git
The following commit(s) were added to refs/heads/master by this push:
new 85a7d5cb3e feat: improve SQLite DB engine spec (#24909)
85a7d5cb3e is described below
commit 85a7d5cb3ebe833cfc2980f0846f15bb7ce1dd01
Author: Beto Dealmeida <ro...@dealmeida.net>
AuthorDate: Mon Aug 7 15:59:56 2023 -0700
feat: improve SQLite DB engine spec (#24909)
---
superset/db_engine_specs/gsheets.py | 7 +-
superset/db_engine_specs/shillelagh.py | 18 +++
superset/db_engine_specs/sqlite.py | 175 +++++++++++++++++++++++-
tests/integration_tests/databases/api_tests.py | 132 +++++++++++++++++-
tests/unit_tests/db_engine_specs/test_sqlite.py | 88 ++++++++----
5 files changed, 383 insertions(+), 37 deletions(-)
diff --git a/superset/db_engine_specs/gsheets.py b/superset/db_engine_specs/gsheets.py
index abf5bac48f..777499a8f9 100644
--- a/superset/db_engine_specs/gsheets.py
+++ b/superset/db_engine_specs/gsheets.py
@@ -32,7 +32,7 @@ from typing_extensions import TypedDict
from superset import security_manager
from superset.constants import PASSWORD_MASK
from superset.databases.schemas import encrypted_field_properties, EncryptedString
-from superset.db_engine_specs.sqlite import SqliteEngineSpec
+from superset.db_engine_specs.shillelagh import ShillelaghEngineSpec
from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
if TYPE_CHECKING:
@@ -65,14 +65,13 @@ class GSheetsPropertiesType(TypedDict):
catalog: dict[str, str]
-class GSheetsEngineSpec(SqliteEngineSpec):
+class GSheetsEngineSpec(ShillelaghEngineSpec):
"""Engine for Google spreadsheets"""
- engine = "gsheets"
engine_name = "Google Sheets"
+ engine = "gsheets"
allows_joins = True
allows_subqueries = True
- disable_ssh_tunneling = True
parameters_schema = GSheetsParametersSchema()
default_driver = "apsw"
diff --git a/superset/db_engine_specs/shillelagh.py b/superset/db_engine_specs/shillelagh.py
index 3730122448..61820824b0 100644
--- a/superset/db_engine_specs/shillelagh.py
+++ b/superset/db_engine_specs/shillelagh.py
@@ -14,8 +14,15 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
from superset.db_engine_specs.sqlite import SqliteEngineSpec
+if TYPE_CHECKING:
+ from superset.models.core import Database
+
class ShillelaghEngineSpec(SqliteEngineSpec):
"""Engine for shillelagh"""
@@ -28,3 +35,14 @@ class ShillelaghEngineSpec(SqliteEngineSpec):
allows_joins = True
allows_subqueries = True
+
+ @classmethod
+ def get_function_names(
+ cls,
+ database: Database,
+ ) -> list[str]:
+ return super().get_function_names(database) + [
+ "sleep",
+ "version",
+ "get_metadata",
+ ]
diff --git a/superset/db_engine_specs/sqlite.py b/superset/db_engine_specs/sqlite.py
index 06d5537509..c06660abbd 100644
--- a/superset/db_engine_specs/sqlite.py
+++ b/superset/db_engine_specs/sqlite.py
@@ -14,10 +14,13 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
+
+from __future__ import annotations
+
import re
from datetime import datetime
from re import Pattern
-from typing import Any, Optional, TYPE_CHECKING
+from typing import Any, TYPE_CHECKING
from flask_babel import gettext as __
from sqlalchemy import types
@@ -39,11 +42,41 @@ class SqliteEngineSpec(BaseEngineSpec):
engine = "sqlite"
engine_name = "SQLite"
+ disable_ssh_tunneling = True
+
_time_grain_expressions = {
None: "{col}",
TimeGrain.SECOND: "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:%S', {col}))",
+ TimeGrain.FIVE_SECONDS: (
+ "DATETIME({col}, printf('-%d seconds', "
+ "CAST(strftime('%S', {col}) AS INT) % 5))"
+ ),
+ TimeGrain.THIRTY_SECONDS: (
+ "DATETIME({col}, printf('-%d seconds', "
+ "CAST(strftime('%S', {col}) AS INT) % 30))"
+ ),
TimeGrain.MINUTE: "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}))",
+ TimeGrain.FIVE_MINUTES: (
+ "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}), printf('-%d minutes', "
+ "CAST(strftime('%M', {col}) AS INT) % 5))"
+ ),
+ TimeGrain.TEN_MINUTES: (
+ "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}), printf('-%d minutes', "
+ "CAST(strftime('%M', {col}) AS INT) % 10))"
+ ),
+ TimeGrain.FIFTEEN_MINUTES: (
+ "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}), printf('-%d minutes', "
+ "CAST(strftime('%M', {col}) AS INT) % 15))"
+ ),
+ TimeGrain.THIRTY_MINUTES: (
+ "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}), printf('-%d minutes', "
+ "CAST(strftime('%M', {col}) AS INT) % 30))"
+ ),
TimeGrain.HOUR: "DATETIME(STRFTIME('%Y-%m-%dT%H:00:00', {col}))",
+ TimeGrain.SIX_HOURS: (
+ "DATETIME(STRFTIME('%Y-%m-%dT%H:00:00', {col}), printf('-%d hours', "
+ "CAST(strftime('%H', {col}) AS INT) % 6))"
+ ),
TimeGrain.DAY: "DATETIME({col}, 'start of day')",
TimeGrain.WEEK: "DATETIME({col}, 'start of day', \
-strftime('%w', {col}) || ' days')",
@@ -62,6 +95,13 @@ class SqliteEngineSpec(BaseEngineSpec):
"DATETIME({col}, 'start of day', 'weekday 1', '-7 days')"
),
}
+ # not sure why these are different
+ _time_grain_expressions.update(
+ {
+ TimeGrain.HALF_HOUR: _time_grain_expressions[TimeGrain.THIRTY_MINUTES],
+ TimeGrain.QUARTER_YEAR: _time_grain_expressions[TimeGrain.QUARTER],
+ }
+ )
custom_errors: dict[Pattern[str], tuple[str, SupersetErrorType, dict[str, Any]]] = {
COLUMN_DOES_NOT_EXIST_REGEX: (
@@ -77,8 +117,8 @@ class SqliteEngineSpec(BaseEngineSpec):
@classmethod
def convert_dttm(
- cls, target_type: str, dttm: datetime, db_extra: Optional[dict[str, Any]] = None
- ) -> Optional[str]:
+ cls, target_type: str, dttm: datetime, db_extra: dict[str, Any] | None = None
+ ) -> str | None:
sqla_type = cls.get_sqla_column_type(target_type)
if isinstance(sqla_type, (types.String, types.DateTime)):
return f"""'{dttm.isoformat(sep=" ", timespec="seconds")}'"""
@@ -86,7 +126,134 @@ class SqliteEngineSpec(BaseEngineSpec):
@classmethod
def get_table_names(
- cls, database: "Database", inspector: Inspector, schema: Optional[str]
+ cls,
+ database: Database,
+ inspector: Inspector,
+ schema: str | None,
) -> set[str]:
"""Need to disregard the schema for Sqlite"""
return set(inspector.get_table_names())
+
+ @classmethod
+ def get_function_names(
+ cls,
+ database: Database,
+ ) -> list[str]:
+ """
+ Return function names.
+ """
+ return [
+ "abs",
+ "acos",
+ "acosh",
+ "asin",
+ "asinh",
+ "atan",
+ "atan2",
+ "atanh",
+ "avg",
+ "ceil",
+ "ceiling",
+ "changes",
+ "char",
+ "coalesce",
+ "cos",
+ "cosh",
+ "count",
+ "cume_dist",
+ "date",
+ "datetime",
+ "degrees",
+ "dense_rank",
+ "exp",
+ "first_value",
+ "floor",
+ "format",
+ "glob",
+ "group_concat",
+ "hex",
+ "ifnull",
+ "iif",
+ "instr",
+ "json",
+ "json_array",
+ "json_array_length",
+ "json_each",
+ "json_error_position",
+ "json_extract",
+ "json_group_array",
+ "json_group_object",
+ "json_insert",
+ "json_object",
+ "json_patch",
+ "json_quote",
+ "json_remove",
+ "json_replace",
+ "json_set",
+ "json_tree",
+ "json_type",
+ "json_valid",
+ "julianday",
+ "lag",
+ "last_insert_rowid",
+ "last_value",
+ "lead",
+ "length",
+ "like",
+ "likelihood",
+ "likely",
+ "ln",
+ "load_extension",
+ "log",
+ "log10",
+ "log2",
+ "lower",
+ "ltrim",
+ "max",
+ "min",
+ "mod",
+ "nth_value",
+ "ntile",
+ "nullif",
+ "percent_rank",
+ "pi",
+ "pow",
+ "power",
+ "printf",
+ "quote",
+ "radians",
+ "random",
+ "randomblob",
+ "rank",
+ "replace",
+ "round",
+ "row_number",
+ "rtrim",
+ "sign",
+ "sin",
+ "sinh",
+ "soundex",
+ "sqlite_compileoption_get",
+ "sqlite_compileoption_used",
+ "sqlite_offset",
+ "sqlite_source_id",
+ "sqlite_version",
+ "sqrt",
+ "strftime",
+ "substr",
+ "substring",
+ "sum",
+ "tan",
+ "tanh",
+ "time",
+ "total_changes",
+ "trim",
+ "trunc",
+ "typeof",
+ "unhex",
+ "unicode",
+ "unixepoch",
+ "unlikely",
+ "upper",
+ "zeroblob",
+ ]
diff --git a/tests/integration_tests/databases/api_tests.py b/tests/integration_tests/databases/api_tests.py
index bd39d96574..8bf4867d01 100644
--- a/tests/integration_tests/databases/api_tests.py
+++ b/tests/integration_tests/databases/api_tests.py
@@ -2836,7 +2836,7 @@ class TestDatabaseApi(SupersetTestCase):
)
def test_function_names(self, mock_get_function_names):
example_db = get_example_database()
- if example_db.backend in {"hive", "presto"}:
+ if example_db.backend in {"hive", "presto", "sqlite"}:
return
mock_get_function_names.return_value = ["AVG", "MAX", "SUM"]
@@ -2850,6 +2850,136 @@ class TestDatabaseApi(SupersetTestCase):
assert rv.status_code == 200
assert response == {"function_names": ["AVG", "MAX", "SUM"]}
+ def test_function_names_sqlite(self):
+ example_db = get_example_database()
+ if example_db.backend != "sqlite":
+ return
+
+ self.login(username="admin")
+ uri = "api/v1/database/1/function_names/"
+
+ rv = self.client.get(uri)
+ response = json.loads(rv.data.decode("utf-8"))
+
+ assert rv.status_code == 200
+ assert response == {
+ "function_names": [
+ "abs",
+ "acos",
+ "acosh",
+ "asin",
+ "asinh",
+ "atan",
+ "atan2",
+ "atanh",
+ "avg",
+ "ceil",
+ "ceiling",
+ "changes",
+ "char",
+ "coalesce",
+ "cos",
+ "cosh",
+ "count",
+ "cume_dist",
+ "date",
+ "datetime",
+ "degrees",
+ "dense_rank",
+ "exp",
+ "first_value",
+ "floor",
+ "format",
+ "glob",
+ "group_concat",
+ "hex",
+ "ifnull",
+ "iif",
+ "instr",
+ "json",
+ "json_array",
+ "json_array_length",
+ "json_each",
+ "json_error_position",
+ "json_extract",
+ "json_group_array",
+ "json_group_object",
+ "json_insert",
+ "json_object",
+ "json_patch",
+ "json_quote",
+ "json_remove",
+ "json_replace",
+ "json_set",
+ "json_tree",
+ "json_type",
+ "json_valid",
+ "julianday",
+ "lag",
+ "last_insert_rowid",
+ "last_value",
+ "lead",
+ "length",
+ "like",
+ "likelihood",
+ "likely",
+ "ln",
+ "load_extension",
+ "log",
+ "log10",
+ "log2",
+ "lower",
+ "ltrim",
+ "max",
+ "min",
+ "mod",
+ "nth_value",
+ "ntile",
+ "nullif",
+ "percent_rank",
+ "pi",
+ "pow",
+ "power",
+ "printf",
+ "quote",
+ "radians",
+ "random",
+ "randomblob",
+ "rank",
+ "replace",
+ "round",
+ "row_number",
+ "rtrim",
+ "sign",
+ "sin",
+ "sinh",
+ "soundex",
+ "sqlite_compileoption_get",
+ "sqlite_compileoption_used",
+ "sqlite_offset",
+ "sqlite_source_id",
+ "sqlite_version",
+ "sqrt",
+ "strftime",
+ "substr",
+ "substring",
+ "sum",
+ "tan",
+ "tanh",
+ "time",
+ "total_changes",
+ "trim",
+ "trunc",
+ "typeof",
+ "unhex",
+ "unicode",
+ "unixepoch",
+ "unlikely",
+ "upper",
+ "zeroblob",
+ ]
+ }
+
@mock.patch("superset.databases.api.get_available_engine_specs")
@mock.patch("superset.databases.api.app")
def test_available(self, app, get_available_engine_specs):
diff --git a/tests/unit_tests/db_engine_specs/test_sqlite.py b/tests/unit_tests/db_engine_specs/test_sqlite.py
index 11ce174c0f..a31992afcf 100644
--- a/tests/unit_tests/db_engine_specs/test_sqlite.py
+++ b/tests/unit_tests/db_engine_specs/test_sqlite.py
@@ -21,6 +21,7 @@ from typing import Optional
import pytest
from sqlalchemy.engine import create_engine
+from superset.constants import TimeGrain
from tests.unit_tests.db_engine_specs.utils import assert_convert_dttm
from tests.unit_tests.fixtures.common import dttm
@@ -47,13 +48,21 @@ def test_convert_dttm(
@pytest.mark.parametrize(
"dttm,grain,expected",
[
- ("2022-05-04T05:06:07.89Z", "PT1S", "2022-05-04 05:06:07"),
- ("2022-05-04T05:06:07.89Z", "PT1M", "2022-05-04 05:06:00"),
- ("2022-05-04T05:06:07.89Z", "PT1H", "2022-05-04 05:00:00"),
- ("2022-05-04T05:06:07.89Z", "P1D", "2022-05-04 00:00:00"),
- ("2022-05-04T05:06:07.89Z", "P1W", "2022-05-01 00:00:00"),
- ("2022-05-04T05:06:07.89Z", "P1M", "2022-05-01 00:00:00"),
- ("2022-05-04T05:06:07.89Z", "P1Y", "2022-01-01 00:00:00"),
+ ("2022-05-04T05:06:07.89Z", TimeGrain.SECOND, "2022-05-04 05:06:07"),
+ ("2022-05-04T05:06:07.89Z", TimeGrain.FIVE_SECONDS, "2022-05-04 05:06:05"),
+ ("2022-05-04T05:06:37.89Z", TimeGrain.THIRTY_SECONDS, "2022-05-04 05:06:30"),
+ ("2022-05-04T05:06:07.89Z", TimeGrain.MINUTE, "2022-05-04 05:06:00"),
+ ("2022-05-04T05:06:07.89Z", TimeGrain.FIVE_MINUTES, "2022-05-04 05:05:00"),
+ ("2022-05-04T05:36:07.89Z", TimeGrain.TEN_MINUTES, "2022-05-04 05:30:00"),
+ ("2022-05-04T05:46:07.89Z", TimeGrain.FIFTEEN_MINUTES, "2022-05-04 05:45:00"),
+ ("2022-05-04T05:36:07.89Z", TimeGrain.THIRTY_MINUTES, "2022-05-04 05:30:00"),
+ ("2022-05-04T05:36:07.89Z", TimeGrain.HALF_HOUR, "2022-05-04 05:30:00"),
+ ("2022-05-04T05:06:07.89Z", TimeGrain.HOUR, "2022-05-04 05:00:00"),
+ ("2022-05-04T07:06:07.89Z", TimeGrain.SIX_HOURS, "2022-05-04 06:00:00"),
+ ("2022-05-04T05:06:07.89Z", TimeGrain.DAY, "2022-05-04 00:00:00"),
+ ("2022-05-04T05:06:07.89Z", TimeGrain.WEEK, "2022-05-01 00:00:00"),
+ ("2022-05-04T05:06:07.89Z", TimeGrain.MONTH, "2022-05-01 00:00:00"),
+ ("2022-05-04T05:06:07.89Z", TimeGrain.YEAR, "2022-01-01 00:00:00"),
# ___________________________
# | May 2022 |
# |---------------------------|
@@ -61,27 +70,50 @@ def test_convert_dttm(
# |---+---+---+---+---+---+---|
# | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
# ---------------------------
- # week ending Saturday
- ("2022-05-04T05:06:07.89Z", "P1W/1970-01-03T00:00:00Z", "2022-05-07 00:00:00"),
- # week ending Sunday
- ("2022-05-04T05:06:07.89Z", "P1W/1970-01-04T00:00:00Z", "2022-05-08 00:00:00"),
- # week starting Sunday
- ("2022-05-04T05:06:07.89Z", "1969-12-28T00:00:00Z/P1W", "2022-05-01 00:00:00"),
- # week starting Monday
- ("2022-05-04T05:06:07.89Z", "1969-12-29T00:00:00Z/P1W", "2022-05-02 00:00:00"),
- # tests for quarter
- ("2022-01-04T05:06:07.89Z", "P3M", "2022-01-01 00:00:00"),
- ("2022-02-04T05:06:07.89Z", "P3M", "2022-01-01 00:00:00"),
- ("2022-03-04T05:06:07.89Z", "P3M", "2022-01-01 00:00:00"),
- ("2022-04-04T05:06:07.89Z", "P3M", "2022-04-01 00:00:00"),
- ("2022-05-04T05:06:07.89Z", "P3M", "2022-04-01 00:00:00"),
- ("2022-06-04T05:06:07.89Z", "P3M", "2022-04-01 00:00:00"),
- ("2022-07-04T05:06:07.89Z", "P3M", "2022-07-01 00:00:00"),
- ("2022-08-04T05:06:07.89Z", "P3M", "2022-07-01 00:00:00"),
- ("2022-09-04T05:06:07.89Z", "P3M", "2022-07-01 00:00:00"),
- ("2022-10-04T05:06:07.89Z", "P3M", "2022-10-01 00:00:00"),
- ("2022-11-04T05:06:07.89Z", "P3M", "2022-10-01 00:00:00"),
- ("2022-12-04T05:06:07.89Z", "P3M", "2022-10-01 00:00:00"),
+ (
+ "2022-05-04T05:06:07.89Z",
+ TimeGrain.WEEK_ENDING_SATURDAY,
+ "2022-05-07 00:00:00",
+ ),
+ (
+ "2022-05-04T05:06:07.89Z",
+ TimeGrain.WEEK_ENDING_SUNDAY,
+ "2022-05-08 00:00:00",
+ ),
+ (
+ "2022-05-04T05:06:07.89Z",
+ TimeGrain.WEEK_STARTING_SUNDAY,
+ "2022-05-01 00:00:00",
+ ),
+ (
+ "2022-05-04T05:06:07.89Z",
+ TimeGrain.WEEK_STARTING_MONDAY,
+ "2022-05-02 00:00:00",
+ ),
+ ("2022-01-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-01-01 00:00:00"),
+ ("2022-02-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-01-01 00:00:00"),
+ ("2022-03-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-01-01 00:00:00"),
+ ("2022-04-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-04-01 00:00:00"),
+ ("2022-05-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-04-01 00:00:00"),
+ ("2022-06-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-04-01 00:00:00"),
+ ("2022-07-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-07-01 00:00:00"),
+ ("2022-08-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-07-01 00:00:00"),
+ ("2022-09-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-07-01 00:00:00"),
+ ("2022-10-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-10-01 00:00:00"),
+ ("2022-11-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-10-01 00:00:00"),
+ ("2022-12-04T05:06:07.89Z", TimeGrain.QUARTER_YEAR, "2022-10-01 00:00:00"),
+ ("2022-01-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-01-01 00:00:00"),
+ ("2022-02-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-01-01 00:00:00"),
+ ("2022-03-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-01-01 00:00:00"),
+ ("2022-04-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-04-01 00:00:00"),
+ ("2022-05-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-04-01 00:00:00"),
+ ("2022-06-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-04-01 00:00:00"),
+ ("2022-07-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-07-01 00:00:00"),
+ ("2022-08-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-07-01 00:00:00"),
+ ("2022-09-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-07-01 00:00:00"),
+ ("2022-10-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-10-01 00:00:00"),
+ ("2022-11-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-10-01 00:00:00"),
+ ("2022-12-04T05:06:07.89Z", TimeGrain.QUARTER, "2022-10-01 00:00:00"),
],
)
def test_time_grain_expressions(dttm: str, grain: str, expected: str) -> None: