You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by be...@apache.org on 2023/07/25 20:50:56 UTC
[superset] branch db-diagnostics updated: More tests
This is an automated email from the ASF dual-hosted git repository.
beto pushed a commit to branch db-diagnostics
in repository https://gitbox.apache.org/repos/asf/superset.git
The following commit(s) were added to refs/heads/db-diagnostics by this push:
new b2f62ecd69 More tests
b2f62ecd69 is described below
commit b2f62ecd69763ff4ff4abb17fdff978dbbc003b2
Author: Beto Dealmeida <ro...@dealmeida.net>
AuthorDate: Tue Jul 25 13:50:44 2023 -0700
More tests
---
superset/cli/test_db.py | 125 +++++++++++++++++++++++++++++++++++--
superset/db_engine_specs/lib.py | 11 ++--
superset/db_engine_specs/sqlite.py | 35 +++++++++++
3 files changed, 163 insertions(+), 8 deletions(-)
diff --git a/superset/cli/test_db.py b/superset/cli/test_db.py
index 1a2942ae2c..9673a85599 100644
--- a/superset/cli/test_db.py
+++ b/superset/cli/test_db.py
@@ -26,6 +26,7 @@ from rich.console import Console
from sqlalchemy import create_engine
from sqlalchemy.engine import Engine
from sqlalchemy.engine.url import make_url
+from sqlalchemy.exc import NoSuchModuleError
from superset.db_engine_specs import load_engine_specs
from superset.db_engine_specs.base import BaseEngineSpec
@@ -37,6 +38,48 @@ LIMIT_METHODS = {
"FETCH_MANY": "runs the query unmodified but fetches only LIMIT rows from the cursor",
}
+DATABASE_DETAILS = {
+ "Supports JOINs": "joins",
+ "Supports subqueries": "subqueries",
+ "Allows aliases in the SELECT statement": "alias_in_select",
+ "Allows referencing aliases in the ORDER BY statement": "alias_in_orderby",
+ "Supports secondary time columns": "secondary_time_columns",
+ "Allows omitting time filters from inline GROUP BYs": "time_groupby_inline",
+ "Able to use source column when an alias overshadows it": "alias_to_source_column",
+ "Allows aggregations in ORDER BY not present in the SELECT": "order_by_not_in_select",
+ "Allows expressions in ORDER BY": "expressions_in_orderby",
+ "Allows CTE as a subquery": "cte_in_subquery",
+ "Allows LIMIT clause (instead of TOP)": "limit_clause",
+ "Maximum column name": "max_column_name",
+ "Allows comments": "sql_comments",
+ "Colons must be escaped": "escaped_colons",
+}
+
+BASIC_FEATURES = {
+ "Masks/unmasks encrypted_extra": "masked_encrypted_extra",
+ "Has column type mappings": "column_type_mapping",
+ "Returns a list of function names": "function_names",
+}
+NICE_TO_HAVE_FEATURES = {
+ "Supports user impersonation": "user_impersonation",
+ "Supports file upload": "file_upload",
+ "Returns extra table metadata": "extra_table_metadata",
+ "Maps driver exceptions to Superset exceptions": "dbapi_exception_mapping",
+ "Parses error messages and returns Superset errors": "custom_errors",
+ "Supports changing the schema per-query": "dynamic_schema",
+ "Supports catalogs": "catalog",
+ "Supports changing the catalog per-query": "dynamic_catalog",
+ "Can be connected through an SSH tunnel": "ssh_tunneling",
+ "Allows query to be canceled": "query_cancelation",
+ "Returns additional metrics on dataset creation": "get_metrics",
+ "Supports querying the latest partition only": "where_latest_partition",
+}
+ADVANCED_FEATURES = {
+ "Expands complex types (arrays, structs) into rows/columns": "expand_data",
+ "Supports query cost estimation": "query_cost_estimation",
+ "Supports validating SQL before running query": "sql_validation",
+}
+
@click.command()
@click.argument("sqlalchemy_uri")
@@ -57,7 +100,7 @@ def test_db(sqlalchemy_uri: str, raw_connect_args: str | None = None) -> None:
3. The database connectivity and performance.
It's useful for people developing DB engine specs and/or SQLAlchemy dialects, and
- also to test new DB API 2.0 drivers.
+ also to test new versions of DB API 2.0 drivers.
TODO:
@@ -115,7 +158,14 @@ def test_db_engine_spec(
"""
spec: Type[BaseEngineSpec] | None = None
for spec in load_engine_specs():
- if spec.supports_url(make_url(sqlalchemy_uri)):
+ try:
+ supported = spec.supports_url(make_url(sqlalchemy_uri))
+ except NoSuchModuleError:
+ console.print("[red]No SQLAlchemy dialect found for the URI!")
+ console.print("[bold]Exiting...")
+ sys.exit(1)
+
+ if supported:
if spec.__module__.startswith("superset.db_engine_specs"):
console.print(
f":thumbs_up: [green]Found DB engine spec: [bold]{spec.engine_name}"
@@ -142,11 +192,29 @@ def test_db_engine_spec(
console.print(" - Method used to apply LIMIT to queries:", info["limit_method"])
for k, v in LIMIT_METHODS.items():
console.print(f" - {k}: {v}")
- console.print(" - Supports JOINs: ", info["joins"])
+ for feature, key in DATABASE_DETAILS.items():
+ console.print(f" - {feature}:", info[key])
+ console.print("[bold]Checking for basic features...")
console.print("Supported time grains:")
for k, v in info["time_grains"].items():
- console.print(f" - {k}: {v}")
+ score = " (+1)" if v else ""
+ console.print(f" - {k}: {v}{score}")
+ for k, v in BASIC_FEATURES.items():
+ score = " (+10)" if info[v] else ""
+ console.print(f"{k}: {info[v]}{score}")
+
+ console.print("[bold]Checking for nice-to-have features...")
+ for k, v in NICE_TO_HAVE_FEATURES.items():
+ score = " (+10)" if info[v] else ""
+ console.print(f"{k}: {info[v]}{score}")
+
+ console.print("[bold]Checking for advanced features...")
+ for k, v in ADVANCED_FEATURES.items():
+ score = " (+10)" if info[v] else ""
+ console.print(f"{k}: {info[v]}{score}")
+
+ console.print("[bold]Overall score: {score}/{max_score}".format(**info))
def test_sqlalchemy_dialect(
@@ -158,6 +226,54 @@ def test_sqlalchemy_dialect(
Test the SQLAlchemy dialect, making sure it supports everything Superset needs.
"""
engine = create_engine(sqlalchemy_uri, connect_args=connect_args)
+ dialect = engine.dialect
+
+ console.print("[bold]Checking functions used by the inspector...")
+ keys = [
+ "get_schema_names",
+ "get_table_names",
+ "get_view_names",
+ "get_indexes",
+ "get_table_comment",
+ "get_columns",
+ "get_unique_constraints",
+ "get_check_constraints",
+ "get_pk_constraint",
+ "get_foreign_keys",
+ ]
+ for key in keys:
+ console.print(f" - {key}:", hasattr(dialect, key))
+
+ console.print("[bold]Checking dialect attributes...")
+ if hasattr(dialect, "dbapi"):
+ console.print(f" - dbapi: [bold]{dialect.dbapi.__name__}")
+ else:
+ console.print(" - dbapi:", None)
+
+ attrs = [
+ "name",
+ "driver",
+ "supports_multivalues_insert",
+ ]
+ for attr in attrs:
+ console.print(f" - {attr}:", getattr(dialect, attr, None))
+
+ console.print("Supports do_ping:", hasattr(dialect, "do_ping"))
+ console.print(
+ "Can quote identifiers:",
+ hasattr(dialect, "identifier_preparer")
+ and hasattr(dialect.identifier_preparer, "quote"),
+ )
+
+ console.print(
+ "Doesn't require name normalization:",
+ not dialect.requires_name_normalize,
+ )
+ if dialect.requires_name_normalize:
+ console.print(
+ " - Implements denormalize_name:", hasattr(dialect, "denormalize_name")
+ )
+
return engine
@@ -169,4 +285,5 @@ def test_database_connectivity(console: Console, engine: Engine) -> None:
console.print(":thumbs_up: [green]Connected successfully!")
except Exception as ex: # pylint: disable=broad-except
console.print(f":thumbs_down: [red]Failed to connect: {ex}")
+ console.print("[bold]Exiting...")
sys.exit(1)
diff --git a/superset/db_engine_specs/lib.py b/superset/db_engine_specs/lib.py
index dbe1f102e9..d786f63f49 100644
--- a/superset/db_engine_specs/lib.py
+++ b/superset/db_engine_specs/lib.py
@@ -54,8 +54,8 @@ def diagnose(spec: Type[BaseEngineSpec]) -> dict[str, Any]:
"secondary_time_columns": spec.time_secondary_columns,
"time_groupby_inline": spec.time_groupby_inline,
"alias_to_source_column": not spec.allows_alias_to_source_column,
- "order_by_in_select": spec.allows_hidden_orderby_agg,
- "expression_in_orderby": spec.allows_hidden_cc_in_orderby,
+ "order_by_not_in_select": spec.allows_hidden_orderby_agg,
+ "expressions_in_orderby": spec.allows_hidden_cc_in_orderby,
"cte_in_subquery": spec.allows_cte_in_subquery,
"limit_clause": spec.allow_limit_clause,
"max_column_name": spec.max_column_name_length,
@@ -119,6 +119,9 @@ def diagnose(spec: Type[BaseEngineSpec]) -> dict[str, Any]:
score += sum(10 * int(output[key]) for key in nice_to_have)
score += sum(10 * int(output[key]) for key in advanced)
output["score"] = score
+ output["max_score"] = (
+ len(TimeGrain) + 10 * len(basic) + 10 * len(nice_to_have) + 10 * len(advanced)
+ )
return output
@@ -154,8 +157,8 @@ def generate_table() -> List[Tuple[Any, ...]]:
"secondary_time_columns",
"time_groupby_inline",
"alias_to_source_column",
- "order_by_in_select",
- "expression_in_orderby",
+ "order_by_not_in_select",
+ "expressions_in_orderby",
"cte_in_subquery",
"limit_clause",
"max_column_name",
diff --git a/superset/db_engine_specs/sqlite.py b/superset/db_engine_specs/sqlite.py
index 06d5537509..49b9cdb2e3 100644
--- a/superset/db_engine_specs/sqlite.py
+++ b/superset/db_engine_specs/sqlite.py
@@ -42,8 +42,36 @@ class SqliteEngineSpec(BaseEngineSpec):
_time_grain_expressions = {
None: "{col}",
TimeGrain.SECOND: "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:%S', {col}))",
+ TimeGrain.FIVE_SECONDS: (
+ "DATETIME({col}, printf('-%d seconds', "
+ "CAST(strftime('%S', {col}) AS INT) % 5))"
+ ),
+ TimeGrain.THIRTY_SECONDS: (
+ "DATETIME({col}, printf('-%d seconds', "
+ "CAST(strftime('%S', {col}) AS INT) % 30))"
+ ),
TimeGrain.MINUTE: "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}))",
+ TimeGrain.FIVE_MINUTES: (
+ "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}), printf('-%d minutes', "
+ "CAST(strftime('%M', {col}) AS INT) % 5))"
+ ),
+ TimeGrain.TEN_MINUTES: (
+ "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}), printf('-%d minutes', "
+ "CAST(strftime('%M', {col}) AS INT) % 10))"
+ ),
+ TimeGrain.FIFTEEN_MINUTES: (
+ "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}), printf('-%d minutes', "
+ "CAST(strftime('%M', {col}) AS INT) % 15))"
+ ),
+ TimeGrain.THIRTY_MINUTES: (
+ "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}), printf('-%d minutes', "
+ "CAST(strftime('%M', {col}) AS INT) % 30))"
+ ),
TimeGrain.HOUR: "DATETIME(STRFTIME('%Y-%m-%dT%H:00:00', {col}))",
+ TimeGrain.SIX_HOURS: (
+ "DATETIME(STRFTIME('%Y-%m-%dT%H:00:00', {col}), printf('-%d hours', "
+ "CAST(strftime('%H', {col}) AS INT) % 6))"
+ ),
TimeGrain.DAY: "DATETIME({col}, 'start of day')",
TimeGrain.WEEK: "DATETIME({col}, 'start of day', \
-strftime('%w', {col}) || ' days')",
@@ -62,6 +90,13 @@ class SqliteEngineSpec(BaseEngineSpec):
"DATETIME({col}, 'start of day', 'weekday 1', '-7 days')"
),
}
+ # not sure why these are different
+ _time_grain_expressions.update(
+ {
+ TimeGrain.HALF_HOUR: _time_grain_expressions[TimeGrain.THIRTY_MINUTES],
+ TimeGrain.QUARTER_YEAR: _time_grain_expressions[TimeGrain.QUARTER],
+ }
+ )
custom_errors: dict[Pattern[str], tuple[str, SupersetErrorType, dict[str, Any]]] = {
COLUMN_DOES_NOT_EXIST_REGEX: (