You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by be...@apache.org on 2023/07/25 20:50:56 UTC

[superset] branch db-diagnostics updated: More tests

This is an automated email from the ASF dual-hosted git repository.

beto pushed a commit to branch db-diagnostics
in repository https://gitbox.apache.org/repos/asf/superset.git


The following commit(s) were added to refs/heads/db-diagnostics by this push:
     new b2f62ecd69 More tests
b2f62ecd69 is described below

commit b2f62ecd69763ff4ff4abb17fdff978dbbc003b2
Author: Beto Dealmeida <ro...@dealmeida.net>
AuthorDate: Tue Jul 25 13:50:44 2023 -0700

    More tests
---
 superset/cli/test_db.py            | 125 +++++++++++++++++++++++++++++++++++--
 superset/db_engine_specs/lib.py    |  11 ++--
 superset/db_engine_specs/sqlite.py |  35 +++++++++++
 3 files changed, 163 insertions(+), 8 deletions(-)

diff --git a/superset/cli/test_db.py b/superset/cli/test_db.py
index 1a2942ae2c..9673a85599 100644
--- a/superset/cli/test_db.py
+++ b/superset/cli/test_db.py
@@ -26,6 +26,7 @@ from rich.console import Console
 from sqlalchemy import create_engine
 from sqlalchemy.engine import Engine
 from sqlalchemy.engine.url import make_url
+from sqlalchemy.exc import NoSuchModuleError
 
 from superset.db_engine_specs import load_engine_specs
 from superset.db_engine_specs.base import BaseEngineSpec
@@ -37,6 +38,48 @@ LIMIT_METHODS = {
     "FETCH_MANY": "runs the query unmodified but fetches only LIMIT rows from the cursor",
 }
 
+DATABASE_DETAILS = {
+    "Supports JOINs": "joins",
+    "Supports subqueries": "subqueries",
+    "Allows aliases in the SELECT statement": "alias_in_select",
+    "Allows referencing aliases in the ORDER BY statement": "alias_in_orderby",
+    "Supports secondary time columns": "secondary_time_columns",
+    "Allows omitting time filters from inline GROUP BYs": "time_groupby_inline",
+    "Able to use source column when an alias overshadows it": "alias_to_source_column",
+    "Allows aggregations in ORDER BY not present in the SELECT": "order_by_not_in_select",
+    "Allows expressions in ORDER BY": "expressions_in_orderby",
+    "Allows CTE as a subquery": "cte_in_subquery",
+    "Allows LIMIT clause (instead of TOP)": "limit_clause",
+    "Maximum column name": "max_column_name",
+    "Allows comments": "sql_comments",
+    "Colons must be escaped": "escaped_colons",
+}
+
+BASIC_FEATURES = {
+    "Masks/unmasks encrypted_extra": "masked_encrypted_extra",
+    "Has column type mappings": "column_type_mapping",
+    "Returns a list of function names": "function_names",
+}
+NICE_TO_HAVE_FEATURES = {
+    "Supports user impersonation": "user_impersonation",
+    "Supports file upload": "file_upload",
+    "Returns extra table metadata": "extra_table_metadata",
+    "Maps driver exceptions to Superset exceptions": "dbapi_exception_mapping",
+    "Parses error messages and returns Superset errors": "custom_errors",
+    "Supports changing the schema per-query": "dynamic_schema",
+    "Supports catalogs": "catalog",
+    "Supports changing the catalog per-query": "dynamic_catalog",
+    "Can be connected through an SSH tunnel": "ssh_tunneling",
+    "Allows query to be canceled": "query_cancelation",
+    "Returns additional metrics on dataset creation": "get_metrics",
+    "Supports querying the latest partition only": "where_latest_partition",
+}
+ADVANCED_FEATURES = {
+    "Expands complex types (arrays, structs) into rows/columns": "expand_data",
+    "Supports query cost estimation": "query_cost_estimation",
+    "Supports validating SQL before running query": "sql_validation",
+}
+
 
 @click.command()
 @click.argument("sqlalchemy_uri")
@@ -57,7 +100,7 @@ def test_db(sqlalchemy_uri: str, raw_connect_args: str | None = None) -> None:
       3. The database connectivity and performance.
 
     It's useful for people developing DB engine specs and/or SQLAlchemy dialects, and
-    also to test new DB API 2.0 drivers.
+    also to test new versions of DB API 2.0 drivers.
 
     TODO:
 
@@ -115,7 +158,14 @@ def test_db_engine_spec(
     """
     spec: Type[BaseEngineSpec] | None = None
     for spec in load_engine_specs():
-        if spec.supports_url(make_url(sqlalchemy_uri)):
+        try:
+            supported = spec.supports_url(make_url(sqlalchemy_uri))
+        except NoSuchModuleError:
+            console.print("[red]No SQLAlchemy dialect found for the URI!")
+            console.print("[bold]Exiting...")
+            sys.exit(1)
+
+        if supported:
             if spec.__module__.startswith("superset.db_engine_specs"):
                 console.print(
                     f":thumbs_up: [green]Found DB engine spec: [bold]{spec.engine_name}"
@@ -142,11 +192,29 @@ def test_db_engine_spec(
     console.print("  - Method used to apply LIMIT to queries:", info["limit_method"])
     for k, v in LIMIT_METHODS.items():
         console.print(f"    - {k}: {v}")
-    console.print("  - Supports JOINs: ", info["joins"])
+    for feature, key in DATABASE_DETAILS.items():
+        console.print(f"  - {feature}:", info[key])
 
+    console.print("[bold]Checking for basic features...")
     console.print("Supported time grains:")
     for k, v in info["time_grains"].items():
-        console.print(f"  - {k}: {v}")
+        score = " (+1)" if v else ""
+        console.print(f"  - {k}: {v}{score}")
+    for k, v in BASIC_FEATURES.items():
+        score = " (+10)" if info[v] else ""
+        console.print(f"{k}: {info[v]}{score}")
+
+    console.print("[bold]Checking for nice-to-have features...")
+    for k, v in NICE_TO_HAVE_FEATURES.items():
+        score = " (+10)" if info[v] else ""
+        console.print(f"{k}: {info[v]}{score}")
+
+    console.print("[bold]Checking for advanced features...")
+    for k, v in ADVANCED_FEATURES.items():
+        score = " (+10)" if info[v] else ""
+        console.print(f"{k}: {info[v]}{score}")
+
+    console.print("[bold]Overall score: {score}/{max_score}".format(**info))
 
 
 def test_sqlalchemy_dialect(
@@ -158,6 +226,54 @@ def test_sqlalchemy_dialect(
     Test the SQLAlchemy dialect, making sure it supports everything Superset needs.
     """
     engine = create_engine(sqlalchemy_uri, connect_args=connect_args)
+    dialect = engine.dialect
+
+    console.print("[bold]Checking functions used by the inspector...")
+    keys = [
+        "get_schema_names",
+        "get_table_names",
+        "get_view_names",
+        "get_indexes",
+        "get_table_comment",
+        "get_columns",
+        "get_unique_constraints",
+        "get_check_constraints",
+        "get_pk_constraint",
+        "get_foreign_keys",
+    ]
+    for key in keys:
+        console.print(f"  - {key}:", hasattr(dialect, key))
+
+    console.print("[bold]Checking dialect attributes...")
+    if hasattr(dialect, "dbapi"):
+        console.print(f"  - dbapi: [bold]{dialect.dbapi.__name__}")
+    else:
+        console.print("  - dbapi:", None)
+
+    attrs = [
+        "name",
+        "driver",
+        "supports_multivalues_insert",
+    ]
+    for attr in attrs:
+        console.print(f"  - {attr}:", getattr(dialect, attr, None))
+
+    console.print("Supports do_ping:", hasattr(dialect, "do_ping"))
+    console.print(
+        "Can quote identifiers:",
+        hasattr(dialect, "identifier_preparer")
+        and hasattr(dialect.identifier_preparer, "quote"),
+    )
+
+    console.print(
+        "Doesn't require name normalization:",
+        not dialect.requires_name_normalize,
+    )
+    if dialect.requires_name_normalize:
+        console.print(
+            "  - Implements denormalize_name:", hasattr(dialect, "denormalize_name")
+        )
+
     return engine
 
 
@@ -169,4 +285,5 @@ def test_database_connectivity(console: Console, engine: Engine) -> None:
             console.print(":thumbs_up: [green]Connected successfully!")
         except Exception as ex:  # pylint: disable=broad-except
             console.print(f":thumbs_down: [red]Failed to connect: {ex}")
+            console.print("[bold]Exiting...")
             sys.exit(1)
diff --git a/superset/db_engine_specs/lib.py b/superset/db_engine_specs/lib.py
index dbe1f102e9..d786f63f49 100644
--- a/superset/db_engine_specs/lib.py
+++ b/superset/db_engine_specs/lib.py
@@ -54,8 +54,8 @@ def diagnose(spec: Type[BaseEngineSpec]) -> dict[str, Any]:
             "secondary_time_columns": spec.time_secondary_columns,
             "time_groupby_inline": spec.time_groupby_inline,
             "alias_to_source_column": not spec.allows_alias_to_source_column,
-            "order_by_in_select": spec.allows_hidden_orderby_agg,
-            "expression_in_orderby": spec.allows_hidden_cc_in_orderby,
+            "order_by_not_in_select": spec.allows_hidden_orderby_agg,
+            "expressions_in_orderby": spec.allows_hidden_cc_in_orderby,
             "cte_in_subquery": spec.allows_cte_in_subquery,
             "limit_clause": spec.allow_limit_clause,
             "max_column_name": spec.max_column_name_length,
@@ -119,6 +119,9 @@ def diagnose(spec: Type[BaseEngineSpec]) -> dict[str, Any]:
     score += sum(10 * int(output[key]) for key in nice_to_have)
     score += sum(10 * int(output[key]) for key in advanced)
     output["score"] = score
+    output["max_score"] = (
+        len(TimeGrain) + 10 * len(basic) + 10 * len(nice_to_have) + 10 * len(advanced)
+    )
 
     return output
 
@@ -154,8 +157,8 @@ def generate_table() -> List[Tuple[Any, ...]]:
         "secondary_time_columns",
         "time_groupby_inline",
         "alias_to_source_column",
-        "order_by_in_select",
-        "expression_in_orderby",
+        "order_by_not_in_select",
+        "expressions_in_orderby",
         "cte_in_subquery",
         "limit_clause",
         "max_column_name",
diff --git a/superset/db_engine_specs/sqlite.py b/superset/db_engine_specs/sqlite.py
index 06d5537509..49b9cdb2e3 100644
--- a/superset/db_engine_specs/sqlite.py
+++ b/superset/db_engine_specs/sqlite.py
@@ -42,8 +42,36 @@ class SqliteEngineSpec(BaseEngineSpec):
     _time_grain_expressions = {
         None: "{col}",
         TimeGrain.SECOND: "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:%S', {col}))",
+        TimeGrain.FIVE_SECONDS: (
+            "DATETIME({col}, printf('-%d seconds', "
+            "CAST(strftime('%S', {col}) AS INT) % 5))"
+        ),
+        TimeGrain.THIRTY_SECONDS: (
+            "DATETIME({col}, printf('-%d seconds', "
+            "CAST(strftime('%S', {col}) AS INT) % 30))"
+        ),
         TimeGrain.MINUTE: "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}))",
+        TimeGrain.FIVE_MINUTES: (
+            "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}), printf('-%d minutes', "
+            "CAST(strftime('%M', {col}) AS INT) % 5))"
+        ),
+        TimeGrain.TEN_MINUTES: (
+            "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}), printf('-%d minutes', "
+            "CAST(strftime('%M', {col}) AS INT) % 10))"
+        ),
+        TimeGrain.FIFTEEN_MINUTES: (
+            "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}), printf('-%d minutes', "
+            "CAST(strftime('%M', {col}) AS INT) % 15))"
+        ),
+        TimeGrain.THIRTY_MINUTES: (
+            "DATETIME(STRFTIME('%Y-%m-%dT%H:%M:00', {col}), printf('-%d minutes', "
+            "CAST(strftime('%M', {col}) AS INT) % 30))"
+        ),
         TimeGrain.HOUR: "DATETIME(STRFTIME('%Y-%m-%dT%H:00:00', {col}))",
+        TimeGrain.SIX_HOURS: (
+            "DATETIME(STRFTIME('%Y-%m-%dT%H:00:00', {col}), printf('-%d hours', "
+            "CAST(strftime('%H', {col}) AS INT) % 6))"
+        ),
         TimeGrain.DAY: "DATETIME({col}, 'start of day')",
         TimeGrain.WEEK: "DATETIME({col}, 'start of day', \
             -strftime('%w', {col}) || ' days')",
@@ -62,6 +90,13 @@ class SqliteEngineSpec(BaseEngineSpec):
             "DATETIME({col}, 'start of day', 'weekday 1', '-7 days')"
         ),
     }
+    # not sure why these are different
+    _time_grain_expressions.update(
+        {
+            TimeGrain.HALF_HOUR: _time_grain_expressions[TimeGrain.THIRTY_MINUTES],
+            TimeGrain.QUARTER_YEAR: _time_grain_expressions[TimeGrain.QUARTER],
+        }
+    )
 
     custom_errors: dict[Pattern[str], tuple[str, SupersetErrorType, dict[str, Any]]] = {
         COLUMN_DOES_NOT_EXIST_REGEX: (