You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by be...@apache.org on 2023/07/22 01:01:30 UTC

[superset] branch db-diagnostics updated (eebad62117 -> ac4c4dc5ee)

This is an automated email from the ASF dual-hosted git repository.

beto pushed a change to branch db-diagnostics
in repository https://gitbox.apache.org/repos/asf/superset.git


 discard eebad62117 WIP
     new ac4c4dc5ee WIP

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (eebad62117)
            \
             N -- N -- N   refs/heads/db-diagnostics (ac4c4dc5ee)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 superset/db_engine_specs/lib.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)


[superset] 01/01: WIP

Posted by be...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

beto pushed a commit to branch db-diagnostics
in repository https://gitbox.apache.org/repos/asf/superset.git

commit ac4c4dc5ee10fb9ce8f19d02ca5f78363ab8a718
Author: Beto Dealmeida <ro...@dealmeida.net>
AuthorDate: Fri Jul 21 17:45:52 2023 -0700

    WIP
---
 superset/db_engine_specs/README.md |  35 +++++++++++++
 superset/db_engine_specs/base.py   |   2 +-
 superset/db_engine_specs/lib.py    | 104 +++++++++++++++++++++++++++++++++++++
 3 files changed, 140 insertions(+), 1 deletion(-)

diff --git a/superset/db_engine_specs/README.md b/superset/db_engine_specs/README.md
new file mode 100644
index 0000000000..863404f4a4
--- /dev/null
+++ b/superset/db_engine_specs/README.md
@@ -0,0 +1,35 @@
+Database engine specifications
+==============================
+
+Superset uses [SQLAlchemy](https://www.sqlalchemy.org/) as an abstraction layer for running queries and fetching metadata from tables (like column names and types). Unfortunately, while SQLAlchemy offers enough functionality to allow connecting Superset to dozens of databases, there are still implementation details that differ across them. Because of this, Superset has an additional abstraction on top of SQLAlchemy, called a "database engine specification" or, simply, "DB engine spec".
+
+DB engine specs were created initially because there's no SQL standard for computing aggregations at different time grains. For example, to compute a daily metric in Trino or Postgres we would run a query like this:
+
+```sql
+SELECT
+  date_trunc('day', CAST(time_column) AS TIMESTAMP) AS day,
+  COUNT(*) AS metric
+FROM
+  some_table
+GROUP BY
+  1
+```
+
+For MySQL, instead of using the `date_trunc` function, we would need to write:
+
+```sql
+SELECT
+  DATE(time_column) AS day,
+  COUNT(*) AS metric
+FROM
+  some_table
+GROUP BY
+  1
+```
+
+Over time, more and more functionality was added to DB engine specs, including validating SQL, estimating the cost of queries before they are run, and understanding the semantics of error messages. These are all described in detail in this document, and in the table below you can see a summary of what features are supported by each database.
+
+Note that DB engine specs are completely optional. Superset can connect to any database supported by SQLAlchemy (or 3rd party dialects) even if there's no DB engine spec associated with it. But DB engine specs greatly improve the experience of working with a database in Superset.
+
+
+
diff --git a/superset/db_engine_specs/base.py b/superset/db_engine_specs/base.py
index af24e54790..53d4018272 100644
--- a/superset/db_engine_specs/base.py
+++ b/superset/db_engine_specs/base.py
@@ -358,7 +358,7 @@ class BaseEngineSpec:  # pylint: disable=too-many-public-methods
 
     force_column_alias_quotes = False
     arraysize = 0
-    max_column_name_length = 0
+    max_column_name_length: int | None = None
     try_remove_schema_from_table_name = True  # pylint: disable=invalid-name
     run_multiple_statements_as_one = False
     custom_errors: dict[
diff --git a/superset/db_engine_specs/lib.py b/superset/db_engine_specs/lib.py
new file mode 100644
index 0000000000..bd9bee0222
--- /dev/null
+++ b/superset/db_engine_specs/lib.py
@@ -0,0 +1,104 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import Any, Type
+
+from superset.constants import TimeGrain
+from superset.db_engine_specs.base import BaseEngineSpec
+
+
+def basic_diagnostics(spec: Type[BaseEngineSpec]) -> dict[str, Any]:
+    """
+    Run basic diagnostics on a given DB engine spec.
+    """
+    from superset.sql_validators.postgres import PostgreSQLValidator
+    from superset.sql_validators.presto_db import PrestoDBSQLValidator
+
+    sql_validators = {
+        "presto": PrestoDBSQLValidator,
+        "postgresql": PostgreSQLValidator,
+    }
+
+    output = {}
+
+    output["time_grains"] = {}
+    supported_time_grain_expressions = spec.get_time_grain_expressions()
+    for time_grain in TimeGrain:
+        output["time_grains"][time_grain.name] = (
+            time_grain in supported_time_grain_expressions
+        )
+
+    output.update(
+        {
+            "limit_method": spec.limit_method,
+            "joins": spec.allows_joins,
+            "subqueries": spec.allows_subqueries,
+            "alias_in_select": spec.allows_alias_in_select,
+            "alias_in_orderby": spec.allows_alias_in_orderby,
+            "secondary_time_columns": spec.time_secondary_columns,
+            "time_groupby_inline": spec.time_groupby_inline,
+            "alias_to_source_column": not spec.allows_alias_to_source_column,
+            "order_by_in_select": spec.allows_hidden_orderby_agg,
+            "expression_in_orderby": spec.allows_hidden_cc_in_orderby,
+            "cte_in_subquery": spec.allows_cte_in_subquery,
+            "limit_clause": spec.allows_limit_clause,
+            "max_column_name": spec.max_column_name_length,
+            "sql_comments": spec.allows_sql_comments,
+            "escaped_colons": spec.allows_escaped_colons,
+            "masked_encrypted_extra": "mask_encrypted_extra" in spec.__dict__,
+            "column_type_mapping": bool(spec.column_type_mappings),
+            "function_names": "get_function_names" in spec.__dict__,
+            # there are multiple ways of implementing user impersonation
+            "user_impersonation": (
+                "update_impersonation_config" in spec.__dict__
+                or "get_url_for_impersonation" in spec.__dict__
+            ),
+            "file_upload": spec.supports_file_upload,
+            "extra_table_metadata": "extra_table_metadata" in spec.__dict__,
+            "dbapi_exception_mapping": "get_dbapi_exception_mappin" in spec.__dict__,
+            "custom_errors": (
+                "extract_errors" in spec.__dict__ or "custom_errors" in spec.__dict__
+            ),
+            "dynamic_schema": spec.supports_dynamic_schema,
+            "catalog": spec.supports_catalog,
+            "dynamic_catalog": spec.supports_dynamic_catalog,
+            "ssh_tunneling": not spec.disable_ssh_tunneling,
+            "query_cancelation": (
+                "cancel_query" in spec.__dict__
+                or "has_implicit_cancel" in spec.__dict__
+            ),
+            "get_metrics": "get_metrics" in spec.__dict__,
+            "where_latest_partition": "where_latest_partition" in spec.__dict__,
+            "expand_data": "expand_data" in spec.__dict__,
+            "query_cost_estimation": "estimate_query_cost" in spec.__dict__,
+            # SQL validation is implemented in external classes
+            "sql_validation": spec.engine in sql_validators,
+        },
+    )
+
+    return output
+
+
+if __name__ == "__main__":
+    from pprint import pprint
+
+    from superset.app import create_app
+    from superset.db_engine_specs.shillelagh import ShillelaghEngineSpec
+
+    app = create_app()
+    with app.app_context():
+        pprint(basic_diagnostics(ShillelaghEngineSpec))