You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@superset.apache.org by GitBox <gi...@apache.org> on 2022/03/29 22:23:21 UTC

[GitHub] [superset] ktmud commented on a change in pull request #19416: perf: improve perf in SIP-68 migration

ktmud commented on a change in pull request #19416:
URL: https://github.com/apache/superset/pull/19416#discussion_r837954242



##########
File path: superset/migrations/versions/b8d3a24d9131_new_dataset_models.py
##########
@@ -36,21 +37,82 @@
 from sqlalchemy.orm import backref, relationship, Session
 from sqlalchemy.schema import UniqueConstraint
 from sqlalchemy_utils import UUIDType
+from sqloxide import parse_sql
 
 from superset import app, db
 from superset.connectors.sqla.models import ADDITIVE_METRIC_TYPES
 from superset.extensions import encrypted_field_factory
-from superset.sql_parse import ParsedQuery
+from superset.sql_parse import ParsedQuery, Table
 
 # revision identifiers, used by Alembic.
 revision = "b8d3a24d9131"
 down_revision = "5afbb1a5849b"
 
+logger = logging.getLogger("alembic")
+
 Base = declarative_base()
 custom_password_store = app.config["SQLALCHEMY_CUSTOM_PASSWORD_STORE"]
 DB_CONNECTION_MUTATOR = app.config["DB_CONNECTION_MUTATOR"]
 
 
+# mapping between sqloxide and SQLAlchemy dialects
+sqloxide_dialects = {
+    "ansi": {"trino", "trinonative", "presto"},
+    "hive": {"hive", "databricks"},
+    "ms": {"mssql"},
+    "mysql": {"mysql"},
+    "postgres": {
+        "cockroachdb",
+        "hana",
+        "netezza",
+        "postgres",
+        "postgresql",
+        "redshift",
+        "vertica",
+    },
+    "snowflake": {"snowflake"},
+    "sqlite": {"sqlite", "gsheets", "shillelagh"},
+    "clickhouse": {"clickhouse"},
+}
+
+
+def find_nodes_by_key(element: Any, target: str) -> Iterator[Any]:
+    """
+    Find all nodes in a SQL tree matching a given key.
+    """
+    if isinstance(element, list):
+        for child in element:
+            yield from find_nodes_by_key(child, target)
+    elif isinstance(element, dict):
+        for key, value in element.items():
+            if key == target:
+                yield value
+            else:
+                yield from find_nodes_by_key(value, target)
+
+
+def get_dependencies(expression: str, sqla_dialect: str) -> Set[Table]:
+    """
+    Return all the dependencies from a SQL expression.
+    """
+    dialect = "generic"
+    for dialect, sqla_dialects in sqloxide_dialects.items():
+        if sqla_dialect in sqla_dialects:
+            break
+    try:
+        tree = parse_sql(expression, dialect=dialect)
+    except Exception:  # pylint: disable=broad-except
+        logger.warning("Unable to parse query with sqloxide: %s", expression)
+        # fallback to sqlparse
+        parsed = ParsedQuery(expression)
+        return parsed.tables
+
+    return {
+        Table(*[part["value"] for part in table["name"][::-1]])
+        for table in find_nodes_by_key(tree, "Table")
+    }

Review comment:
       Maybe this can be moved to `/superset/migrations/shared/utils.py`? Then later moved to `superset.sql_parse` once we start swapping out sqlparse in other places as well...

##########
File path: tests/unit_tests/migrations/versions/b8d3a24d9131_new_dataset_models_test.py
##########
@@ -0,0 +1,42 @@
+"""
+Test the SIP-68 migration.
+"""
+# pylint: disable=import-outside-toplevel, unused-argument
+
+from pytest_mock import MockerFixture
+
+from superset.sql_parse import Table
+
+
+def test_get_dependencies(mocker: MockerFixture, app_context: None) -> None:

Review comment:
       Thanks for adding the tests! 

##########
File path: superset/migrations/versions/b8d3a24d9131_new_dataset_models.py
##########
@@ -36,21 +37,82 @@
 from sqlalchemy.orm import backref, relationship, Session
 from sqlalchemy.schema import UniqueConstraint
 from sqlalchemy_utils import UUIDType
+from sqloxide import parse_sql
 
 from superset import app, db
 from superset.connectors.sqla.models import ADDITIVE_METRIC_TYPES
 from superset.extensions import encrypted_field_factory
-from superset.sql_parse import ParsedQuery
+from superset.sql_parse import ParsedQuery, Table
 
 # revision identifiers, used by Alembic.
 revision = "b8d3a24d9131"
 down_revision = "5afbb1a5849b"
 
+logger = logging.getLogger("alembic")
+
 Base = declarative_base()
 custom_password_store = app.config["SQLALCHEMY_CUSTOM_PASSWORD_STORE"]
 DB_CONNECTION_MUTATOR = app.config["DB_CONNECTION_MUTATOR"]
 
 
+# mapping between sqloxide and SQLAlchemy dialects
+sqloxide_dialects = {
+    "ansi": {"trino", "trinonative", "presto"},
+    "hive": {"hive", "databricks"},
+    "ms": {"mssql"},
+    "mysql": {"mysql"},
+    "postgres": {
+        "cockroachdb",
+        "hana",
+        "netezza",
+        "postgres",
+        "postgresql",
+        "redshift",
+        "vertica",
+    },
+    "snowflake": {"snowflake"},
+    "sqlite": {"sqlite", "gsheets", "shillelagh"},
+    "clickhouse": {"clickhouse"},
+}
+
+
+def find_nodes_by_key(element: Any, target: str) -> Iterator[Any]:
+    """
+    Find all nodes in a SQL tree matching a given key.
+    """
+    if isinstance(element, list):
+        for child in element:
+            yield from find_nodes_by_key(child, target)
+    elif isinstance(element, dict):
+        for key, value in element.items():
+            if key == target:
+                yield value
+            else:
+                yield from find_nodes_by_key(value, target)
+
+
+def get_dependencies(expression: str, sqla_dialect: str) -> Set[Table]:

Review comment:
       ```suggestion
   def extra_table_references(sql_text: str, dialect: str) -> Set[Table]:
   ```
   
   Just to be more explicit




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@superset.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: notifications-unsubscribe@superset.apache.org
For additional commands, e-mail: notifications-help@superset.apache.org