You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@superset.apache.org by GitBox <gi...@apache.org> on 2022/03/29 09:48:34 UTC

[GitHub] [superset] ktmud commented on a change in pull request #19406: perf(alembic): paginize db migration for new dataset models

ktmud commented on a change in pull request #19406:
URL: https://github.com/apache/superset/pull/19406#discussion_r837266456



##########
File path: superset/migrations/versions/b8d3a24d9131_new_dataset_models.py
##########
@@ -262,18 +260,21 @@ def after_insert(target: SqlaTable) -> None:  # pylint: disable=too-many-locals
         columns.append(
             NewColumn(
                 name=column.column_name,
-                type=column.type or "Unknown",
-                expression=column.expression or conditional_quote(column.column_name),
                 description=column.description,
-                is_temporal=column.is_dttm,
+                external_url=target.external_url,
+                expression=column.expression or conditional_quote(column.column_name),
+                extra_json=json.dumps(extra_json) if extra_json else None,
                 is_aggregation=False,
-                is_physical=column.expression is None or column.expression == "",
-                is_spatial=False,
-                is_partition=False,
                 is_increase_desired=True,
-                extra_json=json.dumps(extra_json) if extra_json else None,
                 is_managed_externally=target.is_managed_externally,
-                external_url=target.external_url,
+                is_temporal=column.is_dttm,
+                is_spatial=False,
+                is_partition=False,
+                is_physical=(
+                    is_physical_table
+                    and (column.expression is None or column.expression == "")
+                ),
+                type=column.type or "Unknown",

Review comment:
       A-Z

##########
File path: superset/migrations/versions/b8d3a24d9131_new_dataset_models.py
##########
@@ -292,69 +293,62 @@ def after_insert(target: SqlaTable) -> None:  # pylint: disable=too-many-locals
         columns.append(
             NewColumn(
                 name=metric.metric_name,
-                type="Unknown",  # figuring this out would require a type inferrer
-                expression=metric.expression,
-                warning_text=metric.warning_text,
                 description=metric.description,
+                expression=metric.expression,
+                external_url=target.external_url,
+                extra_json=json.dumps(extra_json) if extra_json else None,
                 is_aggregation=True,
                 is_additive=is_additive,
-                is_physical=False,
-                is_spatial=False,
-                is_partition=False,
                 is_increase_desired=True,
-                extra_json=json.dumps(extra_json) if extra_json else None,
                 is_managed_externally=target.is_managed_externally,
-                external_url=target.external_url,
+                is_partition=False,
+                is_physical=False,
+                is_spatial=False,
+                is_temporal=False,
+                type="Unknown",  # figuring this out would require a type inferrer
+                warning_text=metric.warning_text,

Review comment:
       Ditto A-Z

##########
File path: superset/migrations/versions/b8d3a24d9131_new_dataset_models.py
##########
@@ -499,14 +484,46 @@ def upgrade():
         ),
     )
 
-    # migrate existing datasets to the new models
-    bind = op.get_bind()
-    session = db.Session(bind=bind)  # pylint: disable=no-member
+    with op.batch_alter_table("sl_columns") as batch_op:
+        batch_op.create_unique_constraint("uq_sl_columns_uuid", ["uuid"])
+
+    with op.batch_alter_table("sl_tables") as batch_op:
+        batch_op.create_unique_constraint("uq_sl_tables_uuid", ["uuid"])
+
+    with op.batch_alter_table("sl_datasets") as batch_op:
+        batch_op.create_unique_constraint("uq_sl_datasets_uuid", ["uuid"])
+        batch_op.create_unique_constraint(
+            "uq_sl_datasets_sqlatable_id", ["sqlatable_id"]
+        )

Review comment:
       Move constraints to the end to improve readability.

##########
File path: superset/migrations/versions/b8d3a24d9131_new_dataset_models.py
##########
@@ -232,13 +229,14 @@ def after_insert(target: SqlaTable) -> None:  # pylint: disable=too-many-locals
     """
     Copy old datasets to the new models.
     """
-    session = inspect(target).session
+    session: Session = inspect(target).session
+    database_id = target.database_id
+    is_physical_table = not target.sql

Review comment:
       `BaseDatasource` checks whether a table is physical/virtual by checking whether `table.sql` is falsy. I'm changing all `target.sql is None` in this script to keep it consistent with current behavior.

##########
File path: superset/migrations/versions/b8d3a24d9131_new_dataset_models.py
##########
@@ -292,69 +293,62 @@ def after_insert(target: SqlaTable) -> None:  # pylint: disable=too-many-locals
         columns.append(
             NewColumn(
                 name=metric.metric_name,
-                type="Unknown",  # figuring this out would require a type inferrer
-                expression=metric.expression,
-                warning_text=metric.warning_text,
                 description=metric.description,
+                expression=metric.expression,
+                external_url=target.external_url,
+                extra_json=json.dumps(extra_json) if extra_json else None,
                 is_aggregation=True,
                 is_additive=is_additive,
-                is_physical=False,
-                is_spatial=False,
-                is_partition=False,
                 is_increase_desired=True,
-                extra_json=json.dumps(extra_json) if extra_json else None,
                 is_managed_externally=target.is_managed_externally,
-                external_url=target.external_url,
+                is_partition=False,
+                is_physical=False,
+                is_spatial=False,
+                is_temporal=False,
+                type="Unknown",  # figuring this out would require a type inferrer
+                warning_text=metric.warning_text,
             ),
         )
 
-    # physical dataset
-    tables = []
-    if target.sql is None:
-        physical_columns = [column for column in columns if column.is_physical]
-
-        # create table
+    if is_physical_table:
+        # create physical sl_table
         table = NewTable(
             name=target.table_name,
             schema=target.schema,
             catalog=None,  # currently not supported
-            database_id=target.database_id,
-            columns=physical_columns,
+            database_id=database_id,
+            # only save physical columns
+            columns=[column for column in columns if column.is_physical],
             is_managed_externally=target.is_managed_externally,
             external_url=target.external_url,
         )
-        tables.append(table)
-
-    # virtual dataset
+        tables = [table]
+        expression = conditional_quote(target.table_name)
     else:
-        # mark all columns as virtual (not physical)
-        for column in columns:
-            column.is_physical = False
-
-        # find referenced tables
+        # find referenced tables and link to dataset
         parsed = ParsedQuery(target.sql)
         referenced_tables = parsed.tables
-
-        # predicate for finding the referenced tables
         predicate = or_(
             *[
                 and_(
+                    NewTable.database_id == database_id,

Review comment:
       Add `database_id` enforcement as all three together (db + schema + table name) forms a [unique key](https://github.com/apache/superset/blob/88029e21b6068f845d806cfc10d478a5d972ffa5/superset/connectors/sqla/models.py#L509).




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@superset.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: notifications-unsubscribe@superset.apache.org
For additional commands, e-mail: notifications-help@superset.apache.org