You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@superset.apache.org by GitBox <gi...@apache.org> on 2022/03/29 09:48:34 UTC
[GitHub] [superset] ktmud commented on a change in pull request #19406: perf(alembic): paginize db migration for new dataset models
ktmud commented on a change in pull request #19406:
URL: https://github.com/apache/superset/pull/19406#discussion_r837266456
##########
File path: superset/migrations/versions/b8d3a24d9131_new_dataset_models.py
##########
@@ -262,18 +260,21 @@ def after_insert(target: SqlaTable) -> None: # pylint: disable=too-many-locals
columns.append(
NewColumn(
name=column.column_name,
- type=column.type or "Unknown",
- expression=column.expression or conditional_quote(column.column_name),
description=column.description,
- is_temporal=column.is_dttm,
+ external_url=target.external_url,
+ expression=column.expression or conditional_quote(column.column_name),
+ extra_json=json.dumps(extra_json) if extra_json else None,
is_aggregation=False,
- is_physical=column.expression is None or column.expression == "",
- is_spatial=False,
- is_partition=False,
is_increase_desired=True,
- extra_json=json.dumps(extra_json) if extra_json else None,
is_managed_externally=target.is_managed_externally,
- external_url=target.external_url,
+ is_temporal=column.is_dttm,
+ is_spatial=False,
+ is_partition=False,
+ is_physical=(
+ is_physical_table
+ and (column.expression is None or column.expression == "")
+ ),
+ type=column.type or "Unknown",
Review comment:
A-Z
##########
File path: superset/migrations/versions/b8d3a24d9131_new_dataset_models.py
##########
@@ -292,69 +293,62 @@ def after_insert(target: SqlaTable) -> None: # pylint: disable=too-many-locals
columns.append(
NewColumn(
name=metric.metric_name,
- type="Unknown", # figuring this out would require a type inferrer
- expression=metric.expression,
- warning_text=metric.warning_text,
description=metric.description,
+ expression=metric.expression,
+ external_url=target.external_url,
+ extra_json=json.dumps(extra_json) if extra_json else None,
is_aggregation=True,
is_additive=is_additive,
- is_physical=False,
- is_spatial=False,
- is_partition=False,
is_increase_desired=True,
- extra_json=json.dumps(extra_json) if extra_json else None,
is_managed_externally=target.is_managed_externally,
- external_url=target.external_url,
+ is_partition=False,
+ is_physical=False,
+ is_spatial=False,
+ is_temporal=False,
+ type="Unknown", # figuring this out would require a type inferrer
+ warning_text=metric.warning_text,
Review comment:
Ditto A-Z
##########
File path: superset/migrations/versions/b8d3a24d9131_new_dataset_models.py
##########
@@ -499,14 +484,46 @@ def upgrade():
),
)
- # migrate existing datasets to the new models
- bind = op.get_bind()
- session = db.Session(bind=bind) # pylint: disable=no-member
+ with op.batch_alter_table("sl_columns") as batch_op:
+ batch_op.create_unique_constraint("uq_sl_columns_uuid", ["uuid"])
+
+ with op.batch_alter_table("sl_tables") as batch_op:
+ batch_op.create_unique_constraint("uq_sl_tables_uuid", ["uuid"])
+
+ with op.batch_alter_table("sl_datasets") as batch_op:
+ batch_op.create_unique_constraint("uq_sl_datasets_uuid", ["uuid"])
+ batch_op.create_unique_constraint(
+ "uq_sl_datasets_sqlatable_id", ["sqlatable_id"]
+ )
Review comment:
Move constraints to the end to improve readability.
##########
File path: superset/migrations/versions/b8d3a24d9131_new_dataset_models.py
##########
@@ -232,13 +229,14 @@ def after_insert(target: SqlaTable) -> None: # pylint: disable=too-many-locals
"""
Copy old datasets to the new models.
"""
- session = inspect(target).session
+ session: Session = inspect(target).session
+ database_id = target.database_id
+ is_physical_table = not target.sql
Review comment:
`BaseDatasource` checks whether a table is physical/virtual by checking whether `table.sql` is falsy. I'm changing all `target.sql is None` in this script to keep it consistent with current behavior.
##########
File path: superset/migrations/versions/b8d3a24d9131_new_dataset_models.py
##########
@@ -292,69 +293,62 @@ def after_insert(target: SqlaTable) -> None: # pylint: disable=too-many-locals
columns.append(
NewColumn(
name=metric.metric_name,
- type="Unknown", # figuring this out would require a type inferrer
- expression=metric.expression,
- warning_text=metric.warning_text,
description=metric.description,
+ expression=metric.expression,
+ external_url=target.external_url,
+ extra_json=json.dumps(extra_json) if extra_json else None,
is_aggregation=True,
is_additive=is_additive,
- is_physical=False,
- is_spatial=False,
- is_partition=False,
is_increase_desired=True,
- extra_json=json.dumps(extra_json) if extra_json else None,
is_managed_externally=target.is_managed_externally,
- external_url=target.external_url,
+ is_partition=False,
+ is_physical=False,
+ is_spatial=False,
+ is_temporal=False,
+ type="Unknown", # figuring this out would require a type inferrer
+ warning_text=metric.warning_text,
),
)
- # physical dataset
- tables = []
- if target.sql is None:
- physical_columns = [column for column in columns if column.is_physical]
-
- # create table
+ if is_physical_table:
+ # create physical sl_table
table = NewTable(
name=target.table_name,
schema=target.schema,
catalog=None, # currently not supported
- database_id=target.database_id,
- columns=physical_columns,
+ database_id=database_id,
+ # only save physical columns
+ columns=[column for column in columns if column.is_physical],
is_managed_externally=target.is_managed_externally,
external_url=target.external_url,
)
- tables.append(table)
-
- # virtual dataset
+ tables = [table]
+ expression = conditional_quote(target.table_name)
else:
- # mark all columns as virtual (not physical)
- for column in columns:
- column.is_physical = False
-
- # find referenced tables
+ # find referenced tables and link to dataset
parsed = ParsedQuery(target.sql)
referenced_tables = parsed.tables
-
- # predicate for finding the referenced tables
predicate = or_(
*[
and_(
+ NewTable.database_id == database_id,
Review comment:
Add `database_id` enforcement as all three together (db + schema + table name) forms a [unique key](https://github.com/apache/superset/blob/88029e21b6068f845d806cfc10d478a5d972ffa5/superset/connectors/sqla/models.py#L509).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: notifications-unsubscribe@superset.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: notifications-unsubscribe@superset.apache.org
For additional commands, e-mail: notifications-help@superset.apache.org