You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by jo...@apache.org on 2023/06/28 23:03:33 UTC

[superset] branch master updated: chore(dao): Add explicit ON DELETE CASCADE when deleting datasets (#24488)

This is an automated email from the ASF dual-hosted git repository.

johnbodley pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git


The following commit(s) were added to refs/heads/master by this push:
     new 75543af550 chore(dao): Add explicit ON DELETE CASCADE when deleting datasets (#24488)
75543af550 is described below

commit 75543af5504c13dbd60bc6d327efb9700e459a51
Author: John Bodley <45...@users.noreply.github.com>
AuthorDate: Wed Jun 28 16:03:25 2023 -0700

    chore(dao): Add explicit ON DELETE CASCADE when deleting datasets (#24488)
---
 UPDATING.md                                        |  1 +
 superset/connectors/sqla/models.py                 | 10 ++-
 superset/daos/dataset.py                           | 26 +++---
 superset/datasets/commands/bulk_delete.py          | 30 +------
 superset/datasets/commands/delete.py               | 18 ++---
 ..._add_on_delete_cascade_for_tables_references.py | 94 ++++++++++++++++++++++
 superset/utils/core.py                             | 21 ++++-
 tests/integration_tests/charts/api_tests.py        |  2 -
 tests/integration_tests/charts/commands_tests.py   |  1 -
 tests/integration_tests/commands_test.py           |  3 +-
 .../integration_tests/dashboards/commands_tests.py |  1 -
 tests/integration_tests/databases/api_tests.py     |  2 -
 tests/integration_tests/datasets/api_tests.py      |  2 -
 tests/integration_tests/datasets/commands_tests.py |  2 -
 tests/integration_tests/sqla_models_tests.py       |  9 +--
 15 files changed, 144 insertions(+), 78 deletions(-)

diff --git a/UPDATING.md b/UPDATING.md
index cf608e9fd5..b95cedc1e9 100644
--- a/UPDATING.md
+++ b/UPDATING.md
@@ -24,6 +24,7 @@ assists people when migrating to a new version.
 
 ## Next
 
+- [24488](https://github.com/apache/superset/pull/24488): Augments the foreign key constraints for the `sql_metrics`, `sqlatable_user`, and `table_columns` tables which reference the `tables` table to include an explicit CASCADE ON DELETE to ensure the relevant records are deleted when a dataset is deleted. Scheduled downtime may be advised.
 - [24335](https://github.com/apache/superset/pull/24335): Removed deprecated API `/superset/filter/<datasource_type>/<int:datasource_id>/<column>/`
 - [24185](https://github.com/apache/superset/pull/24185): `/api/v1/database/test_connection` and `api/v1/database/validate_parameters` permissions changed from `can_read` to `can_write`. Only Admin user's have access.
 - [24232](https://github.com/apache/superset/pull/24232): Enables ENABLE_TEMPLATE_REMOVE_FILTERS, DRILL_TO_DETAIL, DASHBOARD_CROSS_FILTERS by default, marks VERSIONED_EXPORT and ENABLE_TEMPLATE_REMOVE_FILTERS as deprecated.
diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py
index 9efbd1db92..ea5c0c8de0 100644
--- a/superset/connectors/sqla/models.py
+++ b/superset/connectors/sqla/models.py
@@ -196,7 +196,7 @@ class TableColumn(Model, BaseColumn, CertificationMixin):
 
     __tablename__ = "table_columns"
     __table_args__ = (UniqueConstraint("table_id", "column_name"),)
-    table_id = Column(Integer, ForeignKey("tables.id"))
+    table_id = Column(Integer, ForeignKey("tables.id", ondelete="CASCADE"))
     table: Mapped[SqlaTable] = relationship(
         "SqlaTable",
         back_populates="columns",
@@ -400,7 +400,7 @@ class SqlMetric(Model, BaseMetric, CertificationMixin):
 
     __tablename__ = "sql_metrics"
     __table_args__ = (UniqueConstraint("table_id", "metric_name"),)
-    table_id = Column(Integer, ForeignKey("tables.id"))
+    table_id = Column(Integer, ForeignKey("tables.id", ondelete="CASCADE"))
     table: Mapped[SqlaTable] = relationship(
         "SqlaTable",
         back_populates="metrics",
@@ -470,8 +470,8 @@ sqlatable_user = Table(
     "sqlatable_user",
     metadata,
     Column("id", Integer, primary_key=True),
-    Column("user_id", Integer, ForeignKey("ab_user.id")),
-    Column("table_id", Integer, ForeignKey("tables.id")),
+    Column("user_id", Integer, ForeignKey("ab_user.id", ondelete="CASCADE")),
+    Column("table_id", Integer, ForeignKey("tables.id", ondelete="CASCADE")),
 )
 
 
@@ -508,11 +508,13 @@ class SqlaTable(
         TableColumn,
         back_populates="table",
         cascade="all, delete-orphan",
+        passive_deletes=True,
     )
     metrics: Mapped[list[SqlMetric]] = relationship(
         SqlMetric,
         back_populates="table",
         cascade="all, delete-orphan",
+        passive_deletes=True,
     )
     metric_class = SqlMetric
     column_class = TableColumn
diff --git a/superset/daos/dataset.py b/superset/daos/dataset.py
index 4634a7e46f..74b7fa50eb 100644
--- a/superset/daos/dataset.py
+++ b/superset/daos/dataset.py
@@ -19,6 +19,7 @@ from typing import Any, Optional
 
 from sqlalchemy.exc import SQLAlchemyError
 
+from superset import security_manager
 from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn
 from superset.daos.base import BaseDAO
 from superset.extensions import db
@@ -361,25 +362,24 @@ class DatasetDAO(BaseDAO[SqlaTable]):  # pylint: disable=too-many-public-methods
         """
         return DatasetMetricDAO.create(properties, commit=commit)
 
-    @staticmethod
-    def bulk_delete(models: Optional[list[SqlaTable]], commit: bool = True) -> None:
+    @classmethod
+    def bulk_delete(
+        cls, models: Optional[list[SqlaTable]], commit: bool = True
+    ) -> None:
         item_ids = [model.id for model in models] if models else []
-        # bulk delete, first delete related data
-        if models:
-            for model in models:
-                model.owners = []
-                db.session.merge(model)
-            db.session.query(SqlMetric).filter(SqlMetric.table_id.in_(item_ids)).delete(
-                synchronize_session="fetch"
-            )
-            db.session.query(TableColumn).filter(
-                TableColumn.table_id.in_(item_ids)
-            ).delete(synchronize_session="fetch")
         # bulk delete itself
         try:
             db.session.query(SqlaTable).filter(SqlaTable.id.in_(item_ids)).delete(
                 synchronize_session="fetch"
             )
+
+            if models:
+                connection = db.session.connection()
+                mapper = next(iter(cls.model_cls.registry.mappers))  # type: ignore
+
+                for model in models:
+                    security_manager.dataset_after_delete(mapper, connection, model)
+
             if commit:
                 db.session.commit()
         except SQLAlchemyError as ex:
diff --git a/superset/datasets/commands/bulk_delete.py b/superset/datasets/commands/bulk_delete.py
index 9733aa21e8..6937dd20b7 100644
--- a/superset/datasets/commands/bulk_delete.py
+++ b/superset/datasets/commands/bulk_delete.py
@@ -28,7 +28,6 @@ from superset.datasets.commands.exceptions import (
     DatasetNotFoundError,
 )
 from superset.exceptions import SupersetSecurityException
-from superset.extensions import db
 
 logger = logging.getLogger(__name__)
 
@@ -40,35 +39,10 @@ class BulkDeleteDatasetCommand(BaseCommand):
 
     def run(self) -> None:
         self.validate()
-        if not self._models:
-            return None
+        assert self._models
+
         try:
             DatasetDAO.bulk_delete(self._models)
-            for model in self._models:
-                view_menu = (
-                    security_manager.find_view_menu(model.get_perm()) if model else None
-                )
-
-                if view_menu:
-                    permission_views = (
-                        db.session.query(security_manager.permissionview_model)
-                        .filter_by(view_menu=view_menu)
-                        .all()
-                    )
-
-                    for permission_view in permission_views:
-                        db.session.delete(permission_view)
-                    if view_menu:
-                        db.session.delete(view_menu)
-                else:
-                    if not view_menu:
-                        logger.error(
-                            "Could not find the data access permission for the dataset",
-                            exc_info=True,
-                        )
-            db.session.commit()
-
-            return None
         except DeleteFailedError as ex:
             logger.exception(ex.exception)
             raise DatasetBulkDeleteFailedError() from ex
diff --git a/superset/datasets/commands/delete.py b/superset/datasets/commands/delete.py
index 7078f09c37..1c2147b959 100644
--- a/superset/datasets/commands/delete.py
+++ b/superset/datasets/commands/delete.py
@@ -15,10 +15,9 @@
 # specific language governing permissions and limitations
 # under the License.
 import logging
-from typing import cast, Optional
+from typing import Optional
 
 from flask_appbuilder.models.sqla import Model
-from sqlalchemy.exc import SQLAlchemyError
 
 from superset import security_manager
 from superset.commands.base import BaseCommand
@@ -31,7 +30,6 @@ from superset.datasets.commands.exceptions import (
     DatasetNotFoundError,
 )
 from superset.exceptions import SupersetSecurityException
-from superset.extensions import db
 
 logger = logging.getLogger(__name__)
 
@@ -43,19 +41,13 @@ class DeleteDatasetCommand(BaseCommand):
 
     def run(self) -> Model:
         self.validate()
-        self._model = cast(SqlaTable, self._model)
+        assert self._model
+
         try:
-            # Even though SQLAlchemy should in theory delete rows from the association
-            # table, sporadically Superset will error because the rows are not deleted.
-            # Let's do it manually here to prevent the error.
-            self._model.owners = []
-            dataset = DatasetDAO.delete(self._model, commit=False)
-            db.session.commit()
-        except (SQLAlchemyError, DAODeleteFailedError) as ex:
+            return DatasetDAO.delete(self._model)
+        except DAODeleteFailedError as ex:
             logger.exception(ex)
-            db.session.rollback()
             raise DatasetDeleteFailedError() from ex
-        return dataset
 
     def validate(self) -> None:
         # Validate/populate model exists
diff --git a/superset/migrations/versions/2023-06-22_13-39_6fbe660cac39_add_on_delete_cascade_for_tables_references.py b/superset/migrations/versions/2023-06-22_13-39_6fbe660cac39_add_on_delete_cascade_for_tables_references.py
new file mode 100644
index 0000000000..bef12f8cf4
--- /dev/null
+++ b/superset/migrations/versions/2023-06-22_13-39_6fbe660cac39_add_on_delete_cascade_for_tables_references.py
@@ -0,0 +1,94 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""add on delete cascade for tables references
+
+Revision ID: 6fbe660cac39
+Revises: 83e1abbe777f
+Create Date: 2023-06-22 13:39:47.989373
+
+"""
+from __future__ import annotations
+
+# revision identifiers, used by Alembic.
+revision = "6fbe660cac39"
+down_revision = "83e1abbe777f"
+
+import sqlalchemy as sa
+from alembic import op
+
+from superset.utils.core import generic_find_fk_constraint_name
+
+
+def migrate(ondelete: str | None) -> None:
+    """
+    Redefine the foreign key constraints, via a successive DROP and ADD, for all tables
+    related to the `tables` table to include the ON DELETE construct for cascading
+    purposes.
+
+    :param ondelete: If set, emit ON DELETE <value> when issuing DDL for this constraint
+    """
+
+    bind = op.get_bind()
+    insp = sa.engine.reflection.Inspector.from_engine(bind)
+
+    conv = {
+        "fk": "fk_%(table_name)s_%(column_0_name)s_%(referred_table_name)s",
+    }
+
+    for table in ("sql_metrics", "table_columns"):
+        with op.batch_alter_table(table, naming_convention=conv) as batch_op:
+            if constraint := generic_find_fk_constraint_name(
+                table=table,
+                columns={"id"},
+                referenced="tables",
+                insp=insp,
+            ):
+                batch_op.drop_constraint(constraint, type_="foreignkey")
+
+            batch_op.create_foreign_key(
+                constraint_name=f"fk_{table}_table_id_tables",
+                referent_table="tables",
+                local_cols=["table_id"],
+                remote_cols=["id"],
+                ondelete=ondelete,
+            )
+
+    with op.batch_alter_table("sqlatable_user", naming_convention=conv) as batch_op:
+        for table, column in zip(("ab_user", "tables"), ("user_id", "table_id")):
+            if constraint := generic_find_fk_constraint_name(
+                table="sqlatable_user",
+                columns={"id"},
+                referenced=table,
+                insp=insp,
+            ):
+                batch_op.drop_constraint(constraint, type_="foreignkey")
+
+            batch_op.create_foreign_key(
+                constraint_name=f"fk_sqlatable_user_{column}_{table}",
+                referent_table=table,
+                local_cols=[column],
+                remote_cols=["id"],
+                ondelete=ondelete,
+            )
+
+
+def upgrade():
+    migrate(ondelete="CASCADE")
+
+
+def downgrade():
+    migrate(ondelete=None)
diff --git a/superset/utils/core.py b/superset/utils/core.py
index 125a406bf5..226daad45e 100644
--- a/superset/utils/core.py
+++ b/superset/utils/core.py
@@ -29,6 +29,7 @@ import platform
 import re
 import signal
 import smtplib
+import sqlite3
 import ssl
 import tempfile
 import threading
@@ -36,7 +37,7 @@ import traceback
 import uuid
 import zlib
 from collections.abc import Iterable, Iterator, Sequence
-from contextlib import contextmanager
+from contextlib import closing, contextmanager
 from dataclasses import dataclass
 from datetime import date, datetime, time, timedelta
 from email.mime.application import MIMEApplication
@@ -849,6 +850,24 @@ def pessimistic_connection_handling(some_engine: Engine) -> None:
             # restore 'close with result'
             connection.should_close_with_result = save_should_close_with_result
 
+        if some_engine.dialect.name == "sqlite":
+
+            @event.listens_for(some_engine, "connect")
+            def set_sqlite_pragma(  # pylint: disable=unused-argument
+                connection: sqlite3.Connection,
+                *args: Any,
+            ) -> None:
+                r"""
+                Enable foreign key support for SQLite.
+
+                :param connection: The SQLite connection
+                :param \*args: Additional positional arguments
+                :see: https://docs.sqlalchemy.org/en/latest/dialects/sqlite.html
+                """
+
+                with closing(connection.cursor()) as cursor:
+                    cursor.execute("PRAGMA foreign_keys=ON")
+
 
 def send_email_smtp(  # pylint: disable=invalid-name,too-many-arguments,too-many-locals
     to: str,
diff --git a/tests/integration_tests/charts/api_tests.py b/tests/integration_tests/charts/api_tests.py
index 69e99978e5..68b2b55809 100644
--- a/tests/integration_tests/charts/api_tests.py
+++ b/tests/integration_tests/charts/api_tests.py
@@ -1504,7 +1504,6 @@ class TestChartApi(SupersetTestCase, ApiOwnersTestCaseMixin, InsertChartMixin):
         assert chart.table == dataset
 
         chart.owners = []
-        dataset.owners = []
         db.session.delete(chart)
         db.session.commit()
         db.session.delete(dataset)
@@ -1577,7 +1576,6 @@ class TestChartApi(SupersetTestCase, ApiOwnersTestCaseMixin, InsertChartMixin):
         chart = db.session.query(Slice).filter_by(uuid=chart_config["uuid"]).one()
 
         chart.owners = []
-        dataset.owners = []
         db.session.delete(chart)
         db.session.commit()
         db.session.delete(dataset)
diff --git a/tests/integration_tests/charts/commands_tests.py b/tests/integration_tests/charts/commands_tests.py
index 217b1655a5..d0a59a3100 100644
--- a/tests/integration_tests/charts/commands_tests.py
+++ b/tests/integration_tests/charts/commands_tests.py
@@ -283,7 +283,6 @@ class TestImportChartsCommand(SupersetTestCase):
         assert chart.owners == [admin]
 
         chart.owners = []
-        dataset.owners = []
         database.owners = []
         db.session.delete(chart)
         db.session.delete(dataset)
diff --git a/tests/integration_tests/commands_test.py b/tests/integration_tests/commands_test.py
index 77fbad05f3..e34a040724 100644
--- a/tests/integration_tests/commands_test.py
+++ b/tests/integration_tests/commands_test.py
@@ -148,7 +148,6 @@ class TestImportAssetsCommand(SupersetTestCase):
 
         dashboard.owners = []
         chart.owners = []
-        dataset.owners = []
         database.owners = []
         db.session.delete(dashboard)
         db.session.delete(chart)
@@ -165,6 +164,7 @@ class TestImportAssetsCommand(SupersetTestCase):
             "charts/imported_chart.yaml": yaml.safe_dump(chart_config),
             "dashboards/imported_dashboard.yaml": yaml.safe_dump(dashboard_config),
         }
+
         command = ImportAssetsCommand(contents)
         command.run()
         chart = db.session.query(Slice).filter_by(uuid=chart_config["uuid"]).one()
@@ -193,7 +193,6 @@ class TestImportAssetsCommand(SupersetTestCase):
         dashboard.owners = []
 
         chart.owners = []
-        dataset.owners = []
         database.owners = []
         db.session.delete(dashboard)
         db.session.delete(chart)
diff --git a/tests/integration_tests/dashboards/commands_tests.py b/tests/integration_tests/dashboards/commands_tests.py
index ad9152585e..f9ff4e7dae 100644
--- a/tests/integration_tests/dashboards/commands_tests.py
+++ b/tests/integration_tests/dashboards/commands_tests.py
@@ -575,7 +575,6 @@ class TestImportDashboardsCommand(SupersetTestCase):
 
         dashboard.owners = []
         chart.owners = []
-        dataset.owners = []
         database.owners = []
         db.session.delete(dashboard)
         db.session.delete(chart)
diff --git a/tests/integration_tests/databases/api_tests.py b/tests/integration_tests/databases/api_tests.py
index 005ac0d290..f2fe9380c3 100644
--- a/tests/integration_tests/databases/api_tests.py
+++ b/tests/integration_tests/databases/api_tests.py
@@ -2143,7 +2143,6 @@ class TestDatabaseApi(SupersetTestCase):
         assert dataset.table_name == "imported_dataset"
         assert str(dataset.uuid) == dataset_config["uuid"]
 
-        dataset.owners = []
         db.session.delete(dataset)
         db.session.commit()
         db.session.delete(database)
@@ -2214,7 +2213,6 @@ class TestDatabaseApi(SupersetTestCase):
             db.session.query(Database).filter_by(uuid=database_config["uuid"]).one()
         )
         dataset = database.tables[0]
-        dataset.owners = []
         db.session.delete(dataset)
         db.session.commit()
         db.session.delete(database)
diff --git a/tests/integration_tests/datasets/api_tests.py b/tests/integration_tests/datasets/api_tests.py
index c0d86a876a..d31b8f0b28 100644
--- a/tests/integration_tests/datasets/api_tests.py
+++ b/tests/integration_tests/datasets/api_tests.py
@@ -2099,7 +2099,6 @@ class TestDatasetApi(SupersetTestCase):
         assert dataset.table_name == "imported_dataset"
         assert str(dataset.uuid) == dataset_config["uuid"]
 
-        dataset.owners = []
         db.session.delete(dataset)
         db.session.commit()
         db.session.delete(database)
@@ -2201,7 +2200,6 @@ class TestDatasetApi(SupersetTestCase):
         )
         dataset = database.tables[0]
 
-        dataset.owners = []
         db.session.delete(dataset)
         db.session.commit()
         db.session.delete(database)
diff --git a/tests/integration_tests/datasets/commands_tests.py b/tests/integration_tests/datasets/commands_tests.py
index 4919b8886d..b5956bd5d2 100644
--- a/tests/integration_tests/datasets/commands_tests.py
+++ b/tests/integration_tests/datasets/commands_tests.py
@@ -400,7 +400,6 @@ class TestImportDatasetsCommand(SupersetTestCase):
         assert column.description is None
         assert column.python_date_format is None
 
-        dataset.owners = []
         dataset.database.owners = []
         db.session.delete(dataset)
         db.session.delete(dataset.database)
@@ -529,7 +528,6 @@ class TestImportDatasetsCommand(SupersetTestCase):
         )
         assert len(database.tables) == 1
 
-        database.tables[0].owners = []
         database.owners = []
         db.session.delete(database.tables[0])
         db.session.delete(database)
diff --git a/tests/integration_tests/sqla_models_tests.py b/tests/integration_tests/sqla_models_tests.py
index 854a0c9be0..91419010b7 100644
--- a/tests/integration_tests/sqla_models_tests.py
+++ b/tests/integration_tests/sqla_models_tests.py
@@ -189,11 +189,6 @@ class TestDatabaseModel(SupersetTestCase):
         self.assertTrue(table3.has_extra_cache_key_calls(query_obj))
         assert extra_cache_keys == ["abc"]
 
-        # Cleanup
-        for table in [table1, table2, table3]:
-            db.session.delete(table)
-        db.session.commit()
-
     @patch("superset.jinja_context.g")
     def test_jinja_metrics_and_calc_columns(self, flask_g):
         flask_g.user.username = "abc"
@@ -430,7 +425,7 @@ class TestDatabaseModel(SupersetTestCase):
         }
 
         table = SqlaTable(
-            table_name="test_has_extra_cache_keys_table",
+            table_name="test_multiple_sql_statements",
             sql="SELECT 'foo' as grp, 1 as num; SELECT 'bar' as grp, 2 as num",
             database=get_example_database(),
         )
@@ -451,7 +446,7 @@ class TestDatabaseModel(SupersetTestCase):
         }
 
         table = SqlaTable(
-            table_name="test_has_extra_cache_keys_table",
+            table_name="test_dml_statement",
             sql="DELETE FROM foo",
             database=get_example_database(),
         )