Posted to commits@superset.apache.org by dp...@apache.org on 2020/10/12 12:40:33 UTC

[incubator-superset] branch master updated: feat(datasets): REST API bulk delete (#11237)

This is an automated email from the ASF dual-hosted git repository.

dpgaspar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-superset.git


The following commit(s) were added to refs/heads/master by this push:
     new 9e9dac6  feat(datasets): REST API bulk delete (#11237)
9e9dac6 is described below

commit 9e9dac68f78b6423fdd0a274cb88d845df96d408
Author: Daniel Vaz Gaspar <da...@gmail.com>
AuthorDate: Mon Oct 12 13:40:05 2020 +0100

    feat(datasets): REST API bulk delete (#11237)
    
    * feat(datasets): REST API bulk delete
    
    * doc HTTP 400
---
 superset/datasets/api.py                  |  64 +++++++++++++++++
 superset/datasets/commands/bulk_delete.py |  88 +++++++++++++++++++++++
 superset/datasets/commands/exceptions.py  |   4 ++
 superset/datasets/dao.py                  |  26 +++++++
 superset/datasets/schemas.py              |   1 +
 tests/datasets/api_tests.py               | 112 +++++++++++++++++++++++++++++-
 6 files changed, 294 insertions(+), 1 deletion(-)

diff --git a/superset/datasets/api.py b/superset/datasets/api.py
index 40b8c5e..8c7f75e 100644
--- a/superset/datasets/api.py
+++ b/superset/datasets/api.py
@@ -21,14 +21,17 @@ import yaml
 from flask import g, request, Response
 from flask_appbuilder.api import expose, protect, rison, safe
 from flask_appbuilder.models.sqla.interface import SQLAInterface
+from flask_babel import ngettext
 from marshmallow import ValidationError
 
 from superset.connectors.sqla.models import SqlaTable
 from superset.constants import RouteMethod
 from superset.databases.filters import DatabaseFilter
+from superset.datasets.commands.bulk_delete import BulkDeleteDatasetCommand
 from superset.datasets.commands.create import CreateDatasetCommand
 from superset.datasets.commands.delete import DeleteDatasetCommand
 from superset.datasets.commands.exceptions import (
+    DatasetBulkDeleteFailedError,
     DatasetCreateFailedError,
     DatasetDeleteFailedError,
     DatasetForbiddenError,
@@ -44,6 +47,7 @@ from superset.datasets.schemas import (
     DatasetPostSchema,
     DatasetPutSchema,
     DatasetRelatedObjectsResponse,
+    get_delete_ids_schema,
     get_export_ids_schema,
 )
 from superset.views.base import DatasourceFilter, generate_download_headers
@@ -68,6 +72,7 @@ class DatasetRestApi(BaseSupersetModelRestApi):
         RouteMethod.EXPORT,
         RouteMethod.RELATED,
         RouteMethod.DISTINCT,
+        "bulk_delete",
         "refresh",
         "related_objects",
     }
@@ -489,3 +494,62 @@ class DatasetRestApi(BaseSupersetModelRestApi):
             charts={"count": len(charts), "result": charts},
             dashboards={"count": len(dashboards), "result": dashboards},
         )
+
+    @expose("/", methods=["DELETE"])
+    @protect()
+    @safe
+    @statsd_metrics
+    @rison(get_delete_ids_schema)
+    def bulk_delete(self, **kwargs: Any) -> Response:
+        """Delete bulk Datasets
+        ---
+        delete:
+          description: >-
+            Deletes multiple Datasets in a bulk operation.
+          parameters:
+          - in: query
+            name: q
+            content:
+              application/json:
+                schema:
+                  $ref: '#/components/schemas/get_delete_ids_schema'
+          responses:
+            200:
+              description: Dataset bulk delete
+              content:
+                application/json:
+                  schema:
+                    type: object
+                    properties:
+                      message:
+                        type: string
+            400:
+              $ref: '#/components/responses/400'
+            401:
+              $ref: '#/components/responses/401'
+            403:
+              $ref: '#/components/responses/403'
+            404:
+              $ref: '#/components/responses/404'
+            422:
+              $ref: '#/components/responses/422'
+            500:
+              $ref: '#/components/responses/500'
+        """
+        item_ids = kwargs["rison"]
+        try:
+            BulkDeleteDatasetCommand(g.user, item_ids).run()
+            return self.response(
+                200,
+                message=ngettext(
+                    "Deleted %(num)d dataset",
+                    "Deleted %(num)d datasets",
+                    num=len(item_ids),
+                ),
+            )
+        except DatasetNotFoundError:
+            return self.response_404()
+        except DatasetForbiddenError:
+            return self.response_403()
+        except DatasetBulkDeleteFailedError as ex:
+            return self.response_422(message=str(ex))
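
Note: a minimal client-side sketch of calling the new endpoint, assuming a locally
running Superset at http://localhost:8088 and an already obtained access token
(both the host and the token handling are illustrative, not part of this commit).
The dataset ids are rison-encoded into the `q` query parameter, matching
get_delete_ids_schema:

    import prison  # rison encoder; the same library the API tests use
    import requests

    dataset_ids = [1, 2, 3]  # hypothetical ids of datasets owned by the caller
    resp = requests.delete(
        "http://localhost:8088/api/v1/dataset/",
        params={"q": prison.dumps(dataset_ids)},  # -> ?q=!(1,2,3)
        headers={"Authorization": "Bearer <ACCESS_TOKEN>"},
    )
    # 200 -> {"message": "Deleted 3 datasets"}; 403 if not all owned,
    # 404 if any id is missing, 422 if the bulk delete itself fails
    print(resp.status_code, resp.json())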
diff --git a/superset/datasets/commands/bulk_delete.py b/superset/datasets/commands/bulk_delete.py
new file mode 100644
index 0000000..49ad5e4
--- /dev/null
+++ b/superset/datasets/commands/bulk_delete.py
@@ -0,0 +1,88 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import logging
+from typing import List, Optional
+
+from flask_appbuilder.security.sqla.models import User
+
+from superset.commands.base import BaseCommand
+from superset.commands.exceptions import DeleteFailedError
+from superset.connectors.sqla.models import SqlaTable
+from superset.datasets.commands.exceptions import (
+    DatasetBulkDeleteFailedError,
+    DatasetForbiddenError,
+    DatasetNotFoundError,
+)
+from superset.datasets.dao import DatasetDAO
+from superset.exceptions import SupersetSecurityException
+from superset.extensions import db, security_manager
+from superset.views.base import check_ownership
+
+logger = logging.getLogger(__name__)
+
+
+class BulkDeleteDatasetCommand(BaseCommand):
+    def __init__(self, user: User, model_ids: List[int]):
+        self._actor = user
+        self._model_ids = model_ids
+        self._models: Optional[List[SqlaTable]] = None
+
+    def run(self) -> None:
+        self.validate()
+        if not self._models:
+            return None
+        try:
+            DatasetDAO.bulk_delete(self._models)
+            for model in self._models:
+                view_menu = (
+                    security_manager.find_view_menu(model.get_perm()) if model else None
+                )
+
+                if view_menu:
+                    permission_views = (
+                        db.session.query(security_manager.permissionview_model)
+                        .filter_by(view_menu=view_menu)
+                        .all()
+                    )
+
+                    for permission_view in permission_views:
+                        db.session.delete(permission_view)
+                    if view_menu:
+                        db.session.delete(view_menu)
+                else:
+                    if not view_menu:
+                        logger.error(
+                            "Could not find the data access permission for the dataset"
+                        )
+            db.session.commit()
+
+            return None
+        except DeleteFailedError as ex:
+            logger.exception(ex.exception)
+            raise DatasetBulkDeleteFailedError()
+
+    def validate(self) -> None:
+        # Validate/populate model exists
+        self._models = DatasetDAO.find_by_ids(self._model_ids)
+        if not self._models or len(self._models) != len(self._model_ids):
+            raise DatasetNotFoundError()
+        # Check ownership
+        for model in self._models:
+            try:
+                check_ownership(model)
+            except SupersetSecurityException:
+                raise DatasetForbiddenError()
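
Note: the command follows the same validate()/run() pattern as the other dataset
commands; below is a hedged sketch of driving it directly from code running inside
the Flask app context, mirroring what the bulk_delete endpoint above does (the ids
are hypothetical):

    from flask import g

    from superset.datasets.commands.bulk_delete import BulkDeleteDatasetCommand
    from superset.datasets.commands.exceptions import (
        DatasetBulkDeleteFailedError,
        DatasetForbiddenError,
        DatasetNotFoundError,
    )

    try:
        # validates that every id exists and is owned by the actor, then deletes
        # the datasets together with their view menus and permission views
        BulkDeleteDatasetCommand(g.user, [1, 2, 3]).run()
    except DatasetNotFoundError:
        ...  # at least one id does not resolve to a dataset
    except DatasetForbiddenError:
        ...  # the actor does not own every dataset
    except DatasetBulkDeleteFailedError as ex:
        ...  # the underlying delete failed; str(ex) carries the message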
diff --git a/superset/datasets/commands/exceptions.py b/superset/datasets/commands/exceptions.py
index b651b6f..ee9d781 100644
--- a/superset/datasets/commands/exceptions.py
+++ b/superset/datasets/commands/exceptions.py
@@ -160,6 +160,10 @@ class DatasetDeleteFailedError(DeleteFailedError):
     message = _("Dataset could not be deleted.")
 
 
+class DatasetBulkDeleteFailedError(DeleteFailedError):
+    message = _("Dataset(s) could not be bulk deleted.")
+
+
 class DatasetRefreshFailedError(UpdateFailedError):
     message = _("Dataset could not be updated.")
 
diff --git a/superset/datasets/dao.py b/superset/datasets/dao.py
index b34b3ef..3b905f4 100644
--- a/superset/datasets/dao.py
+++ b/superset/datasets/dao.py
@@ -207,6 +207,32 @@ class DatasetDAO(BaseDAO):
         """
         return DatasetMetricDAO.create(properties, commit=commit)
 
+    @staticmethod
+    def bulk_delete(models: Optional[List[SqlaTable]], commit: bool = True) -> None:
+        item_ids = [model.id for model in models] if models else []
+        # bulk delete, first delete related data
+        if models:
+            for model in models:
+                model.owners = []
+                db.session.merge(model)
+            db.session.query(SqlMetric).filter(SqlMetric.table_id.in_(item_ids)).delete(
+                synchronize_session="fetch"
+            )
+            db.session.query(TableColumn).filter(
+                TableColumn.table_id.in_(item_ids)
+            ).delete(synchronize_session="fetch")
+        # bulk delete itself
+        try:
+            db.session.query(SqlaTable).filter(SqlaTable.id.in_(item_ids)).delete(
+                synchronize_session="fetch"
+            )
+            if commit:
+                db.session.commit()
+        except SQLAlchemyError as ex:
+            if commit:
+                db.session.rollback()
+            raise ex
+
 
 class DatasetColumnDAO(BaseDAO):
     model_cls = TableColumn
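
Note: DatasetDAO.bulk_delete commits by default; passing commit=False lets a
caller fold it into a larger unit of work. A hedged sketch under that assumption
(the ids are illustrative):

    from superset.datasets.dao import DatasetDAO
    from superset.extensions import db

    models = DatasetDAO.find_by_ids([1, 2, 3])  # same lookup the command uses
    try:
        # removes the related metrics and columns, then the tables themselves,
        # but leaves the transaction open for the caller to commit
        DatasetDAO.bulk_delete(models, commit=False)
        db.session.commit()
    except Exception:
        db.session.rollback()
        raise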
diff --git a/superset/datasets/schemas.py b/superset/datasets/schemas.py
index a4341db..dd30dbd 100644
--- a/superset/datasets/schemas.py
+++ b/superset/datasets/schemas.py
@@ -20,6 +20,7 @@ from flask_babel import lazy_gettext as _
 from marshmallow import fields, Schema, ValidationError
 from marshmallow.validate import Length
 
+get_delete_ids_schema = {"type": "array", "items": {"type": "integer"}}
 get_export_ids_schema = {"type": "array", "items": {"type": "integer"}}
 
 
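Note: the delete ids travel rison-encoded in the `q` query parameter, exactly like
the export ids; a quick illustration with the prison library the test suite already
depends on:

    import prison

    # an array of integers serializes to the compact rison form
    assert prison.dumps([1, 2, 3]) == "!(1,2,3)"
    assert prison.loads("!(1,2,3)") == [1, 2, 3]
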
diff --git a/tests/datasets/api_tests.py b/tests/datasets/api_tests.py
index 2aba788..a2ff652 100644
--- a/tests/datasets/api_tests.py
+++ b/tests/datasets/api_tests.py
@@ -20,6 +20,7 @@ from typing import List
 from unittest.mock import patch
 
 import prison
+import pytest
 import yaml
 from sqlalchemy.sql import func
 
@@ -34,12 +35,14 @@ from superset.extensions import db, security_manager
 from superset.models.core import Database
 from superset.utils.core import backend, get_example_database, get_main_database
 from superset.utils.dict_import_export import export_to_dict
-from superset.views.base import generate_download_headers
 from tests.base_tests import SupersetTestCase
 from tests.conftest import CTAS_SCHEMA_NAME
 
 
 class TestDatasetApi(SupersetTestCase):
+
+    fixture_tables_names = ("ab_permission", "ab_permission_view", "ab_view_menu")
+
     @staticmethod
     def insert_dataset(
         table_name: str, schema: str, owners: List[int], database: Database
@@ -61,6 +64,30 @@ class TestDatasetApi(SupersetTestCase):
             "ab_permission", "", [self.get_user("admin").id], get_main_database()
         )
 
+    def get_fixture_datasets(self) -> List[SqlaTable]:
+        return (
+            db.session.query(SqlaTable)
+            .filter(SqlaTable.table_name.in_(self.fixture_tables_names))
+            .all()
+        )
+
+    @pytest.fixture()
+    def create_datasets(self):
+        with self.create_app().app_context():
+            datasets = []
+            admin = self.get_user("admin")
+            main_db = get_main_database()
+            for tables_name in self.fixture_tables_names:
+                datasets.append(
+                    self.insert_dataset(tables_name, "", [admin.id], main_db)
+                )
+            yield datasets
+
+            # rollback changes
+            for dataset in datasets:
+                db.session.delete(dataset)
+            db.session.commit()
+
     @staticmethod
     def get_energy_usage_dataset():
         example_db = get_example_database()
@@ -789,6 +816,89 @@ class TestDatasetApi(SupersetTestCase):
         db.session.delete(dataset)
         db.session.commit()
 
+    @pytest.mark.usefixtures("create_datasets")
+    def test_bulk_delete_dataset_items(self):
+        """
+        Dataset API: Test bulk delete dataset items
+        """
+        datasets = self.get_fixture_datasets()
+        dataset_ids = [dataset.id for dataset in datasets]
+
+        view_menu_names = []
+        for dataset in datasets:
+            view_menu_names.append(dataset.get_perm())
+
+        self.login(username="admin")
+        uri = f"api/v1/dataset/?q={prison.dumps(dataset_ids)}"
+        rv = self.delete_assert_metric(uri, "bulk_delete")
+        data = json.loads(rv.data.decode("utf-8"))
+        assert rv.status_code == 200
+        expected_response = {"message": f"Deleted {len(datasets)} datasets"}
+        assert data == expected_response
+        datasets = (
+            db.session.query(SqlaTable)
+            .filter(SqlaTable.table_name.in_(self.fixture_tables_names))
+            .all()
+        )
+        assert datasets == []
+        # Assert permissions get cleaned
+        for view_menu_name in view_menu_names:
+            assert security_manager.find_view_menu(view_menu_name) is None
+
+    @pytest.mark.usefixtures("create_datasets")
+    def test_bulk_delete_item_dataset_not_owned(self):
+        """
+        Dataset API: Test bulk delete item not owned
+        """
+        datasets = self.get_fixture_datasets()
+        dataset_ids = [dataset.id for dataset in datasets]
+
+        self.login(username="alpha")
+        uri = f"api/v1/dataset/?q={prison.dumps(dataset_ids)}"
+        rv = self.delete_assert_metric(uri, "bulk_delete")
+        assert rv.status_code == 403
+
+    @pytest.mark.usefixtures("create_datasets")
+    def test_bulk_delete_item_not_found(self):
+        """
+        Dataset API: Test bulk delete item not found
+        """
+        datasets = self.get_fixture_datasets()
+        dataset_ids = [dataset.id for dataset in datasets]
+        dataset_ids.append(db.session.query(func.max(SqlaTable.id)).scalar())
+
+        self.login(username="admin")
+        uri = f"api/v1/dataset/?q={prison.dumps(dataset_ids)}"
+        rv = self.delete_assert_metric(uri, "bulk_delete")
+        assert rv.status_code == 404
+
+    @pytest.mark.usefixtures("create_datasets")
+    def test_bulk_delete_dataset_item_not_authorized(self):
+        """
+        Dataset API: Test bulk delete item not authorized
+        """
+        datasets = self.get_fixture_datasets()
+        dataset_ids = [dataset.id for dataset in datasets]
+
+        self.login(username="gamma")
+        uri = f"api/v1/dataset/?q={prison.dumps(dataset_ids)}"
+        rv = self.client.delete(uri)
+        assert rv.status_code == 401
+
+    @pytest.mark.usefixtures("create_datasets")
+    def test_bulk_delete_dataset_item_incorrect(self):
+        """
+        Dataset API: Test bulk delete item incorrect request
+        """
+        datasets = self.get_fixture_datasets()
+        dataset_ids = [dataset.id for dataset in datasets]
+        dataset_ids.append("Wrong")
+
+        self.login(username="admin")
+        uri = f"api/v1/dataset/?q={prison.dumps(dataset_ids)}"
+        rv = self.client.delete(uri)
+        assert rv.status_code == 400
+
     def test_dataset_item_refresh(self):
         """
         Dataset API: Test item refresh