Posted to notifications@superset.apache.org by GitBox <gi...@apache.org> on 2020/09/10 16:22:29 UTC

[GitHub] [incubator-superset] dpgaspar commented on a change in pull request #10761: feat: implement cache invalidation api

dpgaspar commented on a change in pull request #10761:
URL: https://github.com/apache/incubator-superset/pull/10761#discussion_r486469842



##########
File path: superset/cachekeys/schemas.py
##########
@@ -0,0 +1,45 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# RISON/JSON schemas for query parameters
+from marshmallow import fields, Schema, validate
+
+from superset.charts.schemas import (
+    datasource_name_description,
+    datasource_type_description,
+    datasource_uid_description,
+)
+
+
+class Datasource(Schema):
+    database_name = fields.String(description="Database name",)
+    datasource_name = fields.String(description=datasource_name_description,)
+    schema = fields.String(description="Datasource schema",)
+    datasource_type = fields.String(
+        description=datasource_type_description,
+        validate=validate.OneOf(choices=("druid", "table", "view")),
+        allow_none=True,

Review comment:
       I think that `get_datasource_by_name` will not handle a `None` well here; replace it with `required=True`.
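    A minimal sketch of the suggested change, using only the fields visible in the diff above (the `description` metadata is dropped here for brevity):

```python
# Sketch: make datasource_type mandatory instead of nullable, as suggested,
# so a missing or null type is rejected at schema load time rather than
# being passed on to get_datasource_by_name.
from marshmallow import Schema, fields, validate


class Datasource(Schema):
    database_name = fields.String()
    datasource_name = fields.String()
    schema = fields.String()
    datasource_type = fields.String(
        validate=validate.OneOf(choices=("druid", "table", "view")),
        required=True,  # load() raises ValidationError when the key is absent
    )
```

    With `required=True`, `Datasource().load({"datasource_name": "foo"})` raises a `marshmallow.ValidationError` instead of yielding a dict with no `datasource_type`.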
   

##########
File path: superset/cachekeys/api.py
##########
@@ -0,0 +1,98 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from flask import request, Response
+from flask_appbuilder import expose
+from flask_appbuilder.api import safe
+from flask_appbuilder.models.sqla.interface import SQLAInterface
+from flask_appbuilder.security.decorators import protect
+from jsonschema import ValidationError
+
+from superset.cachekeys.schemas import CacheInvalidationRequestSchema
+from superset.connectors.connector_registry import ConnectorRegistry
+from superset.extensions import cache_manager, db, event_logger
+from superset.models.cache import CacheKey
+from superset.views.base_api import BaseSupersetModelRestApi, statsd_metrics
+
+
+class CacheRestApi(BaseSupersetModelRestApi):
+    datamodel = SQLAInterface(CacheKey)
+    resource_name = "cache"

Review comment:
       nit: since this was renamed to `cache`, evaluate whether placing everything under `cachekeys` still makes sense

##########
File path: tests/base_tests.py
##########
@@ -69,6 +71,39 @@ def get_resp(
     return resp.data.decode("utf-8")
 
 
+def post_assert_metric(

Review comment:
       Is this the same helper, just in a different place?

##########
File path: tests/cachekeys/api_tests.py
##########
@@ -0,0 +1,131 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# isort:skip_file
+"""Unit tests for Superset"""
+from tests.test_app import app  # noqa
+
+from superset.extensions import cache_manager, db
+from superset.models.cache import CacheKey
+from tests.base_tests import (
+    SupersetTestCase,
+    post_assert_metric,
+    test_client,
+    logged_in_admin,
+)  # noqa
+
+
+def test_invalidate_cache(logged_in_admin):
+    rv = post_assert_metric(
+        test_client,
+        "api/v1/cache/invalidate",
+        {"datasource_uids": ["3__table"]},
+        "invalidate",
+    )
+    assert rv.status_code == 201
+
+
+def test_invalidate_existing_cache(logged_in_admin):
+    db.session.add(CacheKey(cache_key="cache_key", datasource_uid="3__table"))
+    db.session.commit()
+    cache_manager.cache.set("cache_key", "value")
+
+    rv = post_assert_metric(
+        test_client,
+        "api/v1/cache/invalidate",
+        {"datasource_uids": ["3__table"]},
+        "invalidate",
+    )
+
+    assert rv.status_code == 201
+    assert cache_manager.cache.get("cache_key") is None
+    assert (
+        not db.session.query(CacheKey).filter(CacheKey.cache_key == "cache_key").first()
+    )
+
+
+def test_invalidate_existing_caches(logged_in_admin):
+    bn = SupersetTestCase.get_birth_names_dataset()
+
+    db.session.add(CacheKey(cache_key="cache_key1", datasource_uid="3__druid"))
+    db.session.add(CacheKey(cache_key="cache_key2", datasource_uid="3__druid"))
+    db.session.add(CacheKey(cache_key="cache_key4", datasource_uid=f"{bn.id}__table"))
+    db.session.add(CacheKey(cache_key="cache_keyX", datasource_uid="X__table"))
+    db.session.commit()
+
+    cache_manager.cache.set("cache_key1", "value")
+    cache_manager.cache.set("cache_key2", "value")
+    cache_manager.cache.set("cache_key4", "value")
+    cache_manager.cache.set("cache_keyX", "value")
+
+    rv = post_assert_metric(
+        test_client,
+        "api/v1/cache/invalidate",
+        {
+            "datasource_uids": ["3__druid", "4__druid"],

Review comment:
       can we add tests for edge-case POSTs, e.g.:
   `{ "datasource_uids": [], "datasources": []}`
   `{ "datasource_uids": [], "datasources": [{"datasource_name": "", "datasource_type": null}]}`
   
   etc. (see the sketch below)
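    A rough sketch of what those edge-case tests could look like, reusing the helpers already imported in this module; the test names and the expected status codes (201 for an empty but valid payload, 400 once `datasource_type` is required) are assumptions, not behavior asserted by the PR:

```python
def test_invalidate_cache_empty_input(logged_in_admin):
    # Well-formed but empty payload: nothing to invalidate, request still succeeds.
    rv = post_assert_metric(
        test_client,
        "api/v1/cache/invalidate",
        {"datasource_uids": [], "datasources": []},
        "invalidate",
    )
    assert rv.status_code == 201


def test_invalidate_cache_null_datasource_type(logged_in_admin):
    # Entry with an empty name and a null type: should fail schema validation
    # once datasource_type is required.
    rv = post_assert_metric(
        test_client,
        "api/v1/cache/invalidate",
        {
            "datasource_uids": [],
            "datasources": [{"datasource_name": "", "datasource_type": None}],
        },
        "invalidate",
    )
    assert rv.status_code == 400
```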

##########
File path: superset/cachekeys/api.py
##########
@@ -0,0 +1,98 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from flask import request, Response
+from flask_appbuilder import expose
+from flask_appbuilder.api import safe
+from flask_appbuilder.models.sqla.interface import SQLAInterface
+from flask_appbuilder.security.decorators import protect
+from jsonschema import ValidationError
+
+from superset.cachekeys.schemas import CacheInvalidationRequestSchema
+from superset.connectors.connector_registry import ConnectorRegistry
+from superset.extensions import cache_manager, db, event_logger
+from superset.models.cache import CacheKey
+from superset.views.base_api import BaseSupersetModelRestApi, statsd_metrics
+
+
+class CacheRestApi(BaseSupersetModelRestApi):
+    datamodel = SQLAInterface(CacheKey)
+    resource_name = "cache"
+    allow_browser_login = True
+    class_permission_name = "CacheRestApi"
+
+    openapi_spec_component_schemas = (CacheInvalidationRequestSchema,)
+
+    @expose("/invalidate", methods=["POST"])
+    @event_logger.log_this
+    @protect()
+    @safe
+    @statsd_metrics
+    def invalidate(self) -> Response:
+        """
+        Takes a list of datasources, finds the associated cache records and
+        invalidates them and removes the database records
+
+        ---
+        post:
+          description: >-
+            Takes a list of datasources, finds the associated cache records and
+            invalidates them and removes the database records
+          requestBody:
+            description: >-
+              A list of datasources uuid or the tuples of database and datasource names
+            required: true
+            content:
+              application/json:
+                schema:
+                  $ref: "#/components/schemas/CacheInvalidationRequestSchema"
+          responses:
+            201:
+              description: cache was successfully invalidated
+            400:
+              $ref: '#/components/responses/400'
+            500:
+              $ref: '#/components/responses/500'
+        """
+        try:
+            datasources = CacheInvalidationRequestSchema().load(request.json)
+        except KeyError:
+            return self.response_400(message="Request is incorrect")
+        except ValidationError as error:
+            return self.response_400(message=error.message)
+        datasource_uids = set(datasources.get("datasource_uids", []))
+        for ds in datasources.get("datasources", []):
+            ds_obj = ConnectorRegistry.get_datasource_by_name(
+                session=db.session,
+                datasource_type=ds.get("datasource_type"),
+                datasource_name=ds.get("datasource_name"),
+                schema=ds.get("schema"),
+                database_name=ds.get("database_name"),
+            )
+            if ds_obj:
+                datasource_uids.add(ds_obj.uid)
+
+        cache_keys = (
+            db.session.query(CacheKey)
+            .filter(CacheKey.datasource_uid.in_(datasource_uids))
+            .all()
+        )
+        if cache_keys:
+            cache_manager.cache.delete_many(*[c.cache_key for c in cache_keys])

Review comment:
       A couple of questions:
   - if a cache `delete_many` fails, we may end up with keys deleted from the cache that still exist in `CacheKey`; will a subsequent run of `delete_many` fail if it can't find one of those keys?
   - the same applies if deleting a `CacheKey` fails; it would also be safer to try/except and roll back (see the sketch below)
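    For most cache backends deleting a missing key is a no-op, though that depends on the configured cache. Either way, wrapping both deletes and rolling back on failure would make the endpoint more robust. A sketch of what the tail of `invalidate()` could look like, under the assumption that the handler ends by deleting the `CacheKey` rows and returning 201, and that a broad except with `self.response_500` (a standard Flask-AppBuilder response helper) is acceptable here:

```python
        # Tail of CacheRestApi.invalidate(), continuing the diff above.
        if cache_keys:
            try:
                cache_manager.cache.delete_many(*[c.cache_key for c in cache_keys])
                for cache_key in cache_keys:
                    db.session.delete(cache_key)
                db.session.commit()
            except Exception:  # pylint: disable=broad-except
                db.session.rollback()
                return self.response_500(message="Failed to invalidate cache keys")
        return self.response(201)
```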
   




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: notifications-unsubscribe@superset.apache.org
For additional commands, e-mail: notifications-help@superset.apache.org