You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@superset.apache.org by GitBox <gi...@apache.org> on 2022/03/18 14:20:25 UTC

[GitHub] [superset] michael-s-molina commented on a change in pull request #19232: feat(key-value): add superset cache

michael-s-molina commented on a change in pull request #19232:
URL: https://github.com/apache/superset/pull/19232#discussion_r830019202



##########
File path: superset/key_value/cache.py
##########
@@ -0,0 +1,118 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from datetime import datetime, timedelta
+from hashlib import md5
+from typing import Any, Dict, List, Optional
+from uuid import UUID, uuid3, uuid4
+
+from flask import Flask
+from flask_caching import BaseCache
+
+from superset.key_value.exceptions import KeyValueCreateFailedError
+from superset.key_value.types import KeyType
+
+RESOURCE = "superset_cache"
+KEY_TYPE: KeyType = "uuid"
+
+
+class SupersetCache(BaseCache):
+    def __init__(self, namespace: UUID, default_timeout: int = 300) -> None:
+        super().__init__(default_timeout)
+        self.namespace = namespace or uuid4()
+
+    @classmethod
+    def factory(
+        cls, app: Flask, config: Dict[str, Any], args: List[Any], kwargs: Dict[str, Any]
+    ) -> BaseCache:
+        # base namespace for generating deterministic UUIDs
+        md5_obj = md5()
+        seed = config.get("CACHE_KEY_PREFIX", "")
+        md5_obj.update(seed.encode("utf-8"))
+        kwargs["namespace"] = UUID(md5_obj.hexdigest())
+        return cls(*args, **kwargs)
+
+    def get_key(self, key: str) -> str:
+        return str(uuid3(self.namespace, key))
+
+    @staticmethod
+    def _purge() -> None:

Review comment:
       Can we use `_prune` instead of `_purge`? If you check Flask-Caching backends, that's the name they are using.

##########
File path: superset-frontend/cypress-base/cypress/integration/dashboard/key_value.test.ts
##########
@@ -27,7 +27,7 @@ interface QueryString {
   native_filters_key: string;
 }
 
-xdescribe('nativefiler url param key', () => {
+describe('nativefiler url param key', () => {

Review comment:
       I'm not sure this problem is related to the cache. Although it's the `key_value.test` that is failing, you can see that the error is happening in `dashboardHelper/getChartGridComponent`

##########
File path: superset/utils/cache_manager.py
##########
@@ -40,27 +38,27 @@ def _init_cache(
     ) -> None:
         cache_config = app.config[cache_config_key]
         cache_type = cache_config.get("CACHE_TYPE")
-        if app.debug and cache_type is None:
-            cache_threshold = cache_config.get("CACHE_THRESHOLD", math.inf)
+        if required and cache_type in (None, "SupersetCache"):
+            if cache_type is None:
+                logger.warning(
+                    "Falling back to built-in key-value cache for the following "

Review comment:
       Maybe "Falling back to the built-in cache, that stores data in the metadata database, for the following..."

##########
File path: superset/key_value/cache.py
##########
@@ -0,0 +1,118 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from datetime import datetime, timedelta
+from hashlib import md5
+from typing import Any, Dict, List, Optional
+from uuid import UUID, uuid3, uuid4
+
+from flask import Flask
+from flask_caching import BaseCache
+
+from superset.key_value.exceptions import KeyValueCreateFailedError
+from superset.key_value.types import KeyType
+
+RESOURCE = "superset_cache"
+KEY_TYPE: KeyType = "uuid"
+
+
+class SupersetCache(BaseCache):
+    def __init__(self, namespace: UUID, default_timeout: int = 300) -> None:
+        super().__init__(default_timeout)
+        self.namespace = namespace or uuid4()
+
+    @classmethod
+    def factory(
+        cls, app: Flask, config: Dict[str, Any], args: List[Any], kwargs: Dict[str, Any]
+    ) -> BaseCache:
+        # base namespace for generating deterministic UUIDs
+        md5_obj = md5()
+        seed = config.get("CACHE_KEY_PREFIX", "")
+        md5_obj.update(seed.encode("utf-8"))
+        kwargs["namespace"] = UUID(md5_obj.hexdigest())
+        return cls(*args, **kwargs)
+
+    def get_key(self, key: str) -> str:
+        return str(uuid3(self.namespace, key))
+
+    @staticmethod
+    def _purge() -> None:
+        # pylint: disable=import-outside-toplevel
+        from superset.key_value.commands.delete_expired import (
+            DeleteExpiredKeyValueCommand,
+        )
+
+        DeleteExpiredKeyValueCommand(resource=RESOURCE).run()
+
+    def get_expiry(self, timeout: Optional[int]) -> datetime:
+        return datetime.now() + timedelta(seconds=timeout or self.default_timeout)
+
+    def set(self, key: str, value: Any, timeout: Optional[int] = None) -> bool:
+        # pylint: disable=import-outside-toplevel
+        from superset.key_value.commands.delete import DeleteKeyValueCommand
+
+        try:

Review comment:
       Can't we just use the update command?

##########
File path: superset/key_value/cache.py
##########
@@ -0,0 +1,118 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from datetime import datetime, timedelta
+from hashlib import md5
+from typing import Any, Dict, List, Optional
+from uuid import UUID, uuid3, uuid4
+
+from flask import Flask
+from flask_caching import BaseCache
+
+from superset.key_value.exceptions import KeyValueCreateFailedError
+from superset.key_value.types import KeyType
+
+RESOURCE = "superset_cache"
+KEY_TYPE: KeyType = "uuid"
+
+
+class SupersetCache(BaseCache):
+    def __init__(self, namespace: UUID, default_timeout: int = 300) -> None:
+        super().__init__(default_timeout)
+        self.namespace = namespace or uuid4()
+
+    @classmethod
+    def factory(
+        cls, app: Flask, config: Dict[str, Any], args: List[Any], kwargs: Dict[str, Any]
+    ) -> BaseCache:
+        # base namespace for generating deterministic UUIDs
+        md5_obj = md5()
+        seed = config.get("CACHE_KEY_PREFIX", "")
+        md5_obj.update(seed.encode("utf-8"))
+        kwargs["namespace"] = UUID(md5_obj.hexdigest())
+        return cls(*args, **kwargs)
+
+    def get_key(self, key: str) -> str:
+        return str(uuid3(self.namespace, key))
+
+    @staticmethod
+    def _purge() -> None:
+        # pylint: disable=import-outside-toplevel
+        from superset.key_value.commands.delete_expired import (
+            DeleteExpiredKeyValueCommand,
+        )
+
+        DeleteExpiredKeyValueCommand(resource=RESOURCE).run()
+
+    def get_expiry(self, timeout: Optional[int]) -> datetime:
+        return datetime.now() + timedelta(seconds=timeout or self.default_timeout)
+
+    def set(self, key: str, value: Any, timeout: Optional[int] = None) -> bool:
+        # pylint: disable=import-outside-toplevel
+        from superset.key_value.commands.delete import DeleteKeyValueCommand
+
+        try:
+            DeleteKeyValueCommand(
+                resource=RESOURCE, key_type=KEY_TYPE, key=self.get_key(key),
+            ).run()
+        except KeyValueCreateFailedError:
+            pass
+        return self.add(key, value, timeout)
+
+    def add(self, key: str, value: Any, timeout: Optional[int] = None) -> bool:
+        # pylint: disable=import-outside-toplevel
+        from superset.key_value.commands.create import CreateKeyValueCommand
+
+        try:
+            CreateKeyValueCommand(
+                resource=RESOURCE,
+                value=value,
+                key_type=KEY_TYPE,
+                key=self.get_key(key),
+                expires_on=self.get_expiry(timeout),
+            ).run()
+            self._purge()
+            return True
+        except KeyValueCreateFailedError:
+            return False
+
+    def get(self, key: str) -> Any:
+        # pylint: disable=import-outside-toplevel
+        from superset.key_value.commands.get import GetKeyValueCommand
+
+        return GetKeyValueCommand(
+            resource=RESOURCE, key_type=KEY_TYPE, key=self.get_key(key),
+        ).run()
+
+    def has(self, key: str) -> bool:
+        # pylint: disable=import-outside-toplevel
+        from superset.key_value.commands.get import GetKeyValueCommand
+
+        entry = GetKeyValueCommand(

Review comment:
       Maybe reference `get` here so you can keep the logic in just one place?

##########
File path: docs/docs/installation/cache.mdx
##########
@@ -18,10 +18,17 @@ The following cache configurations can be customized:
 - Dashboard filter state (required): `FILTER_STATE_CACHE_CONFIG`.
 - Explore chart form data (required): `EXPLORE_FORM_DATA_CACHE_CONFIG`
 
-Please note, that Dashboard and Explore caching is required. When running Superset in debug mode, both Explore and Dashboard caches will default to `SimpleCache`;
-However, trying to run Superset in non-debug mode without defining a cache for these will cause the application to fail on startup. When running
-superset in single-worker mode, any cache backend is supported. However, when running Superset in on a multi-worker setup, a dedicated cache is required. For this
-we recommend using either Redis or Memcached:
+Please note, that Dashboard and Explore caching is required. If these caches are undefined, Superset falls back to using a built-in cache that stores data
+in the metadata database. While it is recommended to use a dedicated cache, the built-in cache can also be used to cache other data.
+For example, to use the built-in cache to store chart data, use the following config:
+
+```python
+DATA_CACHE_CONFIG = {
+    "CACHE_TYPE": "SupersetCache",
+    "CACHE_KEY_PREFIX": "superset_results",  # make sure this string is unique to avoid collisions
+    "CACHE_DEFAULT_TIMEOUT": 86400,  # 60 seconds * 60 minutes * 24 hours

Review comment:
       Maybe use `int(timedelta(hours=24).total_seconds())`?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@superset.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: notifications-unsubscribe@superset.apache.org
For additional commands, e-mail: notifications-help@superset.apache.org