You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by el...@apache.org on 2023/07/08 00:57:06 UTC

[superset] branch 2.1 updated: chore(metastore-cache): add codec support (#24586)

This is an automated email from the ASF dual-hosted git repository.

elizabeth pushed a commit to branch 2.1
in repository https://gitbox.apache.org/repos/asf/superset.git


The following commit(s) were added to refs/heads/2.1 by this push:
     new c1dea59280 chore(metastore-cache): add codec support (#24586)
c1dea59280 is described below

commit c1dea59280ba386cbc9d7c38593645a4ca4df947
Author: Ville Brofeldt <33...@users.noreply.github.com>
AuthorDate: Tue Jul 4 15:55:22 2023 +0300

    chore(metastore-cache): add codec support (#24586)
---
 superset/config.py                                 | 25 ++++++++---
 superset/extensions/metastore_cache.py             | 38 +++++++++++++----
 .../extensions/metastore_cache_test.py             | 49 +++++++++++++++++++++-
 3 files changed, 96 insertions(+), 16 deletions(-)

diff --git a/superset/config.py b/superset/config.py
index af46d489c9..c089416505 100644
--- a/superset/config.py
+++ b/superset/config.py
@@ -62,6 +62,7 @@ from superset.advanced_data_type.plugins.internet_port import internet_port
 from superset.advanced_data_type.types import AdvancedDataType
 from superset.constants import CHANGE_ME_SECRET_KEY
 from superset.jinja_context import BaseTemplateProcessor
+from superset.key_value.types import JsonKeyValueCodec
 from superset.stats_logger import DummyStatsLogger
 from superset.superset_typing import CacheConfig
 from superset.tasks.types import ExecutorType
@@ -686,20 +687,32 @@ CACHE_CONFIG: CacheConfig = {"CACHE_TYPE": "NullCache"}
 # Cache for datasource metadata and query results
 DATA_CACHE_CONFIG: CacheConfig = {"CACHE_TYPE": "NullCache"}
 
-# Cache for dashboard filter state (`CACHE_TYPE` defaults to `SimpleCache` when
-#  running in debug mode unless overridden)
+# Cache for dashboard filter state. `CACHE_TYPE` defaults to `SupersetMetastoreCache`
+# that stores the values in the key-value table in the Superset metastore, as it's
+# required for Superset to operate correctly, but can be replaced by any
+# `Flask-Caching` backend.
 FILTER_STATE_CACHE_CONFIG: CacheConfig = {
+    "CACHE_TYPE": "SupersetMetastoreCache",
     "CACHE_DEFAULT_TIMEOUT": int(timedelta(days=90).total_seconds()),
-    # should the timeout be reset when retrieving a cached value
+    # Should the timeout be reset when retrieving a cached value?
     "REFRESH_TIMEOUT_ON_RETRIEVAL": True,
+    # The following parameter only applies to `MetastoreCache`:
+    # How should entries be serialized/deserialized?
+    "CODEC": JsonKeyValueCodec(),
 }
 
-# Cache for explore form data state (`CACHE_TYPE` defaults to `SimpleCache` when
-#  running in debug mode unless overridden)
+# Cache for explore form data state. `CACHE_TYPE` defaults to `SupersetMetastoreCache`
+# that stores the values in the key-value table in the Superset metastore, as it's
+# required for Superset to operate correctly, but can be replaced by any
+# `Flask-Caching` backend.
 EXPLORE_FORM_DATA_CACHE_CONFIG: CacheConfig = {
+    "CACHE_TYPE": "SupersetMetastoreCache",
     "CACHE_DEFAULT_TIMEOUT": int(timedelta(days=7).total_seconds()),
-    # should the timeout be reset when retrieving a cached value
+    # Should the timeout be reset when retrieving a cached value?
     "REFRESH_TIMEOUT_ON_RETRIEVAL": True,
+    # The following parameter only applies to `MetastoreCache`:
+    # How should entries be serialized/deserialized?
+    "CODEC": JsonKeyValueCodec(),
 }
 
 # store cache keys by datasource UID (via CacheKey) for custom processing/invalidation
diff --git a/superset/extensions/metastore_cache.py b/superset/extensions/metastore_cache.py
index f69276c908..0dbd42b2b4 100644
--- a/superset/extensions/metastore_cache.py
+++ b/superset/extensions/metastore_cache.py
@@ -14,26 +14,37 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-
+import logging
 from datetime import datetime, timedelta
 from typing import Any, Dict, List, Optional
 from uuid import UUID, uuid3
 
-from flask import Flask
+from flask import current_app, Flask, has_app_context
 from flask_caching import BaseCache
 
 from superset.key_value.exceptions import KeyValueCreateFailedError
-from superset.key_value.types import KeyValueResource, PickleKeyValueCodec
+from superset.key_value.types import (
+    KeyValueCodec,
+    KeyValueResource,
+    PickleKeyValueCodec,
+)
 from superset.key_value.utils import get_uuid_namespace
 
 RESOURCE = KeyValueResource.METASTORE_CACHE
-CODEC = PickleKeyValueCodec()
+
+logger = logging.getLogger(__name__)
 
 
 class SupersetMetastoreCache(BaseCache):
-    def __init__(self, namespace: UUID, default_timeout: int = 300) -> None:
+    def __init__(
+        self,
+        namespace: UUID,
+        codec: KeyValueCodec,
+        default_timeout: int = 300,
+    ) -> None:
         super().__init__(default_timeout)
         self.namespace = namespace
+        self.codec = codec
 
     @classmethod
     def factory(
@@ -41,6 +52,17 @@ class SupersetMetastoreCache(BaseCache):
     ) -> BaseCache:
         seed = config.get("CACHE_KEY_PREFIX", "")
         kwargs["namespace"] = get_uuid_namespace(seed)
+        codec = config.get("CODEC") or PickleKeyValueCodec()
+        if (
+            has_app_context()
+            and not current_app.debug
+            and isinstance(codec, PickleKeyValueCodec)
+        ):
+            logger.warning(
+                "Using PickleKeyValueCodec with SupersetMetastoreCache may be unsafe, "
+                "use at your own risk."
+            )
+        kwargs["codec"] = codec
         return cls(*args, **kwargs)
 
     def get_key(self, key: str) -> UUID:
@@ -69,7 +91,7 @@ class SupersetMetastoreCache(BaseCache):
             resource=RESOURCE,
             key=self.get_key(key),
             value=value,
-            codec=CODEC,
+            codec=self.codec,
             expires_on=self._get_expiry(timeout),
         ).run()
         return True
@@ -82,7 +104,7 @@ class SupersetMetastoreCache(BaseCache):
             CreateKeyValueCommand(
                 resource=RESOURCE,
                 value=value,
-                codec=CODEC,
+                codec=self.codec,
                 key=self.get_key(key),
                 expires_on=self._get_expiry(timeout),
             ).run()
@@ -98,7 +120,7 @@ class SupersetMetastoreCache(BaseCache):
         return GetKeyValueCommand(
             resource=RESOURCE,
             key=self.get_key(key),
-            codec=CODEC,
+            codec=self.codec,
         ).run()
 
     def has(self, key: str) -> bool:
diff --git a/tests/integration_tests/extensions/metastore_cache_test.py b/tests/integration_tests/extensions/metastore_cache_test.py
index d9e0e9ee26..eeb7279fe1 100644
--- a/tests/integration_tests/extensions/metastore_cache_test.py
+++ b/tests/integration_tests/extensions/metastore_cache_test.py
@@ -16,17 +16,27 @@
 # under the License.
 from __future__ import annotations
 
+from contextlib import nullcontext
 from datetime import datetime, timedelta
-from typing import TYPE_CHECKING
+from typing import Any, TYPE_CHECKING
 from uuid import UUID
 
 import pytest
 from flask.ctx import AppContext
 from freezegun import freeze_time
 
+from superset.key_value.exceptions import KeyValueCodecEncodeException
+from superset.key_value.types import (
+    JsonKeyValueCodec,
+    KeyValueCodec,
+    PickleKeyValueCodec,
+)
+
 if TYPE_CHECKING:
     from superset.extensions.metastore_cache import SupersetMetastoreCache
 
+NAMESPACE = UUID("ee173d1b-ccf3-40aa-941c-985c15224496")
+
 FIRST_KEY = "foo"
 FIRST_KEY_INITIAL_VALUE = {"foo": "bar"}
 FIRST_KEY_UPDATED_VALUE = "foo"
@@ -40,8 +50,9 @@ def cache() -> SupersetMetastoreCache:
     from superset.extensions.metastore_cache import SupersetMetastoreCache
 
     return SupersetMetastoreCache(
-        namespace=UUID("ee173d1b-ccf3-40aa-941c-985c15224496"),
+        namespace=NAMESPACE,
         default_timeout=600,
+        codec=PickleKeyValueCodec(),
     )
 
 
@@ -75,3 +86,37 @@ def test_expiry(app_context: AppContext, cache: SupersetMetastoreCache) -> None:
     with freeze_time(dttm + delta + timedelta(seconds=1)):
         assert cache.has(FIRST_KEY) is False
         assert cache.get(FIRST_KEY) is None
+
+
+@pytest.mark.parametrize(
+    "input_,codec,expected_result",
+    [
+        ({"foo": "bar"}, JsonKeyValueCodec(), {"foo": "bar"}),
+        (("foo", "bar"), JsonKeyValueCodec(), ["foo", "bar"]),
+        (complex(1, 1), JsonKeyValueCodec(), KeyValueCodecEncodeException()),
+        ({"foo": "bar"}, PickleKeyValueCodec(), {"foo": "bar"}),
+        (("foo", "bar"), PickleKeyValueCodec(), ("foo", "bar")),
+        (complex(1, 1), PickleKeyValueCodec(), complex(1, 1)),
+    ],
+)
+def test_codec(
+    input_: Any,
+    codec: KeyValueCodec,
+    expected_result: Any,
+    app_context: AppContext,
+) -> None:
+    from superset.extensions.metastore_cache import SupersetMetastoreCache
+
+    cache = SupersetMetastoreCache(
+        namespace=NAMESPACE,
+        default_timeout=600,
+        codec=codec,
+    )
+    cm = (
+        pytest.raises(type(expected_result))
+        if isinstance(expected_result, Exception)
+        else nullcontext()
+    )
+    with cm:
+        cache.set(FIRST_KEY, input_)
+        assert cache.get(FIRST_KEY) == expected_result