You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by el...@apache.org on 2023/07/10 23:43:40 UTC
[superset] 01/01: chore(metastore-cache): add codec support (#24586)
This is an automated email from the ASF dual-hosted git repository.
elizabeth pushed a commit to branch 2.1
in repository https://gitbox.apache.org/repos/asf/superset.git
commit 6b965978d7a90a1bfb5f1134086c2a148f87065b
Author: Ville Brofeldt <33...@users.noreply.github.com>
AuthorDate: Tue Jul 4 15:55:22 2023 +0300
chore(metastore-cache): add codec support (#24586)
---
superset/config.py | 25 ++++++++---
superset/extensions/metastore_cache.py | 38 +++++++++++++----
superset/key_value/exceptions.py | 12 ++++++
.../extensions/metastore_cache_test.py | 49 +++++++++++++++++++++-
4 files changed, 108 insertions(+), 16 deletions(-)
diff --git a/superset/config.py b/superset/config.py
index af46d489c9..c089416505 100644
--- a/superset/config.py
+++ b/superset/config.py
@@ -62,6 +62,7 @@ from superset.advanced_data_type.plugins.internet_port import internet_port
from superset.advanced_data_type.types import AdvancedDataType
from superset.constants import CHANGE_ME_SECRET_KEY
from superset.jinja_context import BaseTemplateProcessor
+from superset.key_value.types import JsonKeyValueCodec
from superset.stats_logger import DummyStatsLogger
from superset.superset_typing import CacheConfig
from superset.tasks.types import ExecutorType
@@ -686,20 +687,32 @@ CACHE_CONFIG: CacheConfig = {"CACHE_TYPE": "NullCache"}
# Cache for datasource metadata and query results
DATA_CACHE_CONFIG: CacheConfig = {"CACHE_TYPE": "NullCache"}
-# Cache for dashboard filter state (`CACHE_TYPE` defaults to `SimpleCache` when
-# running in debug mode unless overridden)
+# Cache for dashboard filter state. `CACHE_TYPE` defaults to `SupersetMetastoreCache`
+# that stores the values in the key-value table in the Superset metastore, as it's
+# required for Superset to operate correctly, but can be replaced by any
+# `Flask-Caching` backend.
FILTER_STATE_CACHE_CONFIG: CacheConfig = {
+ "CACHE_TYPE": "SupersetMetastoreCache",
"CACHE_DEFAULT_TIMEOUT": int(timedelta(days=90).total_seconds()),
- # should the timeout be reset when retrieving a cached value
+ # Should the timeout be reset when retrieving a cached value?
"REFRESH_TIMEOUT_ON_RETRIEVAL": True,
+ # The following parameter only applies to `MetastoreCache`:
+ # How should entries be serialized/deserialized?
+ "CODEC": JsonKeyValueCodec(),
}
-# Cache for explore form data state (`CACHE_TYPE` defaults to `SimpleCache` when
-# running in debug mode unless overridden)
+# Cache for explore form data state. `CACHE_TYPE` defaults to `SupersetMetastoreCache`
+# that stores the values in the key-value table in the Superset metastore, as it's
+# required for Superset to operate correctly, but can be replaced by any
+# `Flask-Caching` backend.
EXPLORE_FORM_DATA_CACHE_CONFIG: CacheConfig = {
+ "CACHE_TYPE": "SupersetMetastoreCache",
"CACHE_DEFAULT_TIMEOUT": int(timedelta(days=7).total_seconds()),
- # should the timeout be reset when retrieving a cached value
+ # Should the timeout be reset when retrieving a cached value?
"REFRESH_TIMEOUT_ON_RETRIEVAL": True,
+ # The following parameter only applies to `MetastoreCache`:
+ # How should entries be serialized/deserialized?
+ "CODEC": JsonKeyValueCodec(),
}
# store cache keys by datasource UID (via CacheKey) for custom processing/invalidation
diff --git a/superset/extensions/metastore_cache.py b/superset/extensions/metastore_cache.py
index f69276c908..0dbd42b2b4 100644
--- a/superset/extensions/metastore_cache.py
+++ b/superset/extensions/metastore_cache.py
@@ -14,26 +14,37 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-
+import logging
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional
from uuid import UUID, uuid3
-from flask import Flask
+from flask import current_app, Flask, has_app_context
from flask_caching import BaseCache
from superset.key_value.exceptions import KeyValueCreateFailedError
-from superset.key_value.types import KeyValueResource, PickleKeyValueCodec
+from superset.key_value.types import (
+ KeyValueCodec,
+ KeyValueResource,
+ PickleKeyValueCodec,
+)
from superset.key_value.utils import get_uuid_namespace
RESOURCE = KeyValueResource.METASTORE_CACHE
-CODEC = PickleKeyValueCodec()
+
+logger = logging.getLogger(__name__)
class SupersetMetastoreCache(BaseCache):
- def __init__(self, namespace: UUID, default_timeout: int = 300) -> None:
+ def __init__(
+ self,
+ namespace: UUID,
+ codec: KeyValueCodec,
+ default_timeout: int = 300,
+ ) -> None:
super().__init__(default_timeout)
self.namespace = namespace
+ self.codec = codec
@classmethod
def factory(
@@ -41,6 +52,17 @@ class SupersetMetastoreCache(BaseCache):
) -> BaseCache:
seed = config.get("CACHE_KEY_PREFIX", "")
kwargs["namespace"] = get_uuid_namespace(seed)
+ codec = config.get("CODEC") or PickleKeyValueCodec()
+ if (
+ has_app_context()
+ and not current_app.debug
+ and isinstance(codec, PickleKeyValueCodec)
+ ):
+ logger.warning(
+ "Using PickleKeyValueCodec with SupersetMetastoreCache may be unsafe, "
+ "use at your own risk."
+ )
+ kwargs["codec"] = codec
return cls(*args, **kwargs)
def get_key(self, key: str) -> UUID:
@@ -69,7 +91,7 @@ class SupersetMetastoreCache(BaseCache):
resource=RESOURCE,
key=self.get_key(key),
value=value,
- codec=CODEC,
+ codec=self.codec,
expires_on=self._get_expiry(timeout),
).run()
return True
@@ -82,7 +104,7 @@ class SupersetMetastoreCache(BaseCache):
CreateKeyValueCommand(
resource=RESOURCE,
value=value,
- codec=CODEC,
+ codec=self.codec,
key=self.get_key(key),
expires_on=self._get_expiry(timeout),
).run()
@@ -98,7 +120,7 @@ class SupersetMetastoreCache(BaseCache):
return GetKeyValueCommand(
resource=RESOURCE,
key=self.get_key(key),
- codec=CODEC,
+ codec=self.codec,
).run()
def has(self, key: str) -> bool:
diff --git a/superset/key_value/exceptions.py b/superset/key_value/exceptions.py
index b05daf6b89..e16f961872 100644
--- a/superset/key_value/exceptions.py
+++ b/superset/key_value/exceptions.py
@@ -52,3 +52,15 @@ class KeyValueUpsertFailedError(UpdateFailedError):
class KeyValueAccessDeniedError(ForbiddenError):
message = _("You don't have permission to modify the value.")
+
+
+class KeyValueCodecException(SupersetException):
+ pass
+
+
+class KeyValueCodecEncodeException(KeyValueCodecException):
+ message = _("Unable to encode value")
+
+
+class KeyValueCodecDecodeException(KeyValueCodecException):
+ message = _("Unable to decode value")
diff --git a/tests/integration_tests/extensions/metastore_cache_test.py b/tests/integration_tests/extensions/metastore_cache_test.py
index d9e0e9ee26..eeb7279fe1 100644
--- a/tests/integration_tests/extensions/metastore_cache_test.py
+++ b/tests/integration_tests/extensions/metastore_cache_test.py
@@ -16,17 +16,27 @@
# under the License.
from __future__ import annotations
+from contextlib import nullcontext
from datetime import datetime, timedelta
-from typing import TYPE_CHECKING
+from typing import Any, TYPE_CHECKING
from uuid import UUID
import pytest
from flask.ctx import AppContext
from freezegun import freeze_time
+from superset.key_value.exceptions import KeyValueCodecEncodeException
+from superset.key_value.types import (
+ JsonKeyValueCodec,
+ KeyValueCodec,
+ PickleKeyValueCodec,
+)
+
if TYPE_CHECKING:
from superset.extensions.metastore_cache import SupersetMetastoreCache
+NAMESPACE = UUID("ee173d1b-ccf3-40aa-941c-985c15224496")
+
FIRST_KEY = "foo"
FIRST_KEY_INITIAL_VALUE = {"foo": "bar"}
FIRST_KEY_UPDATED_VALUE = "foo"
@@ -40,8 +50,9 @@ def cache() -> SupersetMetastoreCache:
from superset.extensions.metastore_cache import SupersetMetastoreCache
return SupersetMetastoreCache(
- namespace=UUID("ee173d1b-ccf3-40aa-941c-985c15224496"),
+ namespace=NAMESPACE,
default_timeout=600,
+ codec=PickleKeyValueCodec(),
)
@@ -75,3 +86,37 @@ def test_expiry(app_context: AppContext, cache: SupersetMetastoreCache) -> None:
with freeze_time(dttm + delta + timedelta(seconds=1)):
assert cache.has(FIRST_KEY) is False
assert cache.get(FIRST_KEY) is None
+
+
+@pytest.mark.parametrize(
+ "input_,codec,expected_result",
+ [
+ ({"foo": "bar"}, JsonKeyValueCodec(), {"foo": "bar"}),
+ (("foo", "bar"), JsonKeyValueCodec(), ["foo", "bar"]),
+ (complex(1, 1), JsonKeyValueCodec(), KeyValueCodecEncodeException()),
+ ({"foo": "bar"}, PickleKeyValueCodec(), {"foo": "bar"}),
+ (("foo", "bar"), PickleKeyValueCodec(), ("foo", "bar")),
+ (complex(1, 1), PickleKeyValueCodec(), complex(1, 1)),
+ ],
+)
+def test_codec(
+ input_: Any,
+ codec: KeyValueCodec,
+ expected_result: Any,
+ app_context: AppContext,
+) -> None:
+ from superset.extensions.metastore_cache import SupersetMetastoreCache
+
+ cache = SupersetMetastoreCache(
+ namespace=NAMESPACE,
+ default_timeout=600,
+ codec=codec,
+ )
+ cm = (
+ pytest.raises(type(expected_result))
+ if isinstance(expected_result, Exception)
+ else nullcontext()
+ )
+ with cm:
+ cache.set(FIRST_KEY, input_)
+ assert cache.get(FIRST_KEY) == expected_result