You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by ma...@apache.org on 2020/01/16 05:51:23 UTC
[incubator-superset] branch master updated: fix: add
datasource.changed_on to cache_key (#8901)
This is an automated email from the ASF dual-hosted git repository.
maximebeauchemin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-superset.git
The following commit(s) were added to refs/heads/master by this push:
new c087a48 fix: add datasource.changed_on to cache_key (#8901)
c087a48 is described below
commit c087a48d5289767e0ae29205318c11a27963d496
Author: Ville Brofeldt <33...@users.noreply.github.com>
AuthorDate: Thu Jan 16 07:51:13 2020 +0200
fix: add datasource.changed_on to cache_key (#8901)
* Add datasource.changed_on to cache_key and add+fix related unit tests
* Add note to UPDATING.md
* Remove redundant comment about metric names
---
UPDATING.md | 14 +++++---
setup.cfg | 2 +-
superset/common/query_context.py | 13 ++++---
superset/common/query_object.py | 7 +---
superset/viz.py | 1 +
tests/core_tests.py | 76 +++++++++++++++++++++++++++-------------
6 files changed, 73 insertions(+), 40 deletions(-)
diff --git a/UPDATING.md b/UPDATING.md
index aebe63a..533414f 100644
--- a/UPDATING.md
+++ b/UPDATING.md
@@ -22,8 +22,12 @@ This file documents any backwards-incompatible changes in Superset and
assists people when migrating to a new version.
## Next
+* [8901](https://github.com/apache/incubator-superset/pull/8901): The datasource's update
+timestamp has been added to the query object's cache key to ensure updates to
+datasources are always reflected in associated query results. As a consequence all
+previously cached results will be invalidated when updating to the next version.
-* [8732](https://github.com/apache/incubator-superset/pull/8732): Swagger user interface is now enabled by default.
+* [8732](https://github.com/apache/incubator-superset/pull/8732): Swagger user interface is now enabled by default.
A new permission `show on SwaggerView` is created by `superset init` and given to the `Admin` Role. To disable the UI,
set `FAB_API_SWAGGER_UI = False` on config.
@@ -90,9 +94,9 @@ which adds missing non-nullable fields to the `datasources` table. Depending on
the integrity of the data, manual intervention may be required.
* [5452](https://github.com/apache/incubator-superset/pull/5452): a change
-which adds missing non-nullable fields and uniqueness constraints (which may be
-case insensitive depending on your database configuration) to the `columns`and
-`table_columns` tables. Depending on the integrity of the data, manual
+which adds missing non-nullable fields and uniqueness constraints (which may be
+case insensitive depending on your database configuration) to the `columns`and
+`table_columns` tables. Depending on the integrity of the data, manual
intervention may be required.
* `fabmanager` command line is deprecated since Flask-AppBuilder 2.0.0, use
the new `flask fab <command>` integrated with *Flask cli*.
@@ -100,7 +104,7 @@ the new `flask fab <command>` integrated with *Flask cli*.
`FAB_UPDATE_PERMS` config boolean key. To disable automatic
creation of permissions set `FAB_UPDATE_PERMS = False` on config.
* [5453](https://github.com/apache/incubator-superset/pull/5453): a change
-which adds missing non-nullable fields and uniqueness constraints (which may be
+which adds missing non-nullable fields and uniqueness constraints (which may be
case insensitive depending on your database configuration) to the metrics
and sql_metrics tables. Depending on the integrity of the data, manual
intervention may be required.
diff --git a/setup.cfg b/setup.cfg
index 20c7b74..8ca150e 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -45,7 +45,7 @@ combine_as_imports = true
include_trailing_comma = true
line_length = 88
known_first_party = superset
-known_third_party =alembic,backoff,bleach,celery,click,colorama,contextlib2,croniter,dateutil,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,geohash,geopy,humanize,isodate,jinja2,markdown,markupsafe,marshmallow,msgpack,numpy,pandas,parsedatetime,pathlib2,polyline,prison,psycopg2,pyarrow,pyhive,pytz,retry,selenium,setuptools,simplejson,sphinx_rtd_theme,sqlalchemy,sqlalchemy_utils,sqlparse,we [...]
+known_third_party =alembic,backoff,bleach,celery,click,colorama,contextlib2,croniter,dateutil,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,geohash,geopy,humanize,isodate,jinja2,markdown,markupsafe,marshmallow,msgpack,numpy,pandas,parsedatetime,pathlib2,polyline,prison,pyarrow,pyhive,pytz,retry,selenium,setuptools,simplejson,sphinx_rtd_theme,sqlalchemy,sqlalchemy_utils,sqlparse,werkzeug,wt [...]
multi_line_output = 3
order_by_type = false
diff --git a/superset/common/query_context.py b/superset/common/query_context.py
index ae57a63..e0bf2b5 100644
--- a/superset/common/query_context.py
+++ b/superset/common/query_context.py
@@ -156,20 +156,25 @@ class QueryContext:
return self.datasource.database.cache_timeout
return config["CACHE_DEFAULT_TIMEOUT"]
- def get_df_payload( # pylint: disable=too-many-locals,too-many-statements
- self, query_obj: QueryObject, **kwargs
- ) -> Dict[str, Any]:
- """Handles caching around the df payload retrieval"""
+ def cache_key(self, query_obj: QueryObject, **kwargs) -> Optional[str]:
extra_cache_keys = self.datasource.get_extra_cache_keys(query_obj.to_dict())
cache_key = (
query_obj.cache_key(
datasource=self.datasource.uid,
extra_cache_keys=extra_cache_keys,
+ changed_on=self.datasource.changed_on,
**kwargs
)
if query_obj
else None
)
+ return cache_key
+
+ def get_df_payload( # pylint: disable=too-many-locals,too-many-statements
+ self, query_obj: QueryObject, **kwargs
+ ) -> Dict[str, Any]:
+ """Handles caching around the df payload retrieval"""
+ cache_key = self.cache_key(query_obj, **kwargs)
logging.info("Cache key: %s", cache_key)
is_loaded = False
stacktrace = None
diff --git a/superset/common/query_object.py b/superset/common/query_object.py
index 21649d1..dad50ea 100644
--- a/superset/common/query_object.py
+++ b/superset/common/query_object.py
@@ -81,12 +81,7 @@ class QueryObject:
self.time_shift = utils.parse_human_timedelta(time_shift)
self.groupby = groupby or []
- # Temporal solution for backward compatability issue
- # due the new format of non-ad-hoc metric.
- self.metrics = [
- metric if "expressionType" in metric else metric["label"] # type: ignore
- for metric in metrics
- ]
+ self.metrics = [utils.get_metric_name(metric) for metric in metrics]
self.row_limit = row_limit
self.filter = filters or []
self.timeseries_limit = timeseries_limit
diff --git a/superset/viz.py b/superset/viz.py
index 617ccdc..a258636 100644
--- a/superset/viz.py
+++ b/superset/viz.py
@@ -372,6 +372,7 @@ class BaseViz:
cache_dict["time_range"] = self.form_data.get("time_range")
cache_dict["datasource"] = self.datasource.uid
cache_dict["extra_cache_keys"] = self.datasource.get_extra_cache_keys(query_obj)
+ cache_dict["changed_on"] = self.datasource.changed_on
json_data = self.json_dumps(cache_dict, sort_keys=True)
return hashlib.md5(json_data.encode("utf-8")).hexdigest()
diff --git a/tests/core_tests.py b/tests/core_tests.py
index 2b1b840..060b292 100644
--- a/tests/core_tests.py
+++ b/tests/core_tests.py
@@ -28,15 +28,17 @@ import pytz
import random
import re
import string
+from typing import Any, Dict
import unittest
from unittest import mock
import pandas as pd
-import psycopg2
import sqlalchemy as sqla
from tests.test_app import app
from superset import dataframe, db, jinja_context, security_manager, sql_lab
+from superset.common.query_context import QueryContext
+from superset.connectors.connector_registry import ConnectorRegistry
from superset.connectors.sqla.models import SqlaTable
from superset.db_engine_specs.base import BaseEngineSpec
from superset.db_engine_specs.mssql import MssqlEngineSpec
@@ -98,7 +100,23 @@ class CoreTests(SupersetTestCase):
resp = self.client.get("/superset/slice/-1/")
assert resp.status_code == 404
- def test_cache_key(self):
+ def _get_query_context_dict(self) -> Dict[str, Any]:
+ self.login(username="admin")
+ slc = self.get_slice("Girl Name Cloud", db.session)
+ return {
+ "datasource": {"id": slc.datasource_id, "type": slc.datasource_type},
+ "queries": [
+ {
+ "granularity": "ds",
+ "groupby": ["name"],
+ "metrics": ["sum__num"],
+ "filters": [],
+ "row_limit": 100,
+ }
+ ],
+ }
+
+ def test_viz_cache_key(self):
self.login(username="admin")
slc = self.get_slice("Girls", db.session)
@@ -110,30 +128,40 @@ class CoreTests(SupersetTestCase):
qobj["groupby"] = []
self.assertNotEqual(cache_key, viz.cache_key(qobj))
+ def test_cache_key_changes_when_datasource_is_updated(self):
+ qc_dict = self._get_query_context_dict()
+
+ # construct baseline cache_key
+ query_context = QueryContext(**qc_dict)
+ query_object = query_context.queries[0]
+ cache_key_original = query_context.cache_key(query_object)
+
+ # make temporary change and revert it to refresh the changed_on property
+ datasource = ConnectorRegistry.get_datasource(
+ datasource_type=qc_dict["datasource"]["type"],
+ datasource_id=qc_dict["datasource"]["id"],
+ session=db.session,
+ )
+ description_original = datasource.description
+ datasource.description = "temporary description"
+ db.session.commit()
+ datasource.description = description_original
+ db.session.commit()
+
+ # create new QueryContext with unchanged attributes and extract new cache_key
+ query_context = QueryContext(**qc_dict)
+ query_object = query_context.queries[0]
+ cache_key_new = query_context.cache_key(query_object)
+
+ # the new cache_key should be different due to updated datasource
+ self.assertNotEqual(cache_key_original, cache_key_new)
+
def test_api_v1_query_endpoint(self):
self.login(username="admin")
- slc = self.get_slice("Girl Name Cloud", db.session)
- form_data = slc.form_data
- data = json.dumps(
- {
- "datasource": {"id": slc.datasource_id, "type": slc.datasource_type},
- "queries": [
- {
- "granularity": "ds",
- "groupby": ["name"],
- "metrics": ["sum__num"],
- "filters": [],
- "time_range": "{} : {}".format(
- form_data.get("since"), form_data.get("until")
- ),
- "limit": 100,
- }
- ],
- }
- )
- # TODO: update once get_data is implemented for QueryObject
- with self.assertRaises(Exception):
- self.get_resp("/api/v1/query/", {"query_context": data})
+ qc_dict = self._get_query_context_dict()
+ data = json.dumps(qc_dict)
+ resp = json.loads(self.get_resp("/api/v1/query/", {"query_context": data}))
+ self.assertEqual(resp[0]["rowcount"], 100)
def test_old_slice_json_endpoint(self):
self.login(username="admin")