You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by gr...@apache.org on 2018/01/12 20:05:15 UTC
[incubator-superset] branch master updated: [cache] Using the query as the basis of the cache key (#4016)
This is an automated email from the ASF dual-hosted git repository.
graceguo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-superset.git
The following commit(s) were added to refs/heads/master by this push:
new a7a6678 [cache] Using the query as the basis of the cache key (#4016)
a7a6678 is described below
commit a7a6678d5ca535e29e6e021b7404c2e5c3599fdb
Author: John Bodley <45...@users.noreply.github.com>
AuthorDate: Fri Jan 12 12:05:12 2018 -0800
[cache] Using the query as the basis of the cache key (#4016)
---
superset/assets/npm-debug.log.3344327073 | 0
superset/viz.py | 115 ++++++++++++++++---------------
tests/core_tests.py | 1 -
tests/viz_tests.py | 5 +-
4 files changed, 62 insertions(+), 59 deletions(-)
diff --git a/superset/assets/npm-debug.log.3344327073 b/superset/assets/npm-debug.log.3344327073
new file mode 100644
index 0000000..e69de29
diff --git a/superset/viz.py b/superset/viz.py
index d046241..62e02ac 100644
--- a/superset/viz.py
+++ b/superset/viz.py
@@ -214,8 +214,6 @@ class BaseViz(object):
@property
def cache_timeout(self):
- if self.form_data.get('cache_timeout'):
- return int(self.form_data.get('cache_timeout'))
if self.datasource.cache_timeout:
return self.datasource.cache_timeout
if (
@@ -229,44 +227,50 @@ class BaseViz(object):
self.get_payload(force),
default=utils.json_int_dttm_ser, ignore_nan=True)
- @property
- def cache_key(self):
- form_data = self.form_data.copy()
- merge_extra_filters(form_data)
- s = str([(k, form_data[k]) for k in sorted(form_data.keys())])
- return hashlib.md5(s.encode('utf-8')).hexdigest()
+ def cache_key(self, query_obj):
+ """
+ The cache key is the datasource/query string tuple associated with the
+ object which needs to be fully deterministic.
+ """
+
+ return hashlib.md5(
+ json.dumps((
+ self.datasource.id,
+ self.datasource.get_query_str(query_obj),
+ )).encode('utf-8'),
+ ).hexdigest()
def get_payload(self, force=False):
"""Handles caching around the json payload retrieval"""
- cache_key = self.cache_key
- payload = None
+ query_obj = self.query_obj()
+ cache_key = self.cache_key(query_obj)
+ cached_dttm = None
+ data = None
+ stacktrace = None
+ rowcount = None
if not force and cache:
- payload = cache.get(cache_key)
-
- if payload:
- stats_logger.incr('loaded_from_cache')
- is_cached = True
- try:
- cached_data = zlib.decompress(payload)
- if PY3:
- cached_data = cached_data.decode('utf-8')
- payload = json.loads(cached_data)
- except Exception as e:
- logging.error('Error reading cache: ' +
- utils.error_msg_from_exception(e))
- payload = None
- return []
- logging.info('Serving from cache')
+ cache_value = cache.get(cache_key)
+ if cache_value:
+ stats_logger.incr('loaded_from_cache')
+ is_cached = True
+ try:
+ cache_value = zlib.decompress(cache_value)
+ if PY3:
+ cache_value = cache_value.decode('utf-8')
+ cache_value = json.loads(cache_value)
+ data = cache_value['data']
+ cached_dttm = cache_value['dttm']
+ except Exception as e:
+ logging.error('Error reading cache: ' +
+ utils.error_msg_from_exception(e))
+ data = None
+ logging.info('Serving from cache')
- if not payload:
+ if not data:
stats_logger.incr('loaded_from_source')
- data = None
is_cached = False
- cache_timeout = self.cache_timeout
- stacktrace = None
- rowcount = None
try:
- df = self.get_df()
+ df = self.get_df(query_obj)
if not self.error_message:
data = self.get_data(df)
rowcount = len(df.index) if df is not None else 0
@@ -277,37 +281,40 @@ class BaseViz(object):
self.status = utils.QueryStatus.FAILED
data = None
stacktrace = traceback.format_exc()
- payload = {
- 'cache_key': cache_key,
- 'cache_timeout': cache_timeout,
- 'data': data,
- 'error': self.error_message,
- 'form_data': self.form_data,
- 'query': self.query,
- 'status': self.status,
- 'stacktrace': stacktrace,
- 'rowcount': rowcount,
- }
- payload['cached_dttm'] = datetime.utcnow().isoformat().split('.')[0]
- logging.info('Caching for the next {} seconds'.format(
- cache_timeout))
- data = self.json_dumps(payload)
- if PY3:
- data = bytes(data, 'utf-8')
- if cache and self.status != utils.QueryStatus.FAILED:
+
+ if data and cache and self.status != utils.QueryStatus.FAILED:
+ cached_dttm = datetime.utcnow().isoformat().split('.')[0]
try:
+ cache_value = json.dumps({
+ 'data': data,
+ 'dttm': cached_dttm,
+ })
+ if PY3:
+ cache_value = bytes(cache_value, 'utf-8')
cache.set(
cache_key,
- zlib.compress(data),
- timeout=cache_timeout)
+ zlib.compress(cache_value),
+ timeout=self.cache_timeout)
except Exception as e:
# cache.set call can fail if the backend is down or if
# the key is too large or whatever other reasons
logging.warning('Could not cache key {}'.format(cache_key))
logging.exception(e)
cache.delete(cache_key)
- payload['is_cached'] = is_cached
- return payload
+
+ return {
+ 'cache_key': cache_key,
+ 'cached_dttm': cached_dttm,
+ 'cache_timeout': self.cache_timeout,
+ 'data': data,
+ 'error': self.error_message,
+ 'form_data': self.form_data,
+ 'is_cached': is_cached,
+ 'query': self.query,
+ 'status': self.status,
+ 'stacktrace': stacktrace,
+ 'rowcount': rowcount,
+ }
def json_dumps(self, obj):
return json.dumps(obj, default=utils.json_int_dttm_ser, ignore_nan=True)
diff --git a/tests/core_tests.py b/tests/core_tests.py
index 8415465..a7edc4e 100644
--- a/tests/core_tests.py
+++ b/tests/core_tests.py
@@ -340,7 +340,6 @@ class CoreTests(SupersetTestCase):
slc = self.get_slice('Girls', db.session)
data = self.get_json_resp(
'/superset/warm_up_cache?slice_id={}'.format(slc.id))
-
assert data == [{'slice_id': slc.id, 'slice_name': slc.slice_name}]
data = self.get_json_resp(
diff --git a/tests/viz_tests.py b/tests/viz_tests.py
index 67f4bf8..abf29ad 100644
--- a/tests/viz_tests.py
+++ b/tests/viz_tests.py
@@ -101,11 +101,8 @@ class BaseVizTestCase(unittest.TestCase):
def test_cache_timeout(self):
datasource = Mock()
- form_data = {'cache_timeout': '10'}
- test_viz = viz.BaseViz(datasource, form_data)
- self.assertEqual(10, test_viz.cache_timeout)
- del form_data['cache_timeout']
datasource.cache_timeout = 156
+ test_viz = viz.BaseViz(datasource, form_data={})
self.assertEqual(156, test_viz.cache_timeout)
datasource.cache_timeout = None
datasource.database = Mock()
--
To stop receiving notification emails like this one, please contact
['"commits@superset.apache.org" <co...@superset.apache.org>'].