You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by gr...@apache.org on 2018/01/12 20:05:15 UTC

[incubator-superset] branch master updated: [cache] Using the query as the basis of the cache key (#4016)

This is an automated email from the ASF dual-hosted git repository.

graceguo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-superset.git


The following commit(s) were added to refs/heads/master by this push:
     new a7a6678  [cache] Using the query as the basis of the cache key (#4016)
a7a6678 is described below

commit a7a6678d5ca535e29e6e021b7404c2e5c3599fdb
Author: John Bodley <45...@users.noreply.github.com>
AuthorDate: Fri Jan 12 12:05:12 2018 -0800

    [cache] Using the query as the basis of the cache key (#4016)
---
 superset/assets/npm-debug.log.3344327073 |   0
 superset/viz.py                          | 115 ++++++++++++++++---------------
 tests/core_tests.py                      |   1 -
 tests/viz_tests.py                       |   5 +-
 4 files changed, 62 insertions(+), 59 deletions(-)

diff --git a/superset/assets/npm-debug.log.3344327073 b/superset/assets/npm-debug.log.3344327073
new file mode 100644
index 0000000..e69de29
diff --git a/superset/viz.py b/superset/viz.py
index d046241..62e02ac 100644
--- a/superset/viz.py
+++ b/superset/viz.py
@@ -214,8 +214,6 @@ class BaseViz(object):
 
     @property
     def cache_timeout(self):
-        if self.form_data.get('cache_timeout'):
-            return int(self.form_data.get('cache_timeout'))
         if self.datasource.cache_timeout:
             return self.datasource.cache_timeout
         if (
@@ -229,44 +227,50 @@ class BaseViz(object):
             self.get_payload(force),
             default=utils.json_int_dttm_ser, ignore_nan=True)
 
-    @property
-    def cache_key(self):
-        form_data = self.form_data.copy()
-        merge_extra_filters(form_data)
-        s = str([(k, form_data[k]) for k in sorted(form_data.keys())])
-        return hashlib.md5(s.encode('utf-8')).hexdigest()
+    def cache_key(self, query_obj):
+        """
+        The cache key is the datasource/query string tuple associated with the
+        object which needs to be fully deterministic.
+        """
+
+        return hashlib.md5(
+            json.dumps((
+                self.datasource.id,
+                self.datasource.get_query_str(query_obj),
+            )).encode('utf-8'),
+        ).hexdigest()
 
     def get_payload(self, force=False):
         """Handles caching around the json payload retrieval"""
-        cache_key = self.cache_key
-        payload = None
+        query_obj = self.query_obj()
+        cache_key = self.cache_key(query_obj)
+        cached_dttm = None
+        data = None
+        stacktrace = None
+        rowcount = None
         if not force and cache:
-            payload = cache.get(cache_key)
-
-        if payload:
-            stats_logger.incr('loaded_from_cache')
-            is_cached = True
-            try:
-                cached_data = zlib.decompress(payload)
-                if PY3:
-                    cached_data = cached_data.decode('utf-8')
-                payload = json.loads(cached_data)
-            except Exception as e:
-                logging.error('Error reading cache: ' +
-                              utils.error_msg_from_exception(e))
-                payload = None
-                return []
-            logging.info('Serving from cache')
+            cache_value = cache.get(cache_key)
+            if cache_value:
+                stats_logger.incr('loaded_from_cache')
+                is_cached = True
+                try:
+                    cache_value = zlib.decompress(cache_value)
+                    if PY3:
+                        cache_value = cache_value.decode('utf-8')
+                    cache_value = json.loads(cache_value)
+                    data = cache_value['data']
+                    cached_dttm = cache_value['dttm']
+                except Exception as e:
+                    logging.error('Error reading cache: ' +
+                                  utils.error_msg_from_exception(e))
+                    data = None
+                logging.info('Serving from cache')
 
-        if not payload:
+        if not data:
             stats_logger.incr('loaded_from_source')
-            data = None
             is_cached = False
-            cache_timeout = self.cache_timeout
-            stacktrace = None
-            rowcount = None
             try:
-                df = self.get_df()
+                df = self.get_df(query_obj)
                 if not self.error_message:
                     data = self.get_data(df)
                 rowcount = len(df.index) if df is not None else 0
@@ -277,37 +281,40 @@ class BaseViz(object):
                 self.status = utils.QueryStatus.FAILED
                 data = None
                 stacktrace = traceback.format_exc()
-            payload = {
-                'cache_key': cache_key,
-                'cache_timeout': cache_timeout,
-                'data': data,
-                'error': self.error_message,
-                'form_data': self.form_data,
-                'query': self.query,
-                'status': self.status,
-                'stacktrace': stacktrace,
-                'rowcount': rowcount,
-            }
-            payload['cached_dttm'] = datetime.utcnow().isoformat().split('.')[0]
-            logging.info('Caching for the next {} seconds'.format(
-                cache_timeout))
-            data = self.json_dumps(payload)
-            if PY3:
-                data = bytes(data, 'utf-8')
-            if cache and self.status != utils.QueryStatus.FAILED:
+
+            if data and cache and self.status != utils.QueryStatus.FAILED:
+                cached_dttm = datetime.utcnow().isoformat().split('.')[0]
                 try:
+                    cache_value = json.dumps({
+                        'data': data,
+                        'dttm': cached_dttm,
+                    })
+                    if PY3:
+                        cache_value = bytes(cache_value, 'utf-8')
                     cache.set(
                         cache_key,
-                        zlib.compress(data),
-                        timeout=cache_timeout)
+                        zlib.compress(cache_value),
+                        timeout=self.cache_timeout)
                 except Exception as e:
                     # cache.set call can fail if the backend is down or if
                     # the key is too large or whatever other reasons
                     logging.warning('Could not cache key {}'.format(cache_key))
                     logging.exception(e)
                     cache.delete(cache_key)
-        payload['is_cached'] = is_cached
-        return payload
+
+        return {
+            'cache_key': cache_key,
+            'cached_dttm': cached_dttm,
+            'cache_timeout': self.cache_timeout,
+            'data': data,
+            'error': self.error_message,
+            'form_data': self.form_data,
+            'is_cached': is_cached,
+            'query': self.query,
+            'status': self.status,
+            'stacktrace': stacktrace,
+            'rowcount': rowcount,
+        }
 
     def json_dumps(self, obj):
         return json.dumps(obj, default=utils.json_int_dttm_ser, ignore_nan=True)
diff --git a/tests/core_tests.py b/tests/core_tests.py
index 8415465..a7edc4e 100644
--- a/tests/core_tests.py
+++ b/tests/core_tests.py
@@ -340,7 +340,6 @@ class CoreTests(SupersetTestCase):
         slc = self.get_slice('Girls', db.session)
         data = self.get_json_resp(
             '/superset/warm_up_cache?slice_id={}'.format(slc.id))
-
         assert data == [{'slice_id': slc.id, 'slice_name': slc.slice_name}]
 
         data = self.get_json_resp(
diff --git a/tests/viz_tests.py b/tests/viz_tests.py
index 67f4bf8..abf29ad 100644
--- a/tests/viz_tests.py
+++ b/tests/viz_tests.py
@@ -101,11 +101,8 @@ class BaseVizTestCase(unittest.TestCase):
 
     def test_cache_timeout(self):
         datasource = Mock()
-        form_data = {'cache_timeout': '10'}
-        test_viz = viz.BaseViz(datasource, form_data)
-        self.assertEqual(10, test_viz.cache_timeout)
-        del form_data['cache_timeout']
         datasource.cache_timeout = 156
+        test_viz = viz.BaseViz(datasource, form_data={})
         self.assertEqual(156, test_viz.cache_timeout)
         datasource.cache_timeout = None
         datasource.database = Mock()

-- 
To stop receiving notification emails like this one, please contact
"commits@superset.apache.org" <co...@superset.apache.org>.