You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by ma...@apache.org on 2018/01/28 17:46:15 UTC

[incubator-superset] branch master updated: Use the query_obj as the basis for the cache key (#4260)

This is an automated email from the ASF dual-hosted git repository.

maximebeauchemin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-superset.git


The following commit(s) were added to refs/heads/master by this push:
     new e4a95f9  Use the query_obj as the basis for the cache key (#4260)
e4a95f9 is described below

commit e4a95f9428cf442256022a6e5d90875a4833a4f8
Author: Maxime Beauchemin <ma...@gmail.com>
AuthorDate: Sun Jan 28 09:46:13 2018 -0800

    Use the query_obj as the basis for the cache key (#4260)
    
    * Use the query_obj as the basis for the cache key
    
    When we recently moved from hashing form_data to define the cache_key
    towards using the rendered query instead,
    it made is such that non deterministic form
    control values like relative times specified in "from" and "until" time
    bound resulted in making those miss cache 100% of the time.
    
    Here we move away from using the rendered query and using the query_obj
    instead.
    
    * Deprecating using form_data in templates
---
 superset/connectors/druid/models.py |  2 +-
 superset/connectors/sqla/models.py  |  3 ---
 superset/viz.py                     | 37 ++++++++++++++++++++++++-------------
 tests/core_tests.py                 | 12 ++++++++++++
 4 files changed, 37 insertions(+), 17 deletions(-)

diff --git a/superset/connectors/druid/models.py b/superset/connectors/druid/models.py
index b3582e3..edd7ec1 100644
--- a/superset/connectors/druid/models.py
+++ b/superset/connectors/druid/models.py
@@ -1040,7 +1040,7 @@ class DruidDatasource(Model, BaseDatasource):
             inner_from_dttm=None, inner_to_dttm=None,
             orderby=None,
             extras=None,  # noqa
-            columns=None, phase=2, client=None, form_data=None,
+            columns=None, phase=2, client=None,
             order_desc=True,
             prequeries=None,
             is_prequery=False,
diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py
index 6ccddbe..b2e21c7 100644
--- a/superset/connectors/sqla/models.py
+++ b/superset/connectors/sqla/models.py
@@ -446,7 +446,6 @@ class SqlaTable(Model, BaseDatasource):
             orderby=None,
             extras=None,
             columns=None,
-            form_data=None,
             order_desc=True,
             prequeries=None,
             is_prequery=False,
@@ -458,7 +457,6 @@ class SqlaTable(Model, BaseDatasource):
             'metrics': metrics,
             'row_limit': row_limit,
             'to_dttm': to_dttm,
-            'form_data': form_data,
         }
         template_processor = self.get_template_processor(**template_kwargs)
         db_engine_spec = self.database.db_engine_spec
@@ -654,7 +652,6 @@ class SqlaTable(Model, BaseDatasource):
                     'orderby': orderby,
                     'extras': extras,
                     'columns': columns,
-                    'form_data': form_data,
                     'order_desc': True,
                 }
                 result = self.query(subquery_obj)
diff --git a/superset/viz.py b/superset/viz.py
index 6fa7d98..e59835b 100644
--- a/superset/viz.py
+++ b/superset/viz.py
@@ -205,7 +205,6 @@ class BaseViz(object):
             'timeseries_limit': limit,
             'extras': extras,
             'timeseries_limit_metric': timeseries_limit_metric,
-            'form_data': form_data,
             'order_desc': order_desc,
             'prequeries': [],
             'is_prequery': False,
@@ -229,16 +228,22 @@ class BaseViz(object):
 
     def cache_key(self, query_obj):
         """
-        The cache key is the datasource/query string tuple associated with the
-        object which needs to be fully deterministic.
+        The cache key is made out of the key/values in `query_obj`
+
+        We remove datetime bounds that are hard values,
+        and replace them with the use-provided inputs to bounds, which
+        may we time-relative (as in "5 days ago" or "now").
         """
+        cache_dict = copy.deepcopy(query_obj)
+
+        for k in ['from_dttm', 'to_dttm']:
+            del cache_dict[k]
+
+        for k in ['since', 'until', 'datasource']:
+            cache_dict[k] = self.form_data.get(k)
 
-        return hashlib.md5(
-            json.dumps((
-                self.datasource.id,
-                self.datasource.get_query_str(query_obj),
-            )).encode('utf-8'),
-        ).hexdigest()
+        json_data = self.json_dumps(cache_dict, sort_keys=True)
+        return hashlib.md5(json_data.encode('utf-8')).hexdigest()
 
     def get_payload(self, force=False):
         """Handles caching around the json payload retrieval"""
@@ -320,8 +325,13 @@ class BaseViz(object):
             'rowcount': rowcount,
         }
 
-    def json_dumps(self, obj):
-        return json.dumps(obj, default=utils.json_int_dttm_ser, ignore_nan=True)
+    def json_dumps(self, obj, sort_keys=False):
+        return json.dumps(
+            obj,
+            default=utils.json_int_dttm_ser,
+            ignore_nan=True,
+            sort_keys=sort_keys,
+        )
 
     @property
     def data(self):
@@ -431,9 +441,10 @@ class TableViz(BaseViz):
             columns=list(df.columns),
         )
 
-    def json_dumps(self, obj):
+    def json_dumps(self, obj, sort_keys=False):
         if self.form_data.get('all_columns'):
-            return json.dumps(obj, default=utils.json_iso_dttm_ser)
+            return json.dumps(
+                obj, default=utils.json_iso_dttm_ser, sort_keys=sort_keys)
         else:
             return super(TableViz, self).json_dumps(obj)
 
diff --git a/tests/core_tests.py b/tests/core_tests.py
index a7edc4e..367ab68 100644
--- a/tests/core_tests.py
+++ b/tests/core_tests.py
@@ -83,6 +83,18 @@ class CoreTests(SupersetTestCase):
             '/superset/slice/{}/?standalone=true'.format(slc.id))
         assert 'List Roles' not in resp
 
+    def test_cache_key(self):
+        self.login(username='admin')
+        slc = self.get_slice('Girls', db.session)
+
+        viz = slc.viz
+        qobj = viz.query_obj()
+        cache_key = viz.cache_key(qobj)
+        self.assertEqual(cache_key, viz.cache_key(qobj))
+
+        qobj['groupby'] = []
+        self.assertNotEqual(cache_key, viz.cache_key(qobj))
+
     def test_slice_json_endpoint(self):
         self.login(username='admin')
         slc = self.get_slice('Girls', db.session)

-- 
To stop receiving notification emails like this one, please contact
maximebeauchemin@apache.org.