You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by ar...@apache.org on 2024/03/25 17:17:22 UTC

(superset) 07/10: Table with Time Comparison:

This is an automated email from the ASF dual-hosted git repository.

arivero pushed a commit to branch table-time-comparison
in repository https://gitbox.apache.org/repos/asf/superset.git

commit f1ba26da2b450d61ac9a73450e6c4289497e7120
Author: Antonio Rivero <an...@gmail.com>
AuthorDate: Mon Mar 4 23:07:00 2024 +0100

    Table with Time Comparison:
    
    - Add comments to help navigate the new JOIN query operation ofr time_comparison
---
 superset/connectors/sqla/models.py | 49 ++++++++++++++++++++++++++++----------
 1 file changed, 37 insertions(+), 12 deletions(-)

diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py
index 6259628d5b..a872f75553 100644
--- a/superset/connectors/sqla/models.py
+++ b/superset/connectors/sqla/models.py
@@ -1438,12 +1438,25 @@ class SqlaTable(
         mutate: bool,
         instant_time_comparison_info: dict[str, Any],
     ) -> tuple[str, list[str]]:
+        """
+        Main goal of this method is to create a JOIN between a given query object and
+        other that shifts the time filters. This is different from time_offsets because
+        we are not joining result sets but rather we're applying the JOIN at query level.
+        Use case: Compare paginated data in a Table Chart. But ideally can be leveraged by
+        anything that needs the experimental instant time comparison.
+        """
+        # So we don't override the original QueryObject
         query_obj_clone = copy.copy(query_obj)
         final_query_sql = ""
+        # The inner query object doesn't need limits or offset
         query_obj_clone["row_limit"] = None
         query_obj_clone["row_offset"] = None
+        # Let's get what range should we be using when building the time_comparison shift
+        # This is computing the time_shift based on some predefined options of deltas
         instant_time_comparison_range = instant_time_comparison_info.get("range")
         if instant_time_comparison_range == InstantTimeComparison.CUSTOM:
+            # If it's a custom filter, we take the 1st temporal filter and change it with
+            # whatever value we received in the request as the custom filter.
             custom_filter = instant_time_comparison_info.get("filter", {})
             temporal_filters = [
                 filter["col"]
@@ -1462,23 +1475,28 @@ class SqlaTable(
             new_filters = temporal_filters + non_temporal_filters
             query_obj_clone["filter"] = new_filters
         if instant_time_comparison_range != InstantTimeComparison.CUSTOM:
+            # When not custom, we're supposed to use the predefined time ranges
+            # Year, Month, Week or Inherited
             query_obj_clone["extras"] = {
                 **query_obj_clone.get("extras", {}),
                 "instant_time_comparison_range": instant_time_comparison_range,
             }
-        sqlaq_2 = self.get_sqla_query(**query_obj_clone)
+        shifted_sqlaq = self.get_sqla_query(**query_obj_clone)
+        # We JOIN only over columns, not metrics or anything else since those cannot be
+        # joined
         join_columns = query_obj_clone.get("columns") or []
-        sqla_query_a = sqlaq.sqla_query
-        sqla_query_b = sqlaq_2.sqla_query
-        sqla_query_b_subquery = sqla_query_b.subquery()
-        query_a_cte = sqla_query_a.cte("query_a_results")
-        column_names_a = [column.key for column in sqla_query_a.c]
+        original_query_a = sqlaq.sqla_query
+        shifted_query_b = shifted_sqlaq.sqla_query
+        shifted_query_b_subquery = shifted_query_b.subquery()
+        query_a_cte = original_query_a.cte("query_a_results")
+        column_names_a = [column.key for column in original_query_a.c]
         exclude_columns_b = set(query_obj_clone.get("columns") or [])
+        # Let's prepare the columns set to be used in query A and B
         selected_columns_a = [query_a_cte.c[col].label(col) for col in column_names_a]
         # Renamed columns from Query B (with "prev_" prefix)
         selected_columns_b = [
-            sqla_query_b_subquery.c[col].label(f"prev_{col}")
-            for col in sqla_query_b_subquery.c.keys()
+            shifted_query_b_subquery.c[col].label(f"prev_{col}")
+            for col in shifted_query_b_subquery.c.keys()
             if col not in exclude_columns_b
         ]
         # Combine selected columns from both queries
@@ -1486,25 +1504,30 @@ class SqlaTable(
         if join_columns and not query_obj_clone.get("is_rowcount"):
             # Proceed with JOIN operation as before since join_columns is not empty
             join_conditions = [
-                sqla_query_b_subquery.c[col] == query_a_cte.c[col]
+                shifted_query_b_subquery.c[col] == query_a_cte.c[col]
                 for col in join_columns
-                if col in sqla_query_b_subquery.c and col in query_a_cte.c
+                if col in shifted_query_b_subquery.c and col in query_a_cte.c
             ]
             final_query = sa.select(*final_selected_columns).select_from(
-                sqla_query_b_subquery.join(query_a_cte, sa.and_(*join_conditions))
+                shifted_query_b_subquery.join(query_a_cte, sa.and_(*join_conditions))
             )
         else:
+            # When dealing with queries that have no columns or that are totals,
+            # rowcounts etc we join with the 1 = 1 to create a result set that have
+            # both sets (original and prev)
             final_query = sa.select(*final_selected_columns).select_from(
-                sqla_query_b_subquery.join(
+                shifted_query_b_subquery.join(
                     query_a_cte, sa.literal(True) == sa.literal(True)
                 )
             )
+        # Transform the query as you would within get_query_str_extended
         final_query_sql = self.database.compile_sqla_query(final_query)
         final_query_sql = self._apply_cte(final_query_sql, sqlaq.cte)
         final_query_sql = sqlparse.format(final_query_sql, reindent=True)
         if mutate:
             final_query_sql = self.mutate_query_from_config(final_query_sql)
 
+        # Prepare the labels for the columns to be used
         labels_expected = self.extract_column_names(final_selected_columns)
 
         return final_query_sql, labels_expected
@@ -1520,6 +1543,8 @@ class SqlaTable(
         sql = self._apply_cte(sql, sqlaq.cte)
         sql = sqlparse.format(sql, reindent=True)
 
+        # Need to tell apart the regular queries from the ones that need
+        # Time comparison
         query_obj_clone = copy.copy(query_obj)
         query_object_extras: dict[str, Any] = query_obj.get("extras", {})
         instant_time_comparison_info = query_object_extras.get(