You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by ar...@apache.org on 2024/03/08 00:44:17 UTC
(superset) 07/09: Table with Time Comparison:
This is an automated email from the ASF dual-hosted git repository.
arivero pushed a commit to branch table-time-comparison
in repository https://gitbox.apache.org/repos/asf/superset.git
commit 0d4bad4979a73199b37074ff36521dc38b84bf05
Author: Antonio Rivero <an...@gmail.com>
AuthorDate: Mon Mar 4 23:07:00 2024 +0100
Table with Time Comparison:
- Add comments to help navigate the new JOIN query operation ofr time_comparison
---
superset/connectors/sqla/models.py | 49 ++++++++++++++++++++++++++++----------
1 file changed, 37 insertions(+), 12 deletions(-)
diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py
index 6259628d5b..a872f75553 100644
--- a/superset/connectors/sqla/models.py
+++ b/superset/connectors/sqla/models.py
@@ -1438,12 +1438,25 @@ class SqlaTable(
mutate: bool,
instant_time_comparison_info: dict[str, Any],
) -> tuple[str, list[str]]:
+ """
+ Main goal of this method is to create a JOIN between a given query object and
+ other that shifts the time filters. This is different from time_offsets because
+ we are not joining result sets but rather we're applying the JOIN at query level.
+ Use case: Compare paginated data in a Table Chart. But ideally can be leveraged by
+ anything that needs the experimental instant time comparison.
+ """
+ # So we don't override the original QueryObject
query_obj_clone = copy.copy(query_obj)
final_query_sql = ""
+ # The inner query object doesn't need limits or offset
query_obj_clone["row_limit"] = None
query_obj_clone["row_offset"] = None
+ # Let's get what range should we be using when building the time_comparison shift
+ # This is computing the time_shift based on some predefined options of deltas
instant_time_comparison_range = instant_time_comparison_info.get("range")
if instant_time_comparison_range == InstantTimeComparison.CUSTOM:
+ # If it's a custom filter, we take the 1st temporal filter and change it with
+ # whatever value we received in the request as the custom filter.
custom_filter = instant_time_comparison_info.get("filter", {})
temporal_filters = [
filter["col"]
@@ -1462,23 +1475,28 @@ class SqlaTable(
new_filters = temporal_filters + non_temporal_filters
query_obj_clone["filter"] = new_filters
if instant_time_comparison_range != InstantTimeComparison.CUSTOM:
+ # When not custom, we're supposed to use the predefined time ranges
+ # Year, Month, Week or Inherited
query_obj_clone["extras"] = {
**query_obj_clone.get("extras", {}),
"instant_time_comparison_range": instant_time_comparison_range,
}
- sqlaq_2 = self.get_sqla_query(**query_obj_clone)
+ shifted_sqlaq = self.get_sqla_query(**query_obj_clone)
+ # We JOIN only over columns, not metrics or anything else since those cannot be
+ # joined
join_columns = query_obj_clone.get("columns") or []
- sqla_query_a = sqlaq.sqla_query
- sqla_query_b = sqlaq_2.sqla_query
- sqla_query_b_subquery = sqla_query_b.subquery()
- query_a_cte = sqla_query_a.cte("query_a_results")
- column_names_a = [column.key for column in sqla_query_a.c]
+ original_query_a = sqlaq.sqla_query
+ shifted_query_b = shifted_sqlaq.sqla_query
+ shifted_query_b_subquery = shifted_query_b.subquery()
+ query_a_cte = original_query_a.cte("query_a_results")
+ column_names_a = [column.key for column in original_query_a.c]
exclude_columns_b = set(query_obj_clone.get("columns") or [])
+ # Let's prepare the columns set to be used in query A and B
selected_columns_a = [query_a_cte.c[col].label(col) for col in column_names_a]
# Renamed columns from Query B (with "prev_" prefix)
selected_columns_b = [
- sqla_query_b_subquery.c[col].label(f"prev_{col}")
- for col in sqla_query_b_subquery.c.keys()
+ shifted_query_b_subquery.c[col].label(f"prev_{col}")
+ for col in shifted_query_b_subquery.c.keys()
if col not in exclude_columns_b
]
# Combine selected columns from both queries
@@ -1486,25 +1504,30 @@ class SqlaTable(
if join_columns and not query_obj_clone.get("is_rowcount"):
# Proceed with JOIN operation as before since join_columns is not empty
join_conditions = [
- sqla_query_b_subquery.c[col] == query_a_cte.c[col]
+ shifted_query_b_subquery.c[col] == query_a_cte.c[col]
for col in join_columns
- if col in sqla_query_b_subquery.c and col in query_a_cte.c
+ if col in shifted_query_b_subquery.c and col in query_a_cte.c
]
final_query = sa.select(*final_selected_columns).select_from(
- sqla_query_b_subquery.join(query_a_cte, sa.and_(*join_conditions))
+ shifted_query_b_subquery.join(query_a_cte, sa.and_(*join_conditions))
)
else:
+ # When dealing with queries that have no columns or that are totals,
+ # rowcounts etc we join with the 1 = 1 to create a result set that have
+ # both sets (original and prev)
final_query = sa.select(*final_selected_columns).select_from(
- sqla_query_b_subquery.join(
+ shifted_query_b_subquery.join(
query_a_cte, sa.literal(True) == sa.literal(True)
)
)
+ # Transform the query as you would within get_query_str_extended
final_query_sql = self.database.compile_sqla_query(final_query)
final_query_sql = self._apply_cte(final_query_sql, sqlaq.cte)
final_query_sql = sqlparse.format(final_query_sql, reindent=True)
if mutate:
final_query_sql = self.mutate_query_from_config(final_query_sql)
+ # Prepare the labels for the columns to be used
labels_expected = self.extract_column_names(final_selected_columns)
return final_query_sql, labels_expected
@@ -1520,6 +1543,8 @@ class SqlaTable(
sql = self._apply_cte(sql, sqlaq.cte)
sql = sqlparse.format(sql, reindent=True)
+ # Need to tell apart the regular queries from the ones that need
+ # Time comparison
query_obj_clone = copy.copy(query_obj)
query_object_extras: dict[str, Any] = query_obj.get("extras", {})
instant_time_comparison_info = query_object_extras.get(