You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@superset.apache.org by GitBox <gi...@apache.org> on 2021/01/19 08:17:20 UTC

[GitHub] [superset] villebro commented on a change in pull request #12482: refactor(connectors): refactor sqla query generation

villebro commented on a change in pull request #12482:
URL: https://github.com/apache/superset/pull/12482#discussion_r559981216



##########
File path: superset/connectors/sqla/models.py
##########
@@ -1147,109 +1125,361 @@ def get_sqla_query(  # pylint: disable=too-many-arguments,too-many-locals,too-ma
 
             if isinstance(col, Label):
                 label = col._label  # pylint: disable=protected-access
-                if label in metrics_exprs_by_label:
-                    col = metrics_exprs_by_label[label]
+                if label in metric_expressions_by_label:
+                    col = metric_expressions_by_label[label]
+
+            order_by.append(direction(col))
+        return order_by
+
+    def _get_inner_expressions(
+        self, groupby_expressions: Dict[str, Label], inner_main_metric_expr: Label
+    ) -> Tuple[List[Label], List[Label]]:
+        """
+        generate select and groupby expressions for inner query
+        """
+        inner_select_expressions = []
+        inner_groupby_expressions = []
+        for gby_name, gby_obj in groupby_expressions.items():
+            inner = self.make_sqla_column_compatible(gby_obj, gby_name + "__")
+            inner_groupby_expressions.append(inner)
+            inner_select_expressions.append(inner)
+
+        inner_select_expressions += [inner_main_metric_expr]
+        return inner_select_expressions, inner_groupby_expressions
+
+    def _get_subquery(  # pylint:disable=too-many-arguments
+        self,
+        inner_select_expressions: List[Label],
+        query_table: TableClause,
+        inner_main_metric_expr: Label,
+        where_clause: List[BooleanClauseList],
+        inner_time_filter: BooleanClauseList,
+        inner_groupby_expressions: List[Label],
+        timeseries_limit_metric: Optional[Metric],
+        timeseries_limit: int,
+        metrics_by_name: Dict[str, SqlMetric],
+        columns_by_name: Dict[str, TableColumn],
+        order_desc: bool,
+    ) -> Select:
+        """
+        generate complete subquery
+        """
+        subquery = select(inner_select_expressions).select_from(query_table)
+        subquery = subquery.where(and_(*(where_clause + [inner_time_filter])))
+        subquery = subquery.group_by(*inner_groupby_expressions)
+
+        order_direction_column = inner_main_metric_expr
+        if timeseries_limit_metric:
+            order_direction_column = self._get_timeseries_orderby(
+                timeseries_limit_metric, metrics_by_name, columns_by_name
+            )
+        direction = desc if order_desc else asc
+        subquery = subquery.order_by(direction(order_direction_column))
+        subquery = subquery.limit(timeseries_limit)
+        return subquery
+
+    def _get_inner_query(  # pylint:disable=too-many-arguments,too-many-locals
+        self,
+        timeseries_limit: int,
+        is_sip_38: bool,
+        main_metric_expression: Label,
+        groupby_expressions: "OrderedDict[str, Label]",
+        query_table: TableClause,
+        time_range_endpoints: Optional[Any],
+        from_dttm: Optional[datetime],
+        metrics_by_name: Dict[str, SqlMetric],
+        columns_by_name: Dict[str, ColumnElement],
+        order_desc: bool,
+        to_dttm: Optional[datetime],
+        where_clause: List[BooleanClauseList],
+        timeseries_limit_metric: Optional[Metric],
+        db_engine_spec: SqliteEngineSpec,
+        metrics: List[Metric],
+        granularity: str,
+        filter_: Optional[List[Dict[str, Any]]] = None,
+        extras: Optional[Dict[str, Any]] = None,
+        columns: Optional[List[str]] = None,
+        groupby: Optional[List[str]] = None,
+        orderby: Optional[List[Tuple[ColumnElement, bool]]] = None,
+        inner_from_dttm: Optional[datetime] = None,
+        inner_to_dttm: Optional[datetime] = None,
+    ) -> Tuple[TableClause, List[str]]:

Review comment:
       Some of these are available on `self`, e.g. `db_engine_spec` can be replaced with `self.database.db_engine_spec` hence doesn't need to be here.

##########
File path: superset/connectors/sqla/utils.py
##########
@@ -0,0 +1,117 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from collections import OrderedDict  # pylint:disable = unused-import
+from typing import Any, Dict, List
+
+import pandas as pd
+import sqlalchemy as sa
+from flask_babel import lazy_gettext as _
+from jinja2.exceptions import TemplateError
+from sqlalchemy import and_, or_
+from sqlalchemy.sql import ColumnElement
+from sqlalchemy.sql.expression import BooleanClauseList, Label
+
+from superset.constants import NULL_STRING
+from superset.exceptions import QueryObjectValidationError
+from superset.jinja_context import BaseTemplateProcessor
+from superset.utils.core import cast_to_num, FilterOperator
+
+
+def get_having_clause(
+    extra_having: Any, template_processor: BaseTemplateProcessor
+) -> List[BooleanClauseList]:
+    """
+    generate complete having clause from extra arg 'have'
+    """
+    having_clause = []
+    if extra_having:
+        try:
+            having = template_processor.process_template(extra_having)
+        except TemplateError as ex:
+            raise QueryObjectValidationError(
+                _(
+                    "Error in jinja expression in HAVING clause: %(msg)s",
+                    msg=ex.message,
+                )
+            )
+        having_clause += [sa.text("({})".format(having))]
+    return having_clause
+
+
+def get_expected_labels_from_select(select_expressions: List[Label]) -> List[str]:
+    return [
+        c._df_label_expected  # pylint: disable=protected-access
+        for c in select_expressions
+    ]
+
+
+def get_where_operation(  # pylint:disable=too-many-branches
+    flt: Dict[str, Any], operation: str, col_obj: Any, value: Any

Review comment:
       I believe we can add some types here, too e.g. `col_obj` is probably `Column`.

##########
File path: superset/connectors/sqla/models.py
##########
@@ -871,102 +908,76 @@ def _get_sqla_row_level_filters(
                 _("Error in jinja expression in RLS filters: %(msg)s", msg=ex.message,)
             )
 
-    def get_sqla_query(  # pylint: disable=too-many-arguments,too-many-locals,too-many-branches,too-many-statements
-        self,
-        metrics: List[Metric],
-        granularity: str,
-        from_dttm: Optional[datetime],
-        to_dttm: Optional[datetime],
-        columns: Optional[List[str]] = None,
-        groupby: Optional[List[str]] = None,
-        filter: Optional[  # pylint: disable=redefined-builtin
-            List[Dict[str, Any]]
-        ] = None,
-        is_timeseries: bool = True,
-        timeseries_limit: int = 15,
-        timeseries_limit_metric: Optional[Metric] = None,
-        row_limit: Optional[int] = None,
-        row_offset: Optional[int] = None,
-        inner_from_dttm: Optional[datetime] = None,
-        inner_to_dttm: Optional[datetime] = None,
-        orderby: Optional[List[Tuple[ColumnElement, bool]]] = None,
-        extras: Optional[Dict[str, Any]] = None,
-        order_desc: bool = True,
-    ) -> SqlaQuery:
-        """Querying any sqla table from this common interface"""
-        template_kwargs = {
-            "from_dttm": from_dttm.isoformat() if from_dttm else None,
-            "groupby": groupby,
-            "metrics": metrics,
-            "row_limit": row_limit,
-            "row_offset": row_offset,
-            "to_dttm": to_dttm.isoformat() if to_dttm else None,
-            "filter": filter,
-            "columns": [col.column_name for col in self.columns],
-        }
-        is_sip_38 = is_feature_enabled("SIP_38_VIZ_REARCHITECTURE")
+    def _update_template_kwargs(self, template_kwargs: Dict[str, Any]) -> None:
         template_kwargs.update(self.template_params_dict)
-        extra_cache_keys: List[Any] = []
-        template_kwargs["extra_cache_keys"] = extra_cache_keys
-        template_processor = self.get_template_processor(**template_kwargs)
-        db_engine_spec = self.database.db_engine_spec
-        prequeries: List[str] = []
 
-        orderby = orderby or []
+    def _get_columns_by_name(self) -> Dict[str, TableColumn]:
+        return {col.column_name: col for col in self.columns}
 
-        # For backward compatibility
-        if granularity not in self.dttm_cols:
-            granularity = self.main_dttm_col
+    def _get_metrics_by_name(self) -> Dict[str, SqlMetric]:
+        return {m.metric_name: m for m in self.metrics}
 
-        # Database spec supports join-free timeslot grouping
-        time_groupby_inline = db_engine_spec.time_groupby_inline
-
-        columns_by_name: Dict[str, TableColumn] = {
-            col.column_name: col for col in self.columns
-        }
-        metrics_by_name: Dict[str, SqlMetric] = {m.metric_name: m for m in self.metrics}
-
-        if not granularity and is_timeseries:
-            raise QueryObjectValidationError(
-                _(
-                    "Datetime column not provided as part table configuration "
-                    "and is required by this type of chart"
-                )
-            )
-        if (
-            not metrics
-            and not columns
-            and (is_sip_38 or (not is_sip_38 and not groupby))
-        ):
-            raise QueryObjectValidationError(_("Empty query?"))
-        metrics_exprs: List[ColumnElement] = []
+    def _get_metric_expressions(
+        self,
+        metrics: List[Metric],
+        columns_by_name: Dict[str, TableColumn],
+        metrics_by_name: Dict[str, SqlMetric],
+    ) -> List[Label]:

Review comment:
       At some point we should probably also look at resolving some of those circular imports (let's leave those for another PR)

##########
File path: superset/connectors/sqla/models.py
##########
@@ -377,7 +389,9 @@ class SqlMetric(Model, BaseMetric):
 
     def get_sqla_col(self, label: Optional[str] = None) -> Column:
         label = label or self.metric_name
+        print("label ", label)
         sqla_col = literal_column(self.expression)
+        print("sqla col ", str(sqla_col))

Review comment:
       These print statements should probably be removed

##########
File path: superset/connectors/sqla/utils.py
##########
@@ -0,0 +1,117 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from collections import OrderedDict  # pylint:disable = unused-import
+from typing import Any, Dict, List
+
+import pandas as pd
+import sqlalchemy as sa
+from flask_babel import lazy_gettext as _
+from jinja2.exceptions import TemplateError
+from sqlalchemy import and_, or_
+from sqlalchemy.sql import ColumnElement
+from sqlalchemy.sql.expression import BooleanClauseList, Label
+
+from superset.constants import NULL_STRING
+from superset.exceptions import QueryObjectValidationError
+from superset.jinja_context import BaseTemplateProcessor
+from superset.utils.core import cast_to_num, FilterOperator
+
+
+def get_having_clause(
+    extra_having: Any, template_processor: BaseTemplateProcessor

Review comment:
       I believe having is `Optional[str]`
   ```suggestion
       extra_having: Optional[str], template_processor: BaseTemplateProcessor
   ```




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: notifications-unsubscribe@superset.apache.org
For additional commands, e-mail: notifications-help@superset.apache.org