Posted to notifications@superset.apache.org by "betodealmeida (via GitHub)" <gi...@apache.org> on 2023/02/22 20:18:15 UTC

[GitHub] [superset] betodealmeida commented on a diff in pull request #22853: chore: Refactor ExploreMixin to power both Datasets (SqlaTable) and Query models

betodealmeida commented on code in PR #22853:
URL: https://github.com/apache/superset/pull/22853#discussion_r1114887671


##########
superset-frontend/src/SqlLab/components/SaveDatasetModal/index.tsx:
##########
@@ -61,7 +61,7 @@ export type ExploreQuery = QueryResponse & {
 };
 
 export interface ISimpleColumn {
-  name?: string | null;
+  column_name?: string | null;

Review Comment:
   Any reason why we're changing this? `column.column_name` is redundant.



##########
superset/models/sql_lab.py:
##########
@@ -183,47 +184,33 @@ def sql_tables(self) -> List[Table]:
         return list(ParsedQuery(self.sql).tables)
 
     @property
-    def columns(self) -> List[Dict[str, Any]]:
-        bool_types = ("BOOL",)
-        num_types = (
-            "DOUBLE",
-            "FLOAT",
-            "INT",
-            "BIGINT",
-            "NUMBER",
-            "LONG",
-            "REAL",
-            "NUMERIC",
-            "DECIMAL",
-            "MONEY",
+    def columns(self) -> List["TableColumn"]:
+        from superset.connectors.sqla.models import (  # pylint: disable=import-outside-toplevel
+            TableColumn,
         )
-        date_types = ("DATE", "TIME")
-        str_types = ("VARCHAR", "STRING", "CHAR")
+
         columns = []
-        col_type = ""
         for col in self.extra.get("columns", []):
-            computed_column = {**col}
-            col_type = col.get("type")
-
-            if col_type and any(map(lambda t: t in col_type.upper(), str_types)):
-                computed_column["type_generic"] = GenericDataType.STRING
-            if col_type and any(map(lambda t: t in col_type.upper(), bool_types)):
-                computed_column["type_generic"] = GenericDataType.BOOLEAN
-            if col_type and any(map(lambda t: t in col_type.upper(), num_types)):
-                computed_column["type_generic"] = GenericDataType.NUMERIC
-            if col_type and any(map(lambda t: t in col_type.upper(), date_types)):
-                computed_column["type_generic"] = GenericDataType.TEMPORAL
-
-            computed_column["column_name"] = col.get("name")
-            computed_column["groupby"] = True
-            columns.append(computed_column)
+            columns.append(
+                TableColumn(
+                    column_name=col["name"],
+                    type=col["type"],
+                    is_dttm=col["is_dttm"],
+                    groupby=True,
+                    filterable=True,
+                )
+            )
         return columns
 
+    @property
+    def db_extra(self) -> Optional[Dict[str, Any]]:
+        return None
+
     @property
     def data(self) -> Dict[str, Any]:
         order_by_choices = []
         for col in self.columns:
-            column_name = str(col.get("column_name") or "")
+            column_name = str(col.column_name or "")

Review Comment:
   We don't need `str()` here, do we?
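
   If not, maybe something like this would do (untested sketch):

   ```suggestion
               column_name = col.column_name or ""
   ```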



##########
superset/models/helpers.py:
##########
@@ -682,7 +690,18 @@ class ExploreMixin:  # pylint: disable=too-many-public-methods
     }
 
     @property
-    def query(self) -> str:
+    def fetch_value_predicate(self) -> str:
+        return "fix this!"

Review Comment:
   ```suggestion
           raise NotImplementedError()
   ```



##########
tests/integration_tests/charts/data/api_tests.py:
##########
@@ -1203,8 +1203,8 @@ def test_chart_cache_timeout_chart_not_found(
     [
         (200, {"where": "1 = 1"}),
         (200, {"having": "count(*) > 0"}),
-        (400, {"where": "col1 in (select distinct col1 from physical_dataset)"}),
-        (400, {"having": "count(*) > (select count(*) from physical_dataset)"}),
+        (403, {"where": "col1 in (select distinct col1 from physical_dataset)"}),
+        (403, {"having": "count(*) > (select count(*) from physical_dataset)"}),

Review Comment:
   Why are we getting 403s now?



##########
superset/models/helpers.py:
##########
@@ -1465,14 +1552,20 @@ def get_sqla_query(  # pylint: disable=too-many-arguments,too-many-locals,too-ma
                     col = metrics_exprs_by_expr.get(str(col), col)
                     need_groupby = True
             elif col in columns_by_name:
-                gb_column_obj = columns_by_name[col]
-                if isinstance(gb_column_obj, dict):
-                    col = self.get_sqla_col(gb_column_obj)
-                else:
-                    col = gb_column_obj.get_sqla_col()
+                col = self.convert_tbl_column_to_sqla_col(
+                    columns_by_name[col], template_processor=template_processor
+                )
+                # col = columns_by_name[col].get_sqla_col(
+                #     template_processor=template_processor
+                # )

Review Comment:
   ```suggestion
   ```



##########
superset/connectors/sqla/models.py:
##########
@@ -304,6 +282,35 @@ def db_extra(self) -> Dict[str, Any]:
     def type_generic(self) -> Optional[utils.GenericDataType]:
         if self.is_dttm:
             return GenericDataType.TEMPORAL
+
+        bool_types = ("BOOL",)
+        num_types = (
+            "DOUBLE",
+            "FLOAT",
+            "INT",
+            "BIGINT",
+            "NUMBER",
+            "LONG",
+            "REAL",
+            "NUMERIC",
+            "DECIMAL",
+            "MONEY",
+        )
+        date_types = ("DATE", "TIME")
+        str_types = ("VARCHAR", "STRING", "CHAR")
+
+        if self.table is None:
+            # Query.TableColumns don't have a reference to a table.db_engine_spec
+            # reference so this logic will manage rendering types
+            if self.type and any(map(lambda t: t in self.type.upper(), str_types)):

Review Comment:
   Nit: I think you just moved this old code around, but today it would be better written without `map` or `lambda`:
   
   ```python
   if self.type and any(t in self.type.upper() for t in str_types):
   ```
   
   Same below.
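
   For reference, the whole block could then read something like this (just a sketch, assuming each branch simply returns the generic type, mirroring the old mapping from `sql_lab.py`):

   ```python
   if self.type and any(t in self.type.upper() for t in str_types):
       return GenericDataType.STRING
   if self.type and any(t in self.type.upper() for t in bool_types):
       return GenericDataType.BOOLEAN
   if self.type and any(t in self.type.upper() for t in num_types):
       return GenericDataType.NUMERIC
   if self.type and any(t in self.type.upper() for t in date_types):
       return GenericDataType.TEMPORAL
   ```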



##########
superset/models/helpers.py:
##########
@@ -1179,26 +1241,27 @@ def _get_series_orderby(
 
     def adhoc_column_to_sqla(
         self,
-        col: Type["AdhocColumn"],  # type: ignore
+        col: "AdhocColumn",  # type: ignore
         template_processor: Optional[BaseTemplateProcessor] = None,
     ) -> ColumnElement:
-        """
-        Turn an adhoc column into a sqlalchemy column.
-
-        :param col: Adhoc column definition
-        :param template_processor: template_processor instance
-        :returns: The metric defined as a sqlalchemy column
-        :rtype: sqlalchemy.sql.column
-        """
-        label = utils.get_column_name(col)  # type: ignore
-        expression = self._process_sql_expression(
-            expression=col["sqlExpression"],
-            database_id=self.database_id,
-            schema=self.schema,
-            template_processor=template_processor,
-        )
-        sqla_column = literal_column(expression)
-        return self.make_sqla_column_compatible(sqla_column, label)
+        raise NotImplementedError()
+        # """
+        # Turn an adhoc column into a sqlalchemy column.
+
+        # :param col: Adhoc column definition
+        # :param template_processor: template_processor instance
+        # :returns: The metric defined as a sqlalchemy column
+        # :rtype: sqlalchemy.sql.column
+        # """
+        # label = utils.get_column_name(col)  # type: ignore
+        # expression = self._process_sql_expression(
+        #     expression=col["sqlExpression"],
+        #     database_id=self.database_id,
+        #     schema=self.schema,
+        #     template_processor=template_processor,
+        # )
+        # sqla_column = literal_column(expression)
+        # return self.make_sqla_column_compatible(sqla_column, label)

Review Comment:
   If we're not using the code, it's better to remove it:
   
   ```suggestion
   ```



##########
superset/models/helpers.py:
##########
@@ -1498,33 +1591,27 @@ def get_sqla_query(  # pylint: disable=too-many-arguments,too-many-locals,too-ma
                     # if groupby field/expr equals granularity field/expr
                     if selected == granularity:
                         table_col = columns_by_name[selected]
-                        if isinstance(table_col, dict):
-                            outer = self.get_timestamp_expression(
-                                column=table_col,
-                                time_grain=time_grain,
-                                label=selected,
-                                template_processor=template_processor,
-                            )
-                        else:
-                            outer = table_col.get_timestamp_expression(
-                                time_grain=time_grain,
-                                label=selected,
-                                template_processor=template_processor,
-                            )
+                        outer = table_col.get_timestamp_expression(
+                            time_grain=time_grain,
+                            label=selected,
+                            template_processor=template_processor,
+                        )
                     # if groupby field equals a selected column
                     elif selected in columns_by_name:
-                        if isinstance(columns_by_name[selected], dict):
-                            outer = sa.column(f"{selected}")
-                            outer = self.make_sqla_column_compatible(outer, selected)
-                        else:
-                            outer = columns_by_name[selected].get_sqla_col()
+                        outer = self.convert_tbl_column_to_sqla_col(
+                            columns_by_name[selected],
+                            template_processor=template_processor,
+                        )
+                    # outer = columns_by_name[selected].get_sqla_col(
+                    #     template_processor=template_processor
+                    # )

Review Comment:
   ```suggestion
   ```



##########
superset/models/helpers.py:
##########
@@ -1643,67 +1725,57 @@ def get_sqla_query(  # pylint: disable=too-many-arguments,too-many-locals,too-ma
             sqla_col: Optional[Column] = None
             if flt_col == utils.DTTM_ALIAS and is_timeseries and dttm_col:
                 col_obj = dttm_col
-            elif utils.is_adhoc_column(flt_col):
-                sqla_col = self.adhoc_column_to_sqla(flt_col)  # type: ignore
+            elif is_adhoc_column(flt_col):
+                sqla_col = self.adhoc_column_to_sqla(
+                    col=flt_col, template_processor=template_processor
+                )
             else:
                 col_obj = columns_by_name.get(flt_col)
             filter_grain = flt.get("grain")
 
             if is_feature_enabled("ENABLE_TEMPLATE_REMOVE_FILTERS"):
-                if utils.get_column_name(flt_col) in removed_filters:
+                if get_column_name(flt_col) in removed_filters:
                     # Skip generating SQLA filter when the jinja template handles it.
                     continue
 
             if col_obj or sqla_col is not None:
                 if sqla_col is not None:
                     pass
                 elif col_obj and filter_grain:
-                    if isinstance(col_obj, dict):
-                        sqla_col = self.get_timestamp_expression(
-                            col_obj, time_grain, template_processor=template_processor
-                        )
-                    else:
-                        sqla_col = col_obj.get_timestamp_expression(
-                            time_grain=filter_grain,
-                            template_processor=template_processor,
-                        )
-                elif col_obj and isinstance(col_obj, dict):
-                    sqla_col = sa.column(col_obj.get("column_name"))
+                    sqla_col = col_obj.get_timestamp_expression(
+                        time_grain=filter_grain, template_processor=template_processor
+                    )
                 elif col_obj:
-                    sqla_col = col_obj.get_sqla_col()
-
-                if col_obj and isinstance(col_obj, dict):
-                    col_type = col_obj.get("type")
-                else:
-                    col_type = col_obj.type if col_obj else None
+                    sqla_col = self.convert_tbl_column_to_sqla_col(
+                        tbl_column=col_obj, template_processor=template_processor
+                    )
+                    # sqla_col = col_obj.get_sqla_col(
+                    #     template_processor=template_processor
+                    # )
+                col_type = col_obj.type if col_obj else None
                 col_spec = db_engine_spec.get_column_spec(
                     native_type=col_type,
-                    db_extra=self.database.get_extra(),  # type: ignore
+                    #                    db_extra=self.database.get_extra(),

Review Comment:
   ```suggestion
   ```



##########
superset/models/helpers.py:
##########
@@ -1797,41 +1876,63 @@ def get_sqla_query(  # pylint: disable=too-many-arguments,too-many-locals,too-ma
                                 time_col=col_obj,
                                 start_dttm=_since,
                                 end_dttm=_until,
+                                label=sqla_col.key,
+                                template_processor=template_processor,
                             )
+                            # col_obj.get_time_filter(
+                            #     start_dttm=_since,
+                            #     end_dttm=_until,
+                            #     label=sqla_col.key,
+                            #     template_processor=template_processor,
+                            # )

Review Comment:
   ```suggestion
   ```



##########
superset/models/helpers.py:
##########
@@ -1538,19 +1625,28 @@ def get_sqla_query(  # pylint: disable=too-many-arguments,too-many-locals,too-ma
                 select_exprs.append(outer)
         elif columns:
             for selected in columns:
-                selected = self.validate_adhoc_subquery(
-                    selected,
+                if is_adhoc_column(selected):
+                    _sql = selected["sqlExpression"]
+                    _column_label = selected["label"]

Review Comment:
   By convention, variables with a leading underscore are "private"... can we give these a better name? Like `column_sql` or something?
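
   For example (just a naming sketch; `column_label` is equally a placeholder):

   ```python
   if is_adhoc_column(selected):
       column_sql = selected["sqlExpression"]
       column_label = selected["label"]
   ```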



##########
superset/models/helpers.py:
##########
@@ -1881,26 +1982,30 @@ def get_sqla_query(  # pylint: disable=too-many-arguments,too-many-locals,too-ma
                 inner_time_filter = []
 
                 if dttm_col and not db_engine_spec.time_groupby_inline:
-                    if isinstance(dttm_col, dict):
-                        inner_time_filter = [
-                            self.get_time_filter(
-                                dttm_col,
-                                inner_from_dttm or from_dttm,
-                                inner_to_dttm or to_dttm,
-                            )
-                        ]
-                    else:
-                        inner_time_filter = [
-                            dttm_col.get_time_filter(
-                                inner_from_dttm or from_dttm,
-                                inner_to_dttm or to_dttm,
-                            )
-                        ]
-
+                    inner_time_filter = [
+                        self.get_time_filter(
+                            time_col=dttm_col,
+                            start_dttm=inner_from_dttm or from_dttm,
+                            end_dttm=inner_to_dttm or to_dttm,
+                            template_processor=template_processor,
+                        )
+                        # dttm_col.get_time_filter(
+                        #     start_dttm=inner_from_dttm or from_dttm,
+                        #     end_dttm=inner_to_dttm or to_dttm,
+                        #     template_processor=template_processor,
+                        # )

Review Comment:
   ```suggestion
   ```



##########
superset/models/sql_lab.py:
##########
@@ -183,47 +184,33 @@ def sql_tables(self) -> List[Table]:
         return list(ParsedQuery(self.sql).tables)
 
     @property
-    def columns(self) -> List[Dict[str, Any]]:
-        bool_types = ("BOOL",)
-        num_types = (
-            "DOUBLE",
-            "FLOAT",
-            "INT",
-            "BIGINT",
-            "NUMBER",
-            "LONG",
-            "REAL",
-            "NUMERIC",
-            "DECIMAL",
-            "MONEY",
+    def columns(self) -> List["TableColumn"]:
+        from superset.connectors.sqla.models import (  # pylint: disable=import-outside-toplevel
+            TableColumn,
         )
-        date_types = ("DATE", "TIME")
-        str_types = ("VARCHAR", "STRING", "CHAR")
+
         columns = []
-        col_type = ""
         for col in self.extra.get("columns", []):
-            computed_column = {**col}
-            col_type = col.get("type")
-
-            if col_type and any(map(lambda t: t in col_type.upper(), str_types)):
-                computed_column["type_generic"] = GenericDataType.STRING
-            if col_type and any(map(lambda t: t in col_type.upper(), bool_types)):
-                computed_column["type_generic"] = GenericDataType.BOOLEAN
-            if col_type and any(map(lambda t: t in col_type.upper(), num_types)):
-                computed_column["type_generic"] = GenericDataType.NUMERIC
-            if col_type and any(map(lambda t: t in col_type.upper(), date_types)):
-                computed_column["type_generic"] = GenericDataType.TEMPORAL
-
-            computed_column["column_name"] = col.get("name")
-            computed_column["groupby"] = True
-            columns.append(computed_column)
+            columns.append(
+                TableColumn(
+                    column_name=col["name"],
+                    type=col["type"],
+                    is_dttm=col["is_dttm"],
+                    groupby=True,
+                    filterable=True,

Review Comment:
   Are we setting `filterable` to true for both datasets and queries, or just for queries? For datasets we want to respect the existing configuration: if a user has marked a column as not filterable, it shouldn't become filterable here. (For queries I understand we need all columns to be filterable and groupable.)



##########
superset/models/helpers.py:
##########
@@ -1643,67 +1725,57 @@ def get_sqla_query(  # pylint: disable=too-many-arguments,too-many-locals,too-ma
             sqla_col: Optional[Column] = None
             if flt_col == utils.DTTM_ALIAS and is_timeseries and dttm_col:
                 col_obj = dttm_col
-            elif utils.is_adhoc_column(flt_col):
-                sqla_col = self.adhoc_column_to_sqla(flt_col)  # type: ignore
+            elif is_adhoc_column(flt_col):
+                sqla_col = self.adhoc_column_to_sqla(
+                    col=flt_col, template_processor=template_processor
+                )
             else:
                 col_obj = columns_by_name.get(flt_col)
             filter_grain = flt.get("grain")
 
             if is_feature_enabled("ENABLE_TEMPLATE_REMOVE_FILTERS"):
-                if utils.get_column_name(flt_col) in removed_filters:
+                if get_column_name(flt_col) in removed_filters:
                     # Skip generating SQLA filter when the jinja template handles it.
                     continue
 
             if col_obj or sqla_col is not None:
                 if sqla_col is not None:
                     pass
                 elif col_obj and filter_grain:
-                    if isinstance(col_obj, dict):
-                        sqla_col = self.get_timestamp_expression(
-                            col_obj, time_grain, template_processor=template_processor
-                        )
-                    else:
-                        sqla_col = col_obj.get_timestamp_expression(
-                            time_grain=filter_grain,
-                            template_processor=template_processor,
-                        )
-                elif col_obj and isinstance(col_obj, dict):
-                    sqla_col = sa.column(col_obj.get("column_name"))
+                    sqla_col = col_obj.get_timestamp_expression(
+                        time_grain=filter_grain, template_processor=template_processor
+                    )
                 elif col_obj:
-                    sqla_col = col_obj.get_sqla_col()
-
-                if col_obj and isinstance(col_obj, dict):
-                    col_type = col_obj.get("type")
-                else:
-                    col_type = col_obj.type if col_obj else None
+                    sqla_col = self.convert_tbl_column_to_sqla_col(
+                        tbl_column=col_obj, template_processor=template_processor
+                    )
+                    # sqla_col = col_obj.get_sqla_col(
+                    #     template_processor=template_processor
+                    # )
+                col_type = col_obj.type if col_obj else None
                 col_spec = db_engine_spec.get_column_spec(
                     native_type=col_type,
-                    db_extra=self.database.get_extra(),  # type: ignore
+                    #                    db_extra=self.database.get_extra(),
                 )
                 is_list_target = op in (
                     utils.FilterOperator.IN.value,
                     utils.FilterOperator.NOT_IN.value,
                 )
 
-                if col_obj and isinstance(col_obj, dict):
-                    col_advanced_data_type = ""
-                else:
-                    col_advanced_data_type = (
-                        col_obj.advanced_data_type if col_obj else ""
-                    )
+                col_advanced_data_type = col_obj.advanced_data_type if col_obj else ""
 
                 if col_spec and not col_advanced_data_type:
                     target_generic_type = col_spec.generic_type
                 else:
-                    target_generic_type = utils.GenericDataType.STRING
+                    target_generic_type = GenericDataType.STRING
                 eq = self.filter_values_handler(
                     values=val,
                     operator=op,
                     target_generic_type=target_generic_type,
                     target_native_type=col_type,
                     is_list_target=is_list_target,
                     db_engine_spec=db_engine_spec,
-                    db_extra=self.database.get_extra(),  # type: ignore
+                    #                     db_extra=self.database.get_extra(),

Review Comment:
   ```suggestion
   ```



##########
superset-frontend/src/explore/components/controls/MetricControl/AdhocMetricOption.jsx:
##########
@@ -48,7 +48,7 @@ class AdhocMetricOption extends React.PureComponent {
   }
 
   onRemoveMetric(e) {
-    e.stopPropagation();
+    if (e !== undefined) e.stopPropagation();

Review Comment:
   Optional chaining (`?.`) works as a guard here as well.
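
   i.e. something like (untested):

   ```suggestion
       e?.stopPropagation();
   ```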



##########
superset/models/helpers.py:
##########
@@ -1643,67 +1725,57 @@ def get_sqla_query(  # pylint: disable=too-many-arguments,too-many-locals,too-ma
             sqla_col: Optional[Column] = None
             if flt_col == utils.DTTM_ALIAS and is_timeseries and dttm_col:
                 col_obj = dttm_col
-            elif utils.is_adhoc_column(flt_col):
-                sqla_col = self.adhoc_column_to_sqla(flt_col)  # type: ignore
+            elif is_adhoc_column(flt_col):
+                sqla_col = self.adhoc_column_to_sqla(
+                    col=flt_col, template_processor=template_processor
+                )
             else:
                 col_obj = columns_by_name.get(flt_col)
             filter_grain = flt.get("grain")
 
             if is_feature_enabled("ENABLE_TEMPLATE_REMOVE_FILTERS"):
-                if utils.get_column_name(flt_col) in removed_filters:
+                if get_column_name(flt_col) in removed_filters:
                     # Skip generating SQLA filter when the jinja template handles it.
                     continue
 
             if col_obj or sqla_col is not None:
                 if sqla_col is not None:
                     pass
                 elif col_obj and filter_grain:
-                    if isinstance(col_obj, dict):
-                        sqla_col = self.get_timestamp_expression(
-                            col_obj, time_grain, template_processor=template_processor
-                        )
-                    else:
-                        sqla_col = col_obj.get_timestamp_expression(
-                            time_grain=filter_grain,
-                            template_processor=template_processor,
-                        )
-                elif col_obj and isinstance(col_obj, dict):
-                    sqla_col = sa.column(col_obj.get("column_name"))
+                    sqla_col = col_obj.get_timestamp_expression(
+                        time_grain=filter_grain, template_processor=template_processor
+                    )
                 elif col_obj:
-                    sqla_col = col_obj.get_sqla_col()
-
-                if col_obj and isinstance(col_obj, dict):
-                    col_type = col_obj.get("type")
-                else:
-                    col_type = col_obj.type if col_obj else None
+                    sqla_col = self.convert_tbl_column_to_sqla_col(
+                        tbl_column=col_obj, template_processor=template_processor
+                    )
+                    # sqla_col = col_obj.get_sqla_col(
+                    #     template_processor=template_processor
+                    # )

Review Comment:
   ```suggestion
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@superset.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

