You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@superset.apache.org by "dpgaspar (via GitHub)" <gi...@apache.org> on 2023/01/31 09:33:59 UTC

[GitHub] [superset] dpgaspar commented on a diff in pull request #22910: chore: Migrate /superset/estimate_query_cost/<database_id>/<schema>/ to API v1

dpgaspar commented on code in PR #22910:
URL: https://github.com/apache/superset/pull/22910#discussion_r1091648584


##########
superset/sqllab/commands/estimate.py:
##########
@@ -0,0 +1,106 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=too-few-public-methods, too-many-arguments
+from __future__ import annotations
+
+import logging
+from typing import Any, Dict, List
+
+from flask_babel import gettext as __, lazy_gettext as _
+
+from superset import app, db
+from superset.commands.base import BaseCommand
+from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
+from superset.exceptions import SupersetErrorException, SupersetTimeoutException
+from superset.jinja_context import get_template_processor
+from superset.models.core import Database
+from superset.sqllab.schemas import EstimateQueryCostSchema
+from superset.utils import core as utils
+
+config = app.config
+SQLLAB_QUERY_COST_ESTIMATE_TIMEOUT = config["SQLLAB_QUERY_COST_ESTIMATE_TIMEOUT"]
+stats_logger = config["STATS_LOGGER"]
+
+logger = logging.getLogger(__name__)
+
+
+class QueryEstimationCommand(BaseCommand):
+    _database_id: int
+    _sql: str
+    _template_params: Dict[str, Any]
+    _schema: str
+    _database: Database
+
+    def __init__(self, params: EstimateQueryCostSchema) -> None:
+        self._database_id = params.get("database_id")
+        self._sql = params.get("sql", "")
+        self._template_params = params.get("template_params", {})
+        self._schema = params.get("schema", "")
+
+    def validate(self) -> None:
+        self._database = db.session.query(Database).get(self._database_id)

Review Comment:
   This lookup should go through the DAO so that all applicable security constraints are applied to it.



##########
superset/sqllab/commands/estimate.py:
##########
@@ -0,0 +1,106 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=too-few-public-methods, too-many-arguments
+from __future__ import annotations
+
+import logging
+from typing import Any, Dict, List
+
+from flask_babel import gettext as __, lazy_gettext as _
+
+from superset import app, db
+from superset.commands.base import BaseCommand
+from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
+from superset.exceptions import SupersetErrorException, SupersetTimeoutException
+from superset.jinja_context import get_template_processor
+from superset.models.core import Database
+from superset.sqllab.schemas import EstimateQueryCostSchema
+from superset.utils import core as utils
+
+config = app.config
+SQLLAB_QUERY_COST_ESTIMATE_TIMEOUT = config["SQLLAB_QUERY_COST_ESTIMATE_TIMEOUT"]
+stats_logger = config["STATS_LOGGER"]
+
+logger = logging.getLogger(__name__)
+
+
+class QueryEstimationCommand(BaseCommand):
+    _database_id: int
+    _sql: str
+    _template_params: Dict[str, Any]
+    _schema: str
+    _database: Database
+
+    def __init__(self, params: EstimateQueryCostSchema) -> None:

Review Comment:
   The user should be passed in, because there are some security constraints that should be applied.



##########
superset/sqllab/api.py:
##########
@@ -68,17 +70,71 @@ class SqlLabRestApi(BaseSupersetApi):
 
     class_permission_name = "Query"
 
+    estimate_model_schema = EstimateQueryCostSchema()
     execute_model_schema = ExecutePayloadSchema()
 
     apispec_parameter_schemas = {
         "sql_lab_get_results_schema": sql_lab_get_results_schema,
     }
     openapi_spec_tag = "SQL Lab"
     openapi_spec_component_schemas = (
+        EstimateQueryCostSchema,
         ExecutePayloadSchema,
         QueryExecutionResponseSchema,
     )
 
+    @expose("/estimate/", methods=["POST"])

Review Comment:
   What do you think about placing this one on `/api/v1/database/` instead?



##########
tests/integration_tests/sql_lab/commands_tests.py:
##########
@@ -18,18 +18,88 @@
 from unittest.mock import patch
 
 import pytest
+from flask_babel import gettext as __
 
-from superset import db, sql_lab
+from superset import app, db, sql_lab
 from superset.common.db_query_status import QueryStatus
-from superset.errors import SupersetErrorType
-from superset.exceptions import SerializationError, SupersetErrorException
+from superset.errors import ErrorLevel, SupersetErrorType
+from superset.exceptions import (
+    SerializationError,
+    SupersetErrorException,
+    SupersetTimeoutException,
+)
 from superset.models.core import Database
 from superset.models.sql_lab import Query
-from superset.sqllab.commands import results
+from superset.sqllab.commands import estimate, results
 from superset.utils import core as utils
 from tests.integration_tests.base_tests import SupersetTestCase
 
 
+class TestQueryEstimationCommand(SupersetTestCase):
+    def test_validation_no_database(self) -> None:
+        params = {"database_id": 1, "sql": "SELECT 1"}
+        command = estimate.QueryEstimationCommand(params)
+
+        with mock.patch("superset.sqllab.commands.estimate.db") as mock_superset_db:
+            mock_superset_db.session.query().get.return_value = None
+            with pytest.raises(SupersetErrorException) as ex_info:
+                command.validate()
+            assert (
+                ex_info.value.error.error_type
+                == SupersetErrorType.RESULTS_BACKEND_ERROR
+            )
+
+    @patch("superset.tasks.scheduler.is_feature_enabled")
+    def test_run_timeout(self, is_feature_enabled) -> None:
+        params = {"database_id": 1, "sql": "SELECT 1", "template_params": {"temp": 123}}
+        command = estimate.QueryEstimationCommand(params)
+
+        db_mock = mock.Mock()
+        db_mock.db_engine_spec = mock.Mock()
+        db_mock.db_engine_spec.estimate_query_cost = mock.Mock(
+            side_effect=SupersetTimeoutException(
+                error_type=SupersetErrorType.CONNECTION_DATABASE_TIMEOUT,
+                message=(
+                    "Please check your connection details and database settings, "
+                    "and ensure that your database is accepting connections, "
+                    "then try connecting again."
+                ),
+                level=ErrorLevel.ERROR,
+            )
+        )
+        db_mock.db_engine_spec.query_cost_formatter = mock.Mock(return_value=None)
+        is_feature_enabled.return_value = False
+
+        with mock.patch("superset.sqllab.commands.estimate.db") as mock_superset_db:
+            mock_superset_db.session.query().get.return_value = db_mock
+            with pytest.raises(SupersetErrorException) as ex_info:
+                command.run()
+            assert (
+                ex_info.value.error.error_type == SupersetErrorType.SQLLAB_TIMEOUT_ERROR
+            )
+            assert ex_info.value.error.message == __(
+                "The query estimation was killed after %(sqllab_timeout)s seconds. It might "
+                "be too complex, or the database might be under heavy load.",
+                sqllab_timeout=app.config["SQLLAB_QUERY_COST_ESTIMATE_TIMEOUT"],
+            )
+
+    def test_run_success(self) -> None:

Review Comment:
   Please add some tests for users who don't have access to the database (missing database access on [some-db.id]).



##########
superset/sqllab/schemas.py:
##########
@@ -25,6 +25,13 @@
 }
 
 
+class EstimateQueryCostSchema(Schema):
+    database_id = fields.Integer(required=True)
+    sql = fields.String(required=True)

Review Comment:
   Let's add some nice descriptions to these fields.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@superset.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: notifications-unsubscribe@superset.apache.org
For additional commands, e-mail: notifications-help@superset.apache.org