You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by am...@apache.org on 2021/05/04 11:39:24 UTC
[superset] 01/06: feat: Logic added to limiting factor column in
Query model (#13521)
This is an automated email from the ASF dual-hosted git repository.
amitmiran pushed a commit to branch 1.2
in repository https://gitbox.apache.org/repos/asf/superset.git
commit b5f988f5e9be7072a016c18fe746d08a22d88032
Author: AAfghahi <48...@users.noreply.github.com>
AuthorDate: Fri Apr 30 18:15:18 2021 -0400
feat: Logic added to limiting factor column in Query model (#13521)
* Sqllab limit
* Add migration script
* Set default values
* initial push
* revisions
* Update superset/views/core.py
Co-authored-by: Beto Dealmeida <ro...@dealmeida.net>
* moving migration to separate PR
* with migration
* revisions
* Fix apply_limit_to_sql
* all but tests
* added unit tests
* revisions
* Update superset/sql_lab.py
Co-authored-by: Beto Dealmeida <ro...@dealmeida.net>
* Update superset/sql_parse.py
Co-authored-by: Beto Dealmeida <ro...@dealmeida.net>
* fixed black issue
* Update superset/views/core.py
Co-authored-by: Beto Dealmeida <ro...@dealmeida.net>
* updated logic
Co-authored-by: Beto Dealmeida <ro...@dealmeida.net>
(cherry picked from commit e507508b4886c9b2d4d8fb931c1da45111e2cbbf)
---
superset-frontend/src/SqlLab/reducers/sqlLab.js | 1 +
superset-frontend/src/SqlLab/types.ts | 1 +
superset/db_engine_specs/base.py | 6 +++--
superset/models/core.py | 6 +++--
superset/models/sql_lab.py | 14 ++++++++++++
superset/sql_lab.py | 22 +++++++++++++-----
superset/sql_parse.py | 6 ++---
superset/views/core.py | 30 +++++++++++++++++++++----
tests/db_engine_specs/base_engine_spec_tests.py | 13 +++++++++++
tests/db_engine_specs/base_tests.py | 9 ++++++--
tests/sqllab_tests.py | 30 +++++++++++++++++++++++--
tests/superset_test_config.py | 2 +-
12 files changed, 119 insertions(+), 21 deletions(-)
diff --git a/superset-frontend/src/SqlLab/reducers/sqlLab.js b/superset-frontend/src/SqlLab/reducers/sqlLab.js
index 989604f..daa06a9 100644
--- a/superset-frontend/src/SqlLab/reducers/sqlLab.js
+++ b/superset-frontend/src/SqlLab/reducers/sqlLab.js
@@ -334,6 +334,7 @@ export default function sqlLabReducer(state = {}, action) {
results: action.results,
rows: action?.results?.data?.length,
state: 'success',
+ limitingFactor: action?.results?.query?.limitingFactor,
tempSchema: action?.results?.query?.tempSchema,
tempTable: action?.results?.query?.tempTable,
errorMessage: null,
diff --git a/superset-frontend/src/SqlLab/types.ts b/superset-frontend/src/SqlLab/types.ts
index 2770de3..61b677f 100644
--- a/superset-frontend/src/SqlLab/types.ts
+++ b/superset-frontend/src/SqlLab/types.ts
@@ -65,4 +65,5 @@ export type Query = {
templateParams: any;
rows: number;
queryLimit: number;
+ limitingFactor: string;
};
diff --git a/superset/db_engine_specs/base.py b/superset/db_engine_specs/base.py
index 3f91d58..578c436 100644
--- a/superset/db_engine_specs/base.py
+++ b/superset/db_engine_specs/base.py
@@ -569,7 +569,9 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods
return {}
@classmethod
- def apply_limit_to_sql(cls, sql: str, limit: int, database: "Database") -> str:
+ def apply_limit_to_sql(
+ cls, sql: str, limit: int, database: "Database", force: bool = False
+ ) -> str:
"""
Alters the SQL statement to apply a LIMIT clause
@@ -590,7 +592,7 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods
if cls.limit_method == LimitMethod.FORCE_LIMIT:
parsed_query = sql_parse.ParsedQuery(sql)
- sql = parsed_query.set_or_update_query_limit(limit)
+ sql = parsed_query.set_or_update_query_limit(limit, force=force)
return sql
diff --git a/superset/models/core.py b/superset/models/core.py
index 5dadca2..11d3380 100755
--- a/superset/models/core.py
+++ b/superset/models/core.py
@@ -437,8 +437,10 @@ class Database(
cols=cols,
)
- def apply_limit_to_sql(self, sql: str, limit: int = 1000) -> str:
- return self.db_engine_spec.apply_limit_to_sql(sql, limit, self)
+ def apply_limit_to_sql(
+ self, sql: str, limit: int = 1000, force: bool = False
+ ) -> str:
+ return self.db_engine_spec.apply_limit_to_sql(sql, limit, self, force=force)
def safe_sqlalchemy_uri(self) -> str:
return self.sqlalchemy_uri
diff --git a/superset/models/sql_lab.py b/superset/models/sql_lab.py
index 7511972..17c72db 100644
--- a/superset/models/sql_lab.py
+++ b/superset/models/sql_lab.py
@@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.
"""A collection of ORM sqlalchemy models for SQL Lab"""
+import enum
import re
from datetime import datetime
from typing import Any, Dict, List
@@ -29,6 +30,7 @@ from sqlalchemy import (
Boolean,
Column,
DateTime,
+ Enum,
ForeignKey,
Integer,
Numeric,
@@ -49,6 +51,14 @@ from superset.sql_parse import CtasMethod, ParsedQuery, Table
from superset.utils.core import QueryStatus, user_label
+class LimitingFactor(str, enum.Enum):
+ QUERY = "QUERY"
+ DROPDOWN = "DROPDOWN"
+ QUERY_AND_DROPDOWN = "QUERY_AND_DROPDOWN"
+ NOT_LIMITED = "NOT_LIMITED"
+ UNKNOWN = "UNKNOWN"
+
+
class Query(Model, ExtraJSONMixin):
"""ORM model for SQL query
@@ -76,6 +86,9 @@ class Query(Model, ExtraJSONMixin):
executed_sql = Column(Text)
# Could be configured in the superset config.
limit = Column(Integer)
+ limiting_factor = Column(
+ Enum(LimitingFactor), server_default=LimitingFactor.UNKNOWN
+ )
select_as_cta = Column(Boolean)
select_as_cta_used = Column(Boolean, default=False)
ctas_method = Column(String(16), default=CtasMethod.TABLE)
@@ -120,6 +133,7 @@ class Query(Model, ExtraJSONMixin):
"id": self.client_id,
"queryId": self.id,
"limit": self.limit,
+ "limitingFactor": self.limiting_factor,
"progress": self.progress,
"rows": self.rows,
"schema": self.schema,
diff --git a/superset/sql_lab.py b/superset/sql_lab.py
index 234b1dd..9b62f9c 100644
--- a/superset/sql_lab.py
+++ b/superset/sql_lab.py
@@ -36,7 +36,7 @@ from superset.dataframe import df_to_records
from superset.db_engine_specs import BaseEngineSpec
from superset.extensions import celery_app
from superset.models.core import Database
-from superset.models.sql_lab import Query
+from superset.models.sql_lab import LimitingFactor, Query
from superset.result_set import SupersetResultSet
from superset.sql_parse import CtasMethod, ParsedQuery
from superset.utils.celery import session_scope
@@ -175,7 +175,7 @@ def get_sql_results( # pylint: disable=too-many-arguments
return handle_query_error(str(ex), query, session)
-# pylint: disable=too-many-arguments
+# pylint: disable=too-many-arguments, too-many-locals
def execute_sql_statement(
sql_statement: str,
query: Query,
@@ -190,6 +190,10 @@ def execute_sql_statement(
db_engine_spec = database.db_engine_spec
parsed_query = ParsedQuery(sql_statement)
sql = parsed_query.stripped()
+ # This is a test to see if the query is being
+ # limited by either the dropdown or the sql.
+ # We are testing to see if more rows exist than the limit.
+ increased_limit = None if query.limit is None else query.limit + 1
if not db_engine_spec.is_readonly_query(parsed_query) and not database.allow_dml:
raise SqlLabSecurityException(
@@ -215,11 +219,14 @@ def execute_sql_statement(
if SQL_MAX_ROW and (not query.limit or query.limit > SQL_MAX_ROW):
query.limit = SQL_MAX_ROW
if query.limit:
- sql = database.apply_limit_to_sql(sql, query.limit)
+ # We are fetching one more than the requested limit in order
+ # to test whether there are more rows than the limit.
+ # Later, the extra row will be dropped before sending
+ # the results back to the user.
+ sql = database.apply_limit_to_sql(sql, increased_limit, force=True)
# Hook to allow environment-specific mutation (usually comments) to the SQL
sql = SQL_QUERY_MUTATOR(sql, user_name, security_manager, database)
-
try:
if log_query:
log_query(
@@ -245,7 +252,12 @@ def execute_sql_statement(
query.id,
str(query.to_dict()),
)
- data = db_engine_spec.fetch_data(cursor, query.limit)
+ data = db_engine_spec.fetch_data(cursor, increased_limit)
+ if query.limit is None or len(data) <= query.limit:
+ query.limiting_factor = LimitingFactor.NOT_LIMITED
+ else:
+ # return 1 row less than increased_query
+ data = data[:-1]
except Exception as ex:
logger.error("Query %d: %s", query.id, type(ex))
logger.debug("Query %d: %s", query.id, ex)
diff --git a/superset/sql_parse.py b/superset/sql_parse.py
index 1ad14d1..fdcb390 100644
--- a/superset/sql_parse.py
+++ b/superset/sql_parse.py
@@ -311,7 +311,7 @@ class ParsedQuery:
if any(not self._is_identifier(token2) for token2 in item.tokens):
self._extract_from_token(item)
- def set_or_update_query_limit(self, new_limit: int) -> str:
+ def set_or_update_query_limit(self, new_limit: int, force: bool = False) -> str:
"""Returns the query with the specified limit.
Does not change the underlying query if user did not apply the limit,
@@ -332,8 +332,8 @@ class ParsedQuery:
break
_, limit = statement.token_next(idx=limit_pos)
# Override the limit only when it exceeds the configured value.
- if limit.ttype == sqlparse.tokens.Literal.Number.Integer and new_limit < int(
- limit.value
+ if limit.ttype == sqlparse.tokens.Literal.Number.Integer and (
+ force or new_limit < int(limit.value)
):
limit.value = new_limit
elif limit.is_group:
diff --git a/superset/views/core.py b/superset/views/core.py
index cff9e48..7fb2f47 100755
--- a/superset/views/core.py
+++ b/superset/views/core.py
@@ -90,7 +90,7 @@ from superset.models.core import Database, FavStar, Log
from superset.models.dashboard import Dashboard
from superset.models.datasource_access_request import DatasourceAccessRequest
from superset.models.slice import Slice
-from superset.models.sql_lab import Query, TabState
+from superset.models.sql_lab import LimitingFactor, Query, TabState
from superset.models.user_attributes import UserAttribute
from superset.queries.dao import QueryDAO
from superset.security.analytics_db_safety import check_sqlalchemy_uri
@@ -2393,6 +2393,7 @@ class Superset(BaseSupersetView): # pylint: disable=too-many-public-methods
# Update saved query if needed
QueryDAO.update_saved_query_exec_info(query_id)
+ # TODO: set LimitingFactor to display?
payload = json.dumps(
apply_display_max_row_limit(data),
default=utils.pessimistic_json_iso_dttm_ser,
@@ -2548,6 +2549,12 @@ class Superset(BaseSupersetView): # pylint: disable=too-many-public-methods
if not (config.get("SQLLAB_CTAS_NO_LIMIT") and select_as_cta):
# set LIMIT after template processing
limits = [mydb.db_engine_spec.get_limit_from_sql(rendered_query), limit]
+ if limits[0] is None or limits[0] > limits[1]:
+ query.limiting_factor = LimitingFactor.DROPDOWN
+ elif limits[1] > limits[0]:
+ query.limiting_factor = LimitingFactor.QUERY
+ else: # limits[0] == limits[1]
+ query.limiting_factor = LimitingFactor.QUERY_AND_DROPDOWN
query.limit = min(lim for lim in limits if lim is not None)
# Flag for whether or not to expand data
@@ -2571,7 +2578,9 @@ class Superset(BaseSupersetView): # pylint: disable=too-many-public-methods
@has_access
@event_logger.log_this
@expose("/csv/<client_id>")
- def csv(self, client_id: str) -> FlaskResponse: # pylint: disable=no-self-use
+ def csv( # pylint: disable=no-self-use,too-many-locals
+ self, client_id: str
+ ) -> FlaskResponse:
"""Download the query results as csv."""
logger.info("Exporting CSV file [%s]", client_id)
query = db.session.query(Query).filter_by(client_id=client_id).one()
@@ -2599,8 +2608,21 @@ class Superset(BaseSupersetView): # pylint: disable=too-many-public-methods
logger.info("Using pandas to convert to CSV")
else:
logger.info("Running a query to turn into CSV")
- sql = query.select_sql or query.executed_sql
- df = query.database.get_df(sql, query.schema)
+ if query.select_sql:
+ sql = query.select_sql
+ limit = None
+ else:
+ sql = query.executed_sql
+ limit = ParsedQuery(sql).limit
+ if limit is not None and query.limiting_factor in {
+ LimitingFactor.QUERY,
+ LimitingFactor.DROPDOWN,
+ LimitingFactor.QUERY_AND_DROPDOWN,
+ }:
+ # remove extra row from `increased_limit`
+ limit -= 1
+ df = query.database.get_df(sql, query.schema)[:limit]
+
csv_data = csv.df_to_escaped_csv(df, index=False, **config["CSV_EXPORT"])
quoted_csv_name = parse.quote(query.name)
response = CsvResponse(
diff --git a/tests/db_engine_specs/base_engine_spec_tests.py b/tests/db_engine_specs/base_engine_spec_tests.py
index 097e0ba..e1ca7d3 100644
--- a/tests/db_engine_specs/base_engine_spec_tests.py
+++ b/tests/db_engine_specs/base_engine_spec_tests.py
@@ -81,6 +81,19 @@ class TestDbEngineSpecs(TestDbEngineSpec):
"SELECT * FROM (SELECT * FROM a LIMIT 10) LIMIT 1000",
)
+ def test_limit_query_without_force(self):
+ self.sql_limit_regex(
+ "SELECT * FROM a LIMIT 10", "SELECT * FROM a LIMIT 10", limit=11,
+ )
+
+ def test_limit_query_with_force(self):
+ self.sql_limit_regex(
+ "SELECT * FROM a LIMIT 10",
+ "SELECT * FROM a LIMIT 11",
+ limit=11,
+ force=True,
+ )
+
def test_limit_with_expr(self):
self.sql_limit_regex(
"""
diff --git a/tests/db_engine_specs/base_tests.py b/tests/db_engine_specs/base_tests.py
index d4ac2ae..ca830ee 100644
--- a/tests/db_engine_specs/base_tests.py
+++ b/tests/db_engine_specs/base_tests.py
@@ -25,8 +25,13 @@ from superset.models.core import Database
class TestDbEngineSpec(SupersetTestCase):
def sql_limit_regex(
- self, sql, expected_sql, engine_spec_class=MySQLEngineSpec, limit=1000
+ self,
+ sql,
+ expected_sql,
+ engine_spec_class=MySQLEngineSpec,
+ limit=1000,
+ force=False,
):
main = Database(database_name="test_database", sqlalchemy_uri="sqlite://")
- limited = engine_spec_class.apply_limit_to_sql(sql, limit, main)
+ limited = engine_spec_class.apply_limit_to_sql(sql, limit, main, force)
self.assertEqual(expected_sql, limited)
diff --git a/tests/sqllab_tests.py b/tests/sqllab_tests.py
index afed7a6..8e1e88d 100644
--- a/tests/sqllab_tests.py
+++ b/tests/sqllab_tests.py
@@ -31,10 +31,11 @@ from superset.connectors.sqla.models import SqlaTable
from superset.db_engine_specs import BaseEngineSpec
from superset.errors import ErrorLevel, SupersetErrorType
from superset.models.core import Database
-from superset.models.sql_lab import Query, SavedQuery
+from superset.models.sql_lab import LimitingFactor, Query, SavedQuery
from superset.result_set import SupersetResultSet
from superset.sql_lab import (
execute_sql_statements,
+ execute_sql_statement,
get_sql_results,
SqlLabException,
SqlLabTimeoutException,
@@ -119,7 +120,6 @@ class TestSqlLab(SupersetTestCase):
)
assert saved_query_.rows is not None
assert saved_query_.last_run == datetime.now()
-
# Rollback changes
db.session.delete(saved_query_)
db.session.commit()
@@ -507,18 +507,44 @@ class TestSqlLab(SupersetTestCase):
"SELECT * FROM birth_names", client_id="sql_limit_2", query_limit=test_limit
)
self.assertEqual(len(data["data"]), test_limit)
+
data = self.run_sql(
"SELECT * FROM birth_names LIMIT {}".format(test_limit),
client_id="sql_limit_3",
query_limit=test_limit + 1,
)
self.assertEqual(len(data["data"]), test_limit)
+ self.assertEqual(data["query"]["limitingFactor"], LimitingFactor.QUERY)
+
data = self.run_sql(
"SELECT * FROM birth_names LIMIT {}".format(test_limit + 1),
client_id="sql_limit_4",
query_limit=test_limit,
)
self.assertEqual(len(data["data"]), test_limit)
+ self.assertEqual(data["query"]["limitingFactor"], LimitingFactor.DROPDOWN)
+
+ data = self.run_sql(
+ "SELECT * FROM birth_names LIMIT {}".format(test_limit),
+ client_id="sql_limit_5",
+ query_limit=test_limit,
+ )
+ self.assertEqual(len(data["data"]), test_limit)
+ self.assertEqual(
+ data["query"]["limitingFactor"], LimitingFactor.QUERY_AND_DROPDOWN
+ )
+
+ data = self.run_sql(
+ "SELECT * FROM birth_names", client_id="sql_limit_6", query_limit=10000,
+ )
+ self.assertEqual(len(data["data"]), 1200)
+ self.assertEqual(data["query"]["limitingFactor"], LimitingFactor.NOT_LIMITED)
+
+ data = self.run_sql(
+ "SELECT * FROM birth_names", client_id="sql_limit_7", query_limit=1200,
+ )
+ self.assertEqual(len(data["data"]), 1200)
+ self.assertEqual(data["query"]["limitingFactor"], LimitingFactor.NOT_LIMITED)
def test_query_api_filter(self) -> None:
"""
diff --git a/tests/superset_test_config.py b/tests/superset_test_config.py
index 0ce8909..22afcc0 100644
--- a/tests/superset_test_config.py
+++ b/tests/superset_test_config.py
@@ -47,7 +47,7 @@ if "sqlite" in SQLALCHEMY_DATABASE_URI:
PRESTO_POLL_INTERVAL = 0.1
HIVE_POLL_INTERVAL = 0.1
-SQL_MAX_ROW = 666
+SQL_MAX_ROW = 10000
SQLLAB_CTAS_NO_LIMIT = True # SQL_MAX_ROW will not take affect for the CTA queries
FEATURE_FLAGS = {
**FEATURE_FLAGS,