You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by vi...@apache.org on 2020/09/11 12:07:18 UTC
[incubator-superset] 20/34: refactor(database): use SupersetResultSet on SqlaTable.get_df() (#10707)
This is an automated email from the ASF dual-hosted git repository.
villebro pushed a commit to branch 0.38
in repository https://gitbox.apache.org/repos/asf/incubator-superset.git
commit 19db4415430e479a15789e4eddec3505a468fc5c
Author: Ville Brofeldt <33...@users.noreply.github.com>
AuthorDate: Fri Aug 28 21:12:03 2020 +0300
refactor(database): use SupersetResultSet on SqlaTable.get_df() (#10707)
* refactor(database): use SupersetResultSet on SqlaTable.get_df()
* lint
* change cypress test
---
.../explore/visualizations/table.test.ts | 21 +++++++++++----------
superset/db_engine_specs/base.py | 6 ++++--
superset/db_engine_specs/bigquery.py | 4 +++-
superset/db_engine_specs/exasol.py | 6 ++++--
superset/db_engine_specs/hive.py | 6 ++++--
superset/db_engine_specs/mssql.py | 4 +++-
superset/db_engine_specs/postgres.py | 4 +++-
superset/models/core.py | 14 ++++++--------
superset/typing.py | 4 ++--
superset/viz.py | 1 -
10 files changed, 40 insertions(+), 30 deletions(-)
diff --git a/superset-frontend/cypress-base/cypress/integration/explore/visualizations/table.test.ts b/superset-frontend/cypress-base/cypress/integration/explore/visualizations/table.test.ts
index c7015d9..77f9c6f 100644
--- a/superset-frontend/cypress-base/cypress/integration/explore/visualizations/table.test.ts
+++ b/superset-frontend/cypress-base/cypress/integration/explore/visualizations/table.test.ts
@@ -29,6 +29,16 @@ import readResponseBlob from '../../../utils/readResponseBlob';
describe('Visualization > Table', () => {
const VIZ_DEFAULTS = { ...FORM_DATA_DEFAULTS, viz_type: 'table' };
+ const PERCENT_METRIC = {
+ expressionType: 'SQL',
+ sqlExpression: 'CAST(SUM(sum_girls)+AS+FLOAT)/SUM(num)',
+ column: null,
+ aggregate: null,
+ hasCustomLabel: true,
+ label: 'Girls',
+ optionName: 'metric_6qwzgc8bh2v_zox7hil1mzs',
+ };
+
beforeEach(() => {
cy.login();
cy.server();
@@ -119,7 +129,7 @@ describe('Visualization > Table', () => {
it('Test table with percent metrics and groupby', () => {
const formData = {
...VIZ_DEFAULTS,
- percent_metrics: NUM_METRIC,
+ percent_metrics: PERCENT_METRIC,
metrics: [],
groupby: ['name'],
};
@@ -214,15 +224,6 @@ describe('Visualization > Table', () => {
});
it('Tests table number formatting with % in metric name', () => {
- const PERCENT_METRIC = {
- expressionType: 'SQL',
- sqlExpression: 'CAST(SUM(sum_girls)+AS+FLOAT)/SUM(num)',
- column: null,
- aggregate: null,
- hasCustomLabel: true,
- label: 'Girls',
- optionName: 'metric_6qwzgc8bh2v_zox7hil1mzs',
- };
const formData = {
...VIZ_DEFAULTS,
percent_metrics: PERCENT_METRIC,
diff --git a/superset/db_engine_specs/base.py b/superset/db_engine_specs/base.py
index 331961c..9c9b6a8 100644
--- a/superset/db_engine_specs/base.py
+++ b/superset/db_engine_specs/base.py
@@ -300,7 +300,9 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods
return select_exprs
@classmethod
- def fetch_data(cls, cursor: Any, limit: int) -> List[Tuple[Any, ...]]:
+ def fetch_data(
+ cls, cursor: Any, limit: Optional[int] = None
+ ) -> List[Tuple[Any, ...]]:
"""
:param cursor: Cursor instance
@@ -309,7 +311,7 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods
"""
if cls.arraysize:
cursor.arraysize = cls.arraysize
- if cls.limit_method == LimitMethod.FETCH_MANY:
+ if cls.limit_method == LimitMethod.FETCH_MANY and limit:
return cursor.fetchmany(limit)
return cursor.fetchall()
diff --git a/superset/db_engine_specs/bigquery.py b/superset/db_engine_specs/bigquery.py
index ea33531..71ae828 100644
--- a/superset/db_engine_specs/bigquery.py
+++ b/superset/db_engine_specs/bigquery.py
@@ -85,7 +85,9 @@ class BigQueryEngineSpec(BaseEngineSpec):
return None
@classmethod
- def fetch_data(cls, cursor: Any, limit: int) -> List[Tuple[Any, ...]]:
+ def fetch_data(
+ cls, cursor: Any, limit: Optional[int] = None
+ ) -> List[Tuple[Any, ...]]:
data = super().fetch_data(cursor, limit)
# Support type BigQuery Row, introduced here PR #4071
# google.cloud.bigquery.table.Row
diff --git a/superset/db_engine_specs/exasol.py b/superset/db_engine_specs/exasol.py
index a485be5..327cc3a 100644
--- a/superset/db_engine_specs/exasol.py
+++ b/superset/db_engine_specs/exasol.py
@@ -14,7 +14,7 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-from typing import Any, List, Tuple
+from typing import Any, List, Optional, Tuple
from superset.db_engine_specs.base import BaseEngineSpec
@@ -40,7 +40,9 @@ class ExasolEngineSpec(BaseEngineSpec): # pylint: disable=abstract-method
}
@classmethod
- def fetch_data(cls, cursor: Any, limit: int) -> List[Tuple[Any, ...]]:
+ def fetch_data(
+ cls, cursor: Any, limit: Optional[int] = None
+ ) -> List[Tuple[Any, ...]]:
data = super().fetch_data(cursor, limit)
# Lists of `pyodbc.Row` need to be unpacked further
return cls.pyodbc_rows_to_tuples(data)
diff --git a/superset/db_engine_specs/hive.py b/superset/db_engine_specs/hive.py
index 9cbcfb7..e75e27a 100644
--- a/superset/db_engine_specs/hive.py
+++ b/superset/db_engine_specs/hive.py
@@ -110,7 +110,9 @@ class HiveEngineSpec(PrestoEngineSpec):
return BaseEngineSpec.get_all_datasource_names(database, datasource_type)
@classmethod
- def fetch_data(cls, cursor: Any, limit: int) -> List[Tuple[Any, ...]]:
+ def fetch_data(
+ cls, cursor: Any, limit: Optional[int] = None
+ ) -> List[Tuple[Any, ...]]:
import pyhive
from TCLIService import ttypes
@@ -118,7 +120,7 @@ class HiveEngineSpec(PrestoEngineSpec):
if state.operationState == ttypes.TOperationState.ERROR_STATE:
raise Exception("Query error", state.errorMessage)
try:
- return super(HiveEngineSpec, cls).fetch_data(cursor, limit)
+ return super().fetch_data(cursor, limit)
except pyhive.exc.ProgrammingError:
return []
diff --git a/superset/db_engine_specs/mssql.py b/superset/db_engine_specs/mssql.py
index abe1f6c..ead06f8 100644
--- a/superset/db_engine_specs/mssql.py
+++ b/superset/db_engine_specs/mssql.py
@@ -68,7 +68,9 @@ class MssqlEngineSpec(BaseEngineSpec):
return None
@classmethod
- def fetch_data(cls, cursor: Any, limit: int) -> List[Tuple[Any, ...]]:
+ def fetch_data(
+ cls, cursor: Any, limit: Optional[int] = None
+ ) -> List[Tuple[Any, ...]]:
data = super().fetch_data(cursor, limit)
# Lists of `pyodbc.Row` need to be unpacked further
return cls.pyodbc_rows_to_tuples(data)
diff --git a/superset/db_engine_specs/postgres.py b/superset/db_engine_specs/postgres.py
index 0ccf51d..1ec433f 100644
--- a/superset/db_engine_specs/postgres.py
+++ b/superset/db_engine_specs/postgres.py
@@ -53,7 +53,9 @@ class PostgresBaseEngineSpec(BaseEngineSpec):
}
@classmethod
- def fetch_data(cls, cursor: Any, limit: int) -> List[Tuple[Any, ...]]:
+ def fetch_data(
+ cls, cursor: Any, limit: Optional[int] = None
+ ) -> List[Tuple[Any, ...]]:
cursor.tzinfo_factory = FixedOffsetTimezone
if not cursor.description:
return []
diff --git a/superset/models/core.py b/superset/models/core.py
index 7660150..775a9f0 100755
--- a/superset/models/core.py
+++ b/superset/models/core.py
@@ -57,6 +57,7 @@ from superset.db_engine_specs.base import TimeGrain
from superset.models.dashboard import Dashboard
from superset.models.helpers import AuditMixinNullable, ImportMixin
from superset.models.tags import DashboardUpdater, FavStarUpdater
+from superset.result_set import SupersetResultSet
from superset.utils import cache as cache_util, core as utils
config = app.config
@@ -392,21 +393,18 @@ class Database(
_log_query(sqls[-1])
self.db_engine_spec.execute(cursor, sqls[-1])
- if cursor.description is not None:
- columns = [col_desc[0] for col_desc in cursor.description]
- else:
- columns = []
-
- df = pd.DataFrame.from_records(
- data=list(cursor.fetchall()), columns=columns, coerce_float=True
+ data = self.db_engine_spec.fetch_data(cursor)
+ result_set = SupersetResultSet(
+ data, cursor.description, self.db_engine_spec
)
-
+ df = result_set.to_pandas_df()
if mutator:
mutator(df)
for k, v in df.dtypes.items():
if v.type == numpy.object_ and needs_conversion(df[k]):
df[k] = df[k].apply(utils.json_dumps_w_dates)
+
return df
def compile_sqla_query(self, qry: Select, schema: Optional[str] = None) -> str:
diff --git a/superset/typing.py b/superset/typing.py
index e238000..6f1fa2e 100644
--- a/superset/typing.py
+++ b/superset/typing.py
@@ -14,7 +14,7 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
from flask import Flask
from flask_caching import Cache
@@ -25,7 +25,7 @@ DbapiDescriptionRow = Tuple[
str, str, Optional[str], Optional[str], Optional[int], Optional[int], bool
]
DbapiDescription = Union[List[DbapiDescriptionRow], Tuple[DbapiDescriptionRow, ...]]
-DbapiResult = List[Union[List[Any], Tuple[Any, ...]]]
+DbapiResult = Sequence[Union[List[Any], Tuple[Any, ...]]]
FilterValue = Union[float, int, str]
FilterValues = Union[FilterValue, List[FilterValue], Tuple[FilterValue]]
FormData = Dict[str, Any]
diff --git a/superset/viz.py b/superset/viz.py
index 17bebc7..d4487b4 100644
--- a/superset/viz.py
+++ b/superset/viz.py
@@ -26,7 +26,6 @@ import inspect
import logging
import math
import re
-import uuid
from collections import defaultdict, OrderedDict
from datetime import datetime, timedelta
from itertools import product