You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by jo...@apache.org on 2019/08/05 18:57:07 UTC
[incubator-superset] branch master updated: [DB Engine] Support old
and new Presto syntax (#7977)
This is an automated email from the ASF dual-hosted git repository.
johnbodley pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-superset.git
The following commit(s) were added to refs/heads/master by this push:
new d58dbad [DB Engine] Support old and new Presto syntax (#7977)
d58dbad is described below
commit d58dbad076f61cfd05fa8280ad13442d29360873
Author: Erik Ritter <er...@airbnb.com>
AuthorDate: Mon Aug 5 11:56:56 2019 -0700
[DB Engine] Support old and new Presto syntax (#7977)
---
docs/installation.rst | 12 ++++++++++++
superset/db_engine_specs/presto.py | 27 +++++++++++++++++++++------
superset/views/database/__init__.py | 4 +++-
tests/db_engine_specs_test.py | 2 ++
4 files changed, 38 insertions(+), 7 deletions(-)
diff --git a/docs/installation.rst b/docs/installation.rst
index 68bc5ce..919d6ed 100644
--- a/docs/installation.rst
+++ b/docs/installation.rst
@@ -647,6 +647,18 @@ Note that you can run the ``superset refresh_druid`` command to refresh the
metadata from your Druid cluster(s)
+Presto
+------
+
+By default Superset assumes the most recent version of Presto is being used when
+querying the datasource. If you're using an older version of presto, you can configure
+it in the ``extra`` parameter::
+
+ {
+ "version": "0.123"
+ }
+
+
CORS
----
diff --git a/superset/db_engine_specs/presto.py b/superset/db_engine_specs/presto.py
index 4f4b03e..6708b06 100644
--- a/superset/db_engine_specs/presto.py
+++ b/superset/db_engine_specs/presto.py
@@ -16,6 +16,7 @@
# under the License.
# pylint: disable=C,R,W
from collections import OrderedDict
+from distutils.version import StrictVersion
import logging
import re
import textwrap
@@ -797,7 +798,7 @@ class PrestoEngineSpec(BaseEngineSpec):
full_table_name = table_name
if schema_name and "." not in table_name:
full_table_name = "{}.{}".format(schema_name, table_name)
- pql = cls._partition_query(full_table_name)
+ pql = cls._partition_query(full_table_name, database)
col_names, latest_parts = cls.latest_partition(
table_name, schema_name, database, show_first=True
)
@@ -872,7 +873,9 @@ class PrestoEngineSpec(BaseEngineSpec):
return utils.error_msg_from_exception(e)
@classmethod
- def _partition_query(cls, table_name, limit=0, order_by=None, filters=None):
+ def _partition_query(
+ cls, table_name, database, limit=0, order_by=None, filters=None
+ ):
"""Returns a partition query
:param table_name: the name of the table to get partitions from
@@ -900,10 +903,20 @@ class PrestoEngineSpec(BaseEngineSpec):
l.append(f"{field} = '{value}'")
where_clause = "WHERE " + " AND ".join(l)
+ presto_version = database.get_extra().get("version")
+
+ # Partition select syntax changed in v0.199, so check here.
+ # Default to the new syntax if version is unset.
+ partition_select_clause = (
+ f'SELECT * FROM "{table_name}$partitions"'
+ if not presto_version
+ or StrictVersion(presto_version) >= StrictVersion("0.199")
+ else f"SHOW PARTITIONS FROM {table_name}"
+ )
+
sql = textwrap.dedent(
f"""\
- SELECT * FROM "{table_name}$partitions"
-
+ {partition_select_clause}
{where_clause}
{order_by_clause}
{limit_clause}
@@ -965,7 +978,7 @@ class PrestoEngineSpec(BaseEngineSpec):
)
column_names = indexes[0]["column_names"]
part_fields = [(column_name, True) for column_name in column_names]
- sql = cls._partition_query(table_name, 1, part_fields)
+ sql = cls._partition_query(table_name, database, 1, part_fields)
df = database.get_df(sql, schema)
return column_names, cls._latest_partition_from_df(df)
@@ -1012,7 +1025,9 @@ class PrestoEngineSpec(BaseEngineSpec):
if field not in kwargs.keys():
field_to_return = field
- sql = cls._partition_query(table_name, 1, [(field_to_return, True)], kwargs)
+ sql = cls._partition_query(
+ table_name, database, 1, [(field_to_return, True)], kwargs
+ )
df = database.get_df(sql, schema)
if df.empty:
return ""
diff --git a/superset/views/database/__init__.py b/superset/views/database/__init__.py
index 15b0d52..9b7c8cf 100644
--- a/superset/views/database/__init__.py
+++ b/superset/views/database/__init__.py
@@ -143,7 +143,9 @@ class DatabaseMixin: # noqa
'Specify it as **"schemas_allowed_for_csv_upload": '
'["public", "csv_upload"]**. '
"If database flavor does not support schema or any schema is allowed "
- "to be accessed, just leave the list empty",
+ "to be accessed, just leave the list empty"
+ "4. the ``version`` field is a string specifying the this db's version. "
+ "This should be used with Presto DBs so that the syntax is correct",
True,
),
"impersonate_user": _(
diff --git a/tests/db_engine_specs_test.py b/tests/db_engine_specs_test.py
index c9c398f..a70c8cd 100644
--- a/tests/db_engine_specs_test.py
+++ b/tests/db_engine_specs_test.py
@@ -766,6 +766,7 @@ class DbEngineSpecsTestCase(SupersetTestCase):
def test_presto_extra_table_metadata(self):
db = mock.Mock()
db.get_indexes = mock.Mock(return_value=[{"column_names": ["ds", "hour"]}])
+ db.get_extra = mock.Mock(return_value={})
df = pd.DataFrame({"ds": ["01-01-19"], "hour": [1]})
db.get_df = mock.Mock(return_value=df)
result = PrestoEngineSpec.extra_table_metadata(db, "test_table", "test_schema")
@@ -774,6 +775,7 @@ class DbEngineSpecsTestCase(SupersetTestCase):
def test_presto_where_latest_partition(self):
db = mock.Mock()
db.get_indexes = mock.Mock(return_value=[{"column_names": ["ds", "hour"]}])
+ db.get_extra = mock.Mock(return_value={})
df = pd.DataFrame({"ds": ["01-01-19"], "hour": [1]})
db.get_df = mock.Mock(return_value=df)
columns = [{"name": "ds"}, {"name": "hour"}]