You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by be...@apache.org on 2019/06/27 18:20:09 UTC
[incubator-superset] branch master updated: feat: query based on all partitions (#7782)
This is an automated email from the ASF dual-hosted git repository.
beto pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-superset.git
The following commit(s) were added to refs/heads/master by this push:
new 5994e43 feat: query based on all partitions (#7782)
5994e43 is described below
commit 5994e432d41330019d97e215b7250db4ebe29645
Author: Kim Truong <47...@users.noreply.github.com>
AuthorDate: Thu Jun 27 11:19:55 2019 -0700
feat: query based on all partitions (#7782)
* feat: query based on all partitions
* fix: address Beto's comments
---
superset/db_engine_specs/presto.py | 29 +++++++++++++++++------------
tests/db_engine_specs_test.py | 22 +++++++++++++++++++++-
2 files changed, 38 insertions(+), 13 deletions(-)
diff --git a/superset/db_engine_specs/presto.py b/superset/db_engine_specs/presto.py
index f9888a2..63171f3 100644
--- a/superset/db_engine_specs/presto.py
+++ b/superset/db_engine_specs/presto.py
@@ -794,13 +794,13 @@ class PrestoEngineSpec(BaseEngineSpec):
if schema_name and "." not in table_name:
full_table_name = "{}.{}".format(schema_name, table_name)
pql = cls._partition_query(full_table_name)
- col_name, latest_part = cls.latest_partition(
+ col_names, latest_parts = cls.latest_partition(
table_name, schema_name, database, show_first=True
)
return {
"partitions": {
"cols": cols,
- "latest": {col_name: latest_part},
+ "latest": dict(zip(col_names, latest_parts)),
"partitionQuery": pql,
}
}
@@ -910,22 +910,26 @@ class PrestoEngineSpec(BaseEngineSpec):
@classmethod
def where_latest_partition(cls, table_name, schema, database, qry, columns=None):
try:
- col_name, value = cls.latest_partition(
+ col_names, values = cls.latest_partition(
table_name, schema, database, show_first=True
)
except Exception:
# table is not partitioned
return False
- if value is not None:
- for c in columns:
- if c.get("name") == col_name:
- return qry.where(Column(col_name) == value)
- return False
+
+ if values is None:
+ return False
+
+ column_names = {column.get('name') for column in columns or []}
+ for col_name, value in zip(col_names, values):
+ if col_name in column_names:
+ qry = qry.where(Column(col_name) == value)
+ return qry
@classmethod
def _latest_partition_from_df(cls, df):
if not df.empty:
- return df.to_records(index=False)[0][0]
+ return df.to_records(index=False)[0].item()
@classmethod
def latest_partition(cls, table_name, schema, database, show_first=False):
@@ -955,10 +959,11 @@ class PrestoEngineSpec(BaseEngineSpec):
"to use this function. You may want to use "
"`presto.latest_sub_partition`"
)
- part_field = indexes[0]["column_names"][0]
- sql = cls._partition_query(table_name, 1, [(part_field, True)])
+ column_names = indexes[0]["column_names"]
+ part_fields = [(column_name, True) for column_name in column_names]
+ sql = cls._partition_query(table_name, 1, part_fields)
df = database.get_df(sql, schema)
- return part_field, cls._latest_partition_from_df(df)
+ return column_names, cls._latest_partition_from_df(df)
@classmethod
def latest_sub_partition(cls, table_name, schema, database, **kwargs):
diff --git a/tests/db_engine_specs_test.py b/tests/db_engine_specs_test.py
index 59d25c3..4ec0f01 100644
--- a/tests/db_engine_specs_test.py
+++ b/tests/db_engine_specs_test.py
@@ -17,9 +17,11 @@
import unittest
from unittest import mock
-from sqlalchemy import column, literal_column, select, table
+import pandas as pd
+from sqlalchemy import column, literal_column, table
from sqlalchemy.dialects import mssql, oracle, postgresql
from sqlalchemy.engine.result import RowProxy
+from sqlalchemy.sql import select
from sqlalchemy.types import String, UnicodeText
from superset.db_engine_specs import engines
@@ -760,6 +762,24 @@ class DbEngineSpecsTestCase(SupersetTestCase):
self.assertEqual(actual_data, expected_data)
self.assertEqual(actual_expanded_cols, expected_expanded_cols)
+ def test_presto_extra_table_metadata(self):
+ db = mock.Mock()
+ db.get_indexes = mock.Mock(return_value=[{'column_names': ['ds', 'hour']}])
+ df = pd.DataFrame({'ds': ['01-01-19'], 'hour': [1]})
+ db.get_df = mock.Mock(return_value=df)
+ result = PrestoEngineSpec.extra_table_metadata(db, 'test_table', 'test_schema')
+ self.assertEqual({'ds': '01-01-19', 'hour': 1}, result['partitions']['latest'])
+
+ def test_presto_where_latest_partition(self):
+ db = mock.Mock()
+ db.get_indexes = mock.Mock(return_value=[{'column_names': ['ds', 'hour']}])
+ df = pd.DataFrame({'ds': ['01-01-19'], 'hour': [1]})
+ db.get_df = mock.Mock(return_value=df)
+ columns = [{'name': 'ds'}, {'name': 'hour'}]
+ result = PrestoEngineSpec.where_latest_partition('test_table', 'test_schema', db, select(), columns)
+ query_result = str(result.compile(compile_kwargs={'literal_binds': True}))
+ self.assertEqual('SELECT \nWHERE ds = \'01-01-19\' AND hour = 1', query_result)
+
def test_hive_get_view_names_return_empty_list(self):
self.assertEquals([], HiveEngineSpec.get_view_names(mock.ANY, mock.ANY))