You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by be...@apache.org on 2019/06/27 18:20:09 UTC
[incubator-superset] branch master updated: feat: query based on all partitions (#7782)

This is an automated email from the ASF dual-hosted git repository.

beto pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-superset.git


The following commit(s) were added to refs/heads/master by this push:
     new 5994e43  feat: query based on all partitions (#7782)
5994e43 is described below

commit 5994e432d41330019d97e215b7250db4ebe29645
Author: Kim Truong <47...@users.noreply.github.com>
AuthorDate: Thu Jun 27 11:19:55 2019 -0700

    feat: query based on all partitions (#7782)
    
    * feat: query based on all partitions
    
    * fix: address Beto's comments
---
 superset/db_engine_specs/presto.py | 29 +++++++++++++++++------------
 tests/db_engine_specs_test.py      | 22 +++++++++++++++++++++-
 2 files changed, 38 insertions(+), 13 deletions(-)

diff --git a/superset/db_engine_specs/presto.py b/superset/db_engine_specs/presto.py
index f9888a2..63171f3 100644
--- a/superset/db_engine_specs/presto.py
+++ b/superset/db_engine_specs/presto.py
@@ -794,13 +794,13 @@ class PrestoEngineSpec(BaseEngineSpec):
         if schema_name and "." not in table_name:
             full_table_name = "{}.{}".format(schema_name, table_name)
         pql = cls._partition_query(full_table_name)
-        col_name, latest_part = cls.latest_partition(
+        col_names, latest_parts = cls.latest_partition(
             table_name, schema_name, database, show_first=True
         )
         return {
             "partitions": {
                 "cols": cols,
-                "latest": {col_name: latest_part},
+                "latest": dict(zip(col_names, latest_parts)),
                 "partitionQuery": pql,
             }
         }
@@ -910,22 +910,26 @@ class PrestoEngineSpec(BaseEngineSpec):
     @classmethod
     def where_latest_partition(cls, table_name, schema, database, qry, columns=None):
         try:
-            col_name, value = cls.latest_partition(
+            col_names, values = cls.latest_partition(
                 table_name, schema, database, show_first=True
             )
         except Exception:
             # table is not partitioned
             return False
-        if value is not None:
-            for c in columns:
-                if c.get("name") == col_name:
-                    return qry.where(Column(col_name) == value)
-        return False
+
+        if values is None:
+            return False
+
+        column_names = {column.get('name') for column in columns or []}
+        for col_name, value in zip(col_names, values):
+            if col_name in column_names:
+                qry = qry.where(Column(col_name) == value)
+        return qry
 
     @classmethod
     def _latest_partition_from_df(cls, df):
         if not df.empty:
-            return df.to_records(index=False)[0][0]
+            return df.to_records(index=False)[0].item()
 
     @classmethod
     def latest_partition(cls, table_name, schema, database, show_first=False):
@@ -955,10 +959,11 @@ class PrestoEngineSpec(BaseEngineSpec):
                 "to use this function. You may want to use "
                 "`presto.latest_sub_partition`"
             )
-        part_field = indexes[0]["column_names"][0]
-        sql = cls._partition_query(table_name, 1, [(part_field, True)])
+        column_names = indexes[0]["column_names"]
+        part_fields = [(column_name, True) for column_name in column_names]
+        sql = cls._partition_query(table_name, 1, part_fields)
         df = database.get_df(sql, schema)
-        return part_field, cls._latest_partition_from_df(df)
+        return column_names, cls._latest_partition_from_df(df)
 
     @classmethod
     def latest_sub_partition(cls, table_name, schema, database, **kwargs):
diff --git a/tests/db_engine_specs_test.py b/tests/db_engine_specs_test.py
index 59d25c3..4ec0f01 100644
--- a/tests/db_engine_specs_test.py
+++ b/tests/db_engine_specs_test.py
@@ -17,9 +17,11 @@
 import unittest
 from unittest import mock
 
-from sqlalchemy import column, literal_column, select, table
+import pandas as pd
+from sqlalchemy import column, literal_column, table
 from sqlalchemy.dialects import mssql, oracle, postgresql
 from sqlalchemy.engine.result import RowProxy
+from sqlalchemy.sql import select
 from sqlalchemy.types import String, UnicodeText
 
 from superset.db_engine_specs import engines
@@ -760,6 +762,24 @@ class DbEngineSpecsTestCase(SupersetTestCase):
         self.assertEqual(actual_data, expected_data)
         self.assertEqual(actual_expanded_cols, expected_expanded_cols)
 
+    def test_presto_extra_table_metadata(self):
+        db = mock.Mock()
+        db.get_indexes = mock.Mock(return_value=[{'column_names': ['ds', 'hour']}])
+        df = pd.DataFrame({'ds': ['01-01-19'], 'hour': [1]})
+        db.get_df = mock.Mock(return_value=df)
+        result = PrestoEngineSpec.extra_table_metadata(db, 'test_table', 'test_schema')
+        self.assertEqual({'ds': '01-01-19', 'hour': 1}, result['partitions']['latest'])
+
+    def test_presto_where_latest_partition(self):
+        db = mock.Mock()
+        db.get_indexes = mock.Mock(return_value=[{'column_names': ['ds', 'hour']}])
+        df = pd.DataFrame({'ds': ['01-01-19'], 'hour': [1]})
+        db.get_df = mock.Mock(return_value=df)
+        columns = [{'name': 'ds'}, {'name': 'hour'}]
+        result = PrestoEngineSpec.where_latest_partition('test_table', 'test_schema', db, select(), columns)
+        query_result = str(result.compile(compile_kwargs={'literal_binds': True}))
+        self.assertEqual('SELECT  \nWHERE ds = \'01-01-19\' AND hour = 1', query_result)
+
     def test_hive_get_view_names_return_empty_list(self):
         self.assertEquals([], HiveEngineSpec.get_view_names(mock.ANY, mock.ANY))