You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by be...@apache.org on 2019/05/31 20:38:21 UTC
[incubator-superset] branch master updated: fix: handle presto columns with whitespace (#7630)

This is an automated email from the ASF dual-hosted git repository.

beto pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-superset.git


The following commit(s) were added to refs/heads/master by this push:
     new 9acafd5  fix: handle presto columns with whitespace (#7630)
9acafd5 is described below

commit 9acafd5b759db42b2f3477b35f51de85a89b61fe
Author: Kim Truong <47...@users.noreply.github.com>
AuthorDate: Fri May 31 13:38:05 2019 -0700

    fix: handle presto columns with whitespace (#7630)
---
 superset/db_engine_specs.py   | 34 +++++++++++++++++++++++++---------
 tests/db_engine_specs_test.py |  9 ++++++++-
 2 files changed, 33 insertions(+), 10 deletions(-)

diff --git a/superset/db_engine_specs.py b/superset/db_engine_specs.py
index 00a9310..89e677b 100644
--- a/superset/db_engine_specs.py
+++ b/superset/db_engine_specs.py
@@ -945,11 +945,20 @@ class PrestoEngineSpec(BaseEngineSpec):
             r'{}(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)'.format(delimiter), data_type)
 
     @classmethod
-    def _parse_structural_column(cls, full_data_type: str, result: List[dict]) -> None:
+    def _parse_structural_column(cls,
+                                 parent_column_name: str,
+                                 parent_data_type: str,
+                                 result: List[dict]) -> None:
         """
         Parse a row or array column
         :param result: list tracking the results
         """
+        formatted_parent_column_name = parent_column_name
+        # Quote the column name if there is a space
+        if ' ' in parent_column_name:
+            formatted_parent_column_name = f'"{parent_column_name}"'
+        full_data_type = f'{formatted_parent_column_name} {parent_data_type}'
+        original_result_len = len(result)
         # split on open parenthesis ( to get the structural
         # data type and its component types
         data_types = cls._split_data_type(full_data_type, r'\(')
@@ -1001,6 +1010,11 @@ class PrestoEngineSpec(BaseEngineSpec):
                     # Because it is an array of a basic data type. We have finished
                     # parsing the structural data type and can move on.
                     stack.pop()
+        # Unquote the column name if necessary
+        if formatted_parent_column_name != parent_column_name:
+            for index in range(original_result_len, len(result)):
+                result[index]['name'] = result[index]['name'].replace(
+                    formatted_parent_column_name, parent_column_name)
 
     @classmethod
     def _show_columns(
@@ -1037,9 +1051,8 @@ class PrestoEngineSpec(BaseEngineSpec):
             try:
                 # parse column if it is a row or array
                 if 'array' in column.Type or 'row' in column.Type:
-                    full_data_type = '{} {}'.format(column.Column, column.Type)
                     structural_column_index = len(result)
-                    cls._parse_structural_column(full_data_type, result)
+                    cls._parse_structural_column(column.Column, column.Type, result)
                     result[structural_column_index]['nullable'] = getattr(
                         column, 'Null', True)
                     result[structural_column_index]['default'] = None
@@ -1244,8 +1257,9 @@ class PrestoEngineSpec(BaseEngineSpec):
         for column in selected_columns:
             if column['type'].startswith('ROW'):
                 parsed_row_columns: List[dict] = []
-                full_data_type = '{} {}'.format(column['name'], column['type'].lower())
-                cls._parse_structural_column(full_data_type, parsed_row_columns)
+                cls._parse_structural_column(column['name'],
+                                             column['type'].lower(),
+                                             parsed_row_columns)
                 expanded_columns = expanded_columns + parsed_row_columns[1:]
                 filtered_row_columns, array_columns = cls._filter_out_array_nested_cols(
                     parsed_row_columns)
@@ -1257,8 +1271,9 @@ class PrestoEngineSpec(BaseEngineSpec):
                                             array_column_hierarchy)
             elif column['type'].startswith('ARRAY'):
                 parsed_array_columns: List[dict] = []
-                full_data_type = '{} {}'.format(column['name'], column['type'].lower())
-                cls._parse_structural_column(full_data_type, parsed_array_columns)
+                cls._parse_structural_column(column['name'],
+                                             column['type'].lower(),
+                                             parsed_array_columns)
                 expanded_columns = expanded_columns + parsed_array_columns[1:]
                 cls._build_column_hierarchy(parsed_array_columns,
                                             ['ROW', 'ARRAY'],
@@ -1523,8 +1538,9 @@ class PrestoEngineSpec(BaseEngineSpec):
         # Get the list of all columns (selected fields and their nested fields)
         for column in columns:
             if column['type'].startswith('ARRAY') or column['type'].startswith('ROW'):
-                full_data_type = '{} {}'.format(column['name'], column['type'].lower())
-                cls._parse_structural_column(full_data_type, all_columns)
+                cls._parse_structural_column(column['name'],
+                                             column['type'].lower(),
+                                             all_columns)
             else:
                 all_columns.append(column)
 
diff --git a/tests/db_engine_specs_test.py b/tests/db_engine_specs_test.py
index 02dbbae..4491914 100644
--- a/tests/db_engine_specs_test.py
+++ b/tests/db_engine_specs_test.py
@@ -350,7 +350,14 @@ class DbEngineSpecsTestCase(SupersetTestCase):
             ('column_name.nested_obj', 'FLOAT')]
         self.verify_presto_column(presto_column, expected_results)
 
-    def test_presto_get_simple_row_column_with_tricky_name(self):
+    def test_presto_get_simple_row_column_with_name_containing_whitespace(self):
+        presto_column = ('column name', 'row(nested_obj double)', '')
+        expected_results = [
+            ('column name', 'ROW'),
+            ('column name.nested_obj', 'FLOAT')]
+        self.verify_presto_column(presto_column, expected_results)
+
+    def test_presto_get_simple_row_column_with_tricky_nested_field_name(self):
         presto_column = ('column_name', 'row("Field Name(Tricky, Name)" double)', '')
         expected_results = [
             ('column_name', 'ROW'),