You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by jo...@apache.org on 2023/01/21 00:00:45 UTC
[superset] branch master updated: fix(hive): Regression in #21794 (#22794)
This is an automated email from the ASF dual-hosted git repository.
johnbodley pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git
The following commit(s) were added to refs/heads/master by this push:
new 0b31b2cb87 fix(hive): Regression in #21794 (#22794)
0b31b2cb87 is described below
commit 0b31b2cb877ce629c4f346ea434cd02c4a8a469b
Author: John Bodley <45...@users.noreply.github.com>
AuthorDate: Sat Jan 21 13:00:36 2023 +1300
fix(hive): Regression in #21794 (#22794)
---
superset/db_engine_specs/hive.py | 37 ++++++++++++++++++++-
superset/db_engine_specs/presto.py | 7 ++--
.../db_engine_specs/hive_tests.py | 38 ++++++++++++++++++++++
3 files changed, 78 insertions(+), 4 deletions(-)
diff --git a/superset/db_engine_specs/hive.py b/superset/db_engine_specs/hive.py
index 0c491553ae..3c90975fa3 100644
--- a/superset/db_engine_specs/hive.py
+++ b/superset/db_engine_specs/hive.py
@@ -22,7 +22,7 @@ import re
import tempfile
import time
from datetime import datetime
-from typing import Any, Dict, List, Optional, Tuple, TYPE_CHECKING
+from typing import Any, Dict, List, Optional, Set, Tuple, TYPE_CHECKING
from urllib import parse
import numpy as np
@@ -576,3 +576,38 @@ class HiveEngineSpec(PrestoEngineSpec):
"""
return True
+
+ @classmethod
+ def get_view_names(
+ cls,
+ database: "Database",
+ inspector: Inspector,
+ schema: Optional[str],
+ ) -> Set[str]:
+ """
+ Get all the view names within the specified schema.
+
+ Per the SQLAlchemy definition if the schema is omitted the database’s default
+ schema is used, however some dialects infer the request as schema agnostic.
+
+ Note that PyHive's Hive SQLAlchemy dialect does not adhere to the specification
+ where the `get_view_names` method returns both real tables and views. Furthermore
+ the dialect wrongfully infers the request as schema agnostic when the schema is
+ omitted.
+
+ :param database: The database to inspect
+ :param inspector: The SQLAlchemy inspector
+ :param schema: The schema to inspect
+ :returns: The view names
+ """
+
+ sql = "SHOW VIEWS"
+
+ if schema:
+ sql += f" IN `{schema}`"
+
+ with database.get_raw_connection(schema=schema) as conn:
+ cursor = conn.cursor()
+ cursor.execute(sql)
+ results = cursor.fetchall()
+ return {row[0] for row in results}
diff --git a/superset/db_engine_specs/presto.py b/superset/db_engine_specs/presto.py
index e1aa918879..2e8fc09fd1 100644
--- a/superset/db_engine_specs/presto.py
+++ b/superset/db_engine_specs/presto.py
@@ -638,9 +638,10 @@ class PrestoEngineSpec(PrestoBaseEngineSpec):
Per the SQLAlchemy definition if the schema is omitted the database’s default
schema is used, however some dialects infer the request as schema agnostic.
- Note that PyHive's Hive and Presto SQLAlchemy dialects do not implement the
- `get_view_names` method. To ensure consistency with the `get_table_names` method
- the request is deemed schema agnostic when the schema is omitted.
+ Note that PyHive's Presto SQLAlchemy dialect does not adhere to the
+ specification as the `get_view_names` method is not defined. Furthermore the
+ dialect wrongfully infers the request as schema agnostic when the schema is
+ omitted.
:param database: The database to inspect
:param inspector: The SQLAlchemy inspector
diff --git a/tests/integration_tests/db_engine_specs/hive_tests.py b/tests/integration_tests/db_engine_specs/hive_tests.py
index 366648effa..b39f265897 100644
--- a/tests/integration_tests/db_engine_specs/hive_tests.py
+++ b/tests/integration_tests/db_engine_specs/hive_tests.py
@@ -403,3 +403,41 @@ def test__latest_partition_from_df():
["ds=01-01-19/hour=1", "ds=01-03-19/hour=1", "ds=01-02-19/hour=2"],
["01-03-19", "1"],
)
+
+
+def test_get_view_names_with_schema():
+ database = mock.MagicMock()
+ mock_execute = mock.MagicMock()
+ database.get_raw_connection().__enter__().cursor().execute = mock_execute
+ database.get_raw_connection().__enter__().cursor().fetchall = mock.MagicMock(
+ return_value=[["a", "b,", "c"], ["d", "e"]]
+ )
+
+ schema = "schema"
+ result = HiveEngineSpec.get_view_names(database, mock.Mock(), schema)
+ mock_execute.assert_called_once_with(f"SHOW VIEWS IN `{schema}`")
+ assert result == {"a", "d"}
+
+
+def test_get_view_names_without_schema():
+ database = mock.MagicMock()
+ mock_execute = mock.MagicMock()
+ database.get_raw_connection().__enter__().cursor().execute = mock_execute
+ database.get_raw_connection().__enter__().cursor().fetchall = mock.MagicMock(
+ return_value=[["a", "b,", "c"], ["d", "e"]]
+ )
+ result = HiveEngineSpec.get_view_names(database, mock.Mock(), None)
+ mock_execute.assert_called_once_with("SHOW VIEWS")
+ assert result == {"a", "d"}
+
+
+@mock.patch("superset.db_engine_specs.base.BaseEngineSpec.get_table_names")
+@mock.patch("superset.db_engine_specs.hive.HiveEngineSpec.get_view_names")
+def test_get_table_names(
+ mock_get_view_names,
+ mock_get_table_names,
+):
+ mock_get_view_names.return_value = {"view1", "view2"}
+ mock_get_table_names.return_value = {"table1", "table2", "view1", "view2"}
+ tables = HiveEngineSpec.get_table_names(mock.Mock(), mock.Mock(), None)
+ assert tables == {"table1", "table2"}