You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by be...@apache.org on 2023/03/22 22:33:32 UTC
[superset] branch bigquery_get_catalog_names created (now 3b25de4c88)
This is an automated email from the ASF dual-hosted git repository.
beto pushed a change to branch bigquery_get_catalog_names
in repository https://gitbox.apache.org/repos/asf/superset.git
at 3b25de4c88 feat(bigquery): get_catalog_names
This branch includes the following new commits:
new 3b25de4c88 feat(bigquery): get_catalog_names
The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
[superset] 01/01: feat(bigquery): get_catalog_names
Posted by be...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
beto pushed a commit to branch bigquery_get_catalog_names
in repository https://gitbox.apache.org/repos/asf/superset.git
commit 3b25de4c88461c0370f93b8ebf4d0691e19f36a8
Author: Beto Dealmeida <ro...@dealmeida.net>
AuthorDate: Wed Mar 22 15:33:13 2023 -0700
feat(bigquery): get_catalog_names
---
superset/db_engine_specs/bigquery.py | 85 ++++++++++++++++++++++--------------
1 file changed, 53 insertions(+), 32 deletions(-)
diff --git a/superset/db_engine_specs/bigquery.py b/superset/db_engine_specs/bigquery.py
index 171dad4732..c65d1c2f0a 100644
--- a/superset/db_engine_specs/bigquery.py
+++ b/superset/db_engine_specs/bigquery.py
@@ -28,6 +28,7 @@ from marshmallow import fields, Schema
from marshmallow.exceptions import ValidationError
from sqlalchemy import column, types
from sqlalchemy.engine.base import Engine
+from sqlalchemy.engine.reflection import Inspector
from sqlalchemy.sql import sqltypes
from typing_extensions import TypedDict
@@ -42,6 +43,17 @@ from superset.sql_parse import Table
from superset.utils import core as utils
from superset.utils.hashing import md5_sha_from_str
+try:
+ import pandas_gbq
+ from google.cloud import bigquery
+ from google.oauth2 import service_account
+
+ Client = bigquery.Client
+except ModuleNotFoundError:
+ bigquery = None
+ pandas_gbq = None
+ Client = None # for type checking
+
if TYPE_CHECKING:
from superset.models.core import Database # pragma: no cover
@@ -327,17 +339,10 @@ class BigQueryEngineSpec(BaseEngineSpec):
:param df: The dataframe with data to be uploaded
:param to_sql_kwargs: The kwargs to be passed to pandas.DataFrame.to_sql` method
"""
-
- try:
- # pylint: disable=import-outside-toplevel
- import pandas_gbq
- from google.oauth2 import service_account
- except ImportError as ex:
+ if pandas_gbq is None or service_account is None:
raise Exception(
- "Could not import libraries `pandas_gbq` or `google.oauth2`, which are "
- "required to be installed in your environment in order "
- "to upload data to BigQuery"
- ) from ex
+ "Could not import libraries needed to upload data to BigQuery."
+ )
if not table.schema:
raise Exception("The table schema must be defined")
@@ -366,6 +371,21 @@ class BigQueryEngineSpec(BaseEngineSpec):
pandas_gbq.to_gbq(df, **to_gbq_kwargs)
+ @classmethod
+ def _get_client(cls, engine: Engine) -> Client:
+ """
+ Return the BigQuery client associated with an engine.
+ """
+ if bigquery is None or service_account is None:
+ raise Exception(
+ "Could not import libraries needed to upload data to BigQuery."
+ )
+
+ credentials = service_account.Credentials.from_service_account_info(
+ engine.dialect.credentials_info
+ )
+ return bigquery.Client(credentials=credentials)
+
@classmethod
def estimate_query_cost(
cls,
@@ -395,35 +415,36 @@ class BigQueryEngineSpec(BaseEngineSpec):
costs.append(cls.estimate_statement_cost(processed_statement, database))
return costs
+ @classmethod
+ def get_catalog_names( # pylint: disable=unused-argument
+ cls,
+ database: "Database",
+ inspector: Inspector,
+ ) -> List[str]:
+ """
+ Get all catalogs.
+
+ In BigQuery, a catalog is called a "project".
+ """
+ with database.get_sqla_engine_with_context() as engine:
+ client = cls._get_client(engine)
+ projects = client.list_projects()
+
+ return sorted(project.project_id for project in projects)
+
@classmethod
def get_allow_cost_estimate(cls, extra: Dict[str, Any]) -> bool:
return True
@classmethod
def estimate_statement_cost(cls, statement: str, cursor: Any) -> Dict[str, Any]:
- try:
- # pylint: disable=import-outside-toplevel
- # It's the only way to perfom a dry-run estimate cost
- from google.cloud import bigquery
- from google.oauth2 import service_account
- except ImportError as ex:
- raise Exception(
- "Could not import libraries `pygibquery` or `google.oauth2`, which are "
- "required to be installed in your environment in order "
- "to upload data to BigQuery"
- ) from ex
-
with cls.get_engine(cursor) as engine:
- creds = engine.dialect.credentials_info
-
- creds = service_account.Credentials.from_service_account_info(creds)
- client = bigquery.Client(credentials=creds)
- job_config = bigquery.QueryJobConfig(dry_run=True)
-
- query_job = client.query(
- statement,
- job_config=job_config,
- ) # Make an API request.
+ client = cls._get_client(engine)
+ job_config = bigquery.QueryJobConfig(dry_run=True)
+ query_job = client.query(
+ statement,
+ job_config=job_config,
+ ) # Make an API request.
# Format Bytes.
# TODO: Humanize in case more db engine specs need to be added,