You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by be...@apache.org on 2023/03/22 22:35:22 UTC
[superset] branch bigquery_get_catalog_names updated (3b25de4c88 -> 56618246b4)
This is an automated email from the ASF dual-hosted git repository.
beto pushed a change to branch bigquery_get_catalog_names
in repository https://gitbox.apache.org/repos/asf/superset.git
discard 3b25de4c88 feat(bigquery): get_catalog_names
new 56618246b4 feat(bigquery): get_catalog_names
This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version. This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:
* -- * -- B -- O -- O -- O (3b25de4c88)
\
N -- N -- N refs/heads/bigquery_get_catalog_names (56618246b4)
You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.
Any revisions marked "omit" are not gone; other references still
refer to them. Any revisions marked "discard" are gone forever.
The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
superset/db_engine_specs/bigquery.py | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
[superset] 01/01: feat(bigquery): get_catalog_names
Posted by be...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
beto pushed a commit to branch bigquery_get_catalog_names
in repository https://gitbox.apache.org/repos/asf/superset.git
commit 56618246b48340a8db8868d9e22c0669a2852f31
Author: Beto Dealmeida <ro...@dealmeida.net>
AuthorDate: Wed Mar 22 15:33:13 2023 -0700
feat(bigquery): get_catalog_names
---
superset/db_engine_specs/bigquery.py | 83 ++++++++++++++++++++++--------------
1 file changed, 51 insertions(+), 32 deletions(-)
diff --git a/superset/db_engine_specs/bigquery.py b/superset/db_engine_specs/bigquery.py
index 171dad4732..26e3df0a9c 100644
--- a/superset/db_engine_specs/bigquery.py
+++ b/superset/db_engine_specs/bigquery.py
@@ -28,6 +28,7 @@ from marshmallow import fields, Schema
from marshmallow.exceptions import ValidationError
from sqlalchemy import column, types
from sqlalchemy.engine.base import Engine
+from sqlalchemy.engine.reflection import Inspector
from sqlalchemy.sql import sqltypes
from typing_extensions import TypedDict
@@ -42,6 +43,17 @@ from superset.sql_parse import Table
from superset.utils import core as utils
from superset.utils.hashing import md5_sha_from_str
+try:
+ import pandas_gbq
+ from google.cloud import bigquery
+ from google.oauth2 import service_account
+
+ Client = bigquery.Client
+except ModuleNotFoundError:
+ bigquery = None
+ pandas_gbq = None
+ Client = None # for type checking
+
if TYPE_CHECKING:
from superset.models.core import Database # pragma: no cover
@@ -327,17 +339,10 @@ class BigQueryEngineSpec(BaseEngineSpec):
:param df: The dataframe with data to be uploaded
:param to_sql_kwargs: The kwargs to be passed to pandas.DataFrame.to_sql` method
"""
-
- try:
- # pylint: disable=import-outside-toplevel
- import pandas_gbq
- from google.oauth2 import service_account
- except ImportError as ex:
+ if pandas_gbq is None or service_account is None:
raise Exception(
- "Could not import libraries `pandas_gbq` or `google.oauth2`, which are "
- "required to be installed in your environment in order "
- "to upload data to BigQuery"
- ) from ex
+ "Could not import libraries needed to upload data to BigQuery."
+ )
if not table.schema:
raise Exception("The table schema must be defined")
@@ -366,6 +371,19 @@ class BigQueryEngineSpec(BaseEngineSpec):
pandas_gbq.to_gbq(df, **to_gbq_kwargs)
+ @classmethod
+ def _get_client(cls, engine: Engine) -> Client:
+ """
+ Return the BigQuery client associated with an engine.
+ """
+ if bigquery is None or service_account is None:
+ raise Exception("Could not import libraries needed to connect to BigQuery.")
+
+ credentials = service_account.Credentials.from_service_account_info(
+ engine.dialect.credentials_info
+ )
+ return bigquery.Client(credentials=credentials)
+
@classmethod
def estimate_query_cost(
cls,
@@ -395,35 +413,36 @@ class BigQueryEngineSpec(BaseEngineSpec):
costs.append(cls.estimate_statement_cost(processed_statement, database))
return costs
+ @classmethod
+ def get_catalog_names( # pylint: disable=unused-argument
+ cls,
+ database: "Database",
+ inspector: Inspector,
+ ) -> List[str]:
+ """
+ Get all catalogs.
+
+ In BigQuery, a catalog is called a "project".
+ """
+ with database.get_sqla_engine_with_context() as engine:
+ client = cls._get_client(engine)
+ projects = client.list_projects()
+
+ return sorted(project.project_id for project in projects)
+
@classmethod
def get_allow_cost_estimate(cls, extra: Dict[str, Any]) -> bool:
return True
@classmethod
def estimate_statement_cost(cls, statement: str, cursor: Any) -> Dict[str, Any]:
- try:
- # pylint: disable=import-outside-toplevel
- # It's the only way to perfom a dry-run estimate cost
- from google.cloud import bigquery
- from google.oauth2 import service_account
- except ImportError as ex:
- raise Exception(
- "Could not import libraries `pygibquery` or `google.oauth2`, which are "
- "required to be installed in your environment in order "
- "to upload data to BigQuery"
- ) from ex
-
with cls.get_engine(cursor) as engine:
- creds = engine.dialect.credentials_info
-
- creds = service_account.Credentials.from_service_account_info(creds)
- client = bigquery.Client(credentials=creds)
- job_config = bigquery.QueryJobConfig(dry_run=True)
-
- query_job = client.query(
- statement,
- job_config=job_config,
- ) # Make an API request.
+ client = cls._get_client(engine)
+ job_config = bigquery.QueryJobConfig(dry_run=True)
+ query_job = client.query(
+ statement,
+ job_config=job_config,
+ ) # Make an API request.
# Format Bytes.
# TODO: Humanize in case more db engine specs need to be added,