Posted to commits@superset.apache.org by be...@apache.org on 2023/03/22 22:35:22 UTC

[superset] branch bigquery_get_catalog_names updated (3b25de4c88 -> 56618246b4)

This is an automated email from the ASF dual-hosted git repository.

beto pushed a change to branch bigquery_get_catalog_names
in repository https://gitbox.apache.org/repos/asf/superset.git


 discard 3b25de4c88 feat(bigquery): get_catalog_names
     new 56618246b4 feat(bigquery): get_catalog_names

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs when a
user force-pushes a change, leaving the repository with a history
like this:

 * -- * -- B -- O -- O -- O   (3b25de4c88)
            \
             N -- N -- N   refs/heads/bigquery_get_catalog_names (56618246b4)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  Any revisions
listed as "add" were already present in the repository and have only
been added to this reference.
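
A force-push replaces the published tip of a branch with a rewritten
one, which is what produces the O/N split diagrammed above. As an
illustration (branch name taken from this email; the remote name
"origin" is an assumption), one sequence that can produce such a
history is:

    git checkout bigquery_get_catalog_names
    git commit --amend    # rewrite the tip; old commit 3b25de4c88 is discarded
    git push --force origin bigquery_get_catalog_names    # publish 56618246b4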


Summary of changes:
 superset/db_engine_specs/bigquery.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)


[superset] 01/01: feat(bigquery): get_catalog_names

Posted by be...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

beto pushed a commit to branch bigquery_get_catalog_names
in repository https://gitbox.apache.org/repos/asf/superset.git

commit 56618246b48340a8db8868d9e22c0669a2852f31
Author: Beto Dealmeida <ro...@dealmeida.net>
AuthorDate: Wed Mar 22 15:33:13 2023 -0700

    feat(bigquery): get_catalog_names
---
 superset/db_engine_specs/bigquery.py | 84 ++++++++++++++++++++++--------
 1 file changed, 52 insertions(+), 32 deletions(-)
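
In outline, the commit below hoists the optional Google imports to
module level, adds a _get_client() helper that builds a BigQuery
client from the engine's service-account credentials, and implements
get_catalog_names() on top of it. For reference, a minimal standalone
sketch of the same pattern; list_catalogs() and credentials_info are
illustrative names, not part of the commit:

    from typing import Any, Dict, List

    try:
        from google.cloud import bigquery
        from google.oauth2 import service_account
    except ModuleNotFoundError:  # the Google libraries are optional extras
        bigquery = None
        service_account = None

    def list_catalogs(credentials_info: Dict[str, Any]) -> List[str]:
        """List the BigQuery "catalogs" (projects) visible to the credentials."""
        if bigquery is None or service_account is None:
            raise Exception(
                "Could not import libraries needed to connect to BigQuery."
            )
        credentials = service_account.Credentials.from_service_account_info(
            credentials_info
        )
        client = bigquery.Client(credentials=credentials)
        # Client.list_projects() yields objects exposing a project_id attribute.
        return sorted(project.project_id for project in client.list_projects())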

diff --git a/superset/db_engine_specs/bigquery.py b/superset/db_engine_specs/bigquery.py
index 171dad4732..26e3df0a9c 100644
--- a/superset/db_engine_specs/bigquery.py
+++ b/superset/db_engine_specs/bigquery.py
@@ -28,6 +28,7 @@ from marshmallow import fields, Schema
 from marshmallow.exceptions import ValidationError
 from sqlalchemy import column, types
 from sqlalchemy.engine.base import Engine
+from sqlalchemy.engine.reflection import Inspector
 from sqlalchemy.sql import sqltypes
 from typing_extensions import TypedDict
 
@@ -42,6 +43,18 @@ from superset.sql_parse import Table
 from superset.utils import core as utils
 from superset.utils.hashing import md5_sha_from_str
 
+try:
+    import pandas_gbq
+    from google.cloud import bigquery
+    from google.oauth2 import service_account
+
+    Client = bigquery.Client
+except ModuleNotFoundError:
+    bigquery = None
+    pandas_gbq = None
+    service_account = None
+    Client = None  # for type checking
+
 if TYPE_CHECKING:
     from superset.models.core import Database  # pragma: no cover
 
@@ -327,17 +340,10 @@ class BigQueryEngineSpec(BaseEngineSpec):
         :param df: The dataframe with data to be uploaded
         :param to_sql_kwargs: The kwargs to be passed to pandas.DataFrame.to_sql` method
         """
-
-        try:
-            # pylint: disable=import-outside-toplevel
-            import pandas_gbq
-            from google.oauth2 import service_account
-        except ImportError as ex:
+        if pandas_gbq is None or service_account is None:
             raise Exception(
-                "Could not import libraries `pandas_gbq` or `google.oauth2`, which are "
-                "required to be installed in your environment in order "
-                "to upload data to BigQuery"
-            ) from ex
+                "Could not import libraries needed to upload data to BigQuery."
+            )
 
         if not table.schema:
             raise Exception("The table schema must be defined")
@@ -366,6 +372,19 @@
 
         pandas_gbq.to_gbq(df, **to_gbq_kwargs)
 
+    @classmethod
+    def _get_client(cls, engine: Engine) -> Client:
+        """
+        Return the BigQuery client associated with an engine.
+        """
+        if bigquery is None or service_account is None:
+            raise Exception("Could not import libraries needed to connect to BigQuery.")
+
+        credentials = service_account.Credentials.from_service_account_info(
+            engine.dialect.credentials_info
+        )
+        return bigquery.Client(credentials=credentials)
+
     @classmethod
     def estimate_query_cost(
         cls,
@@ -395,35 +414,36 @@
             costs.append(cls.estimate_statement_cost(processed_statement, database))
         return costs
 
+    @classmethod
+    def get_catalog_names(  # pylint: disable=unused-argument
+        cls,
+        database: "Database",
+        inspector: Inspector,
+    ) -> List[str]:
+        """
+        Get all catalogs.
+
+        In BigQuery, a catalog is called a "project".
+        """
+        with database.get_sqla_engine_with_context() as engine:
+            client = cls._get_client(engine)
+            projects = client.list_projects()
+
+        return sorted(project.project_id for project in projects)
+
     @classmethod
     def get_allow_cost_estimate(cls, extra: Dict[str, Any]) -> bool:
         return True
 
     @classmethod
     def estimate_statement_cost(cls, statement: str, cursor: Any) -> Dict[str, Any]:
-        try:
-            # pylint: disable=import-outside-toplevel
-            # It's the only way to perfom a dry-run estimate cost
-            from google.cloud import bigquery
-            from google.oauth2 import service_account
-        except ImportError as ex:
-            raise Exception(
-                "Could not import libraries `pygibquery` or `google.oauth2`, which are "
-                "required to be installed in your environment in order "
-                "to upload data to BigQuery"
-            ) from ex
-
         with cls.get_engine(cursor) as engine:
-            creds = engine.dialect.credentials_info
-
-        creds = service_account.Credentials.from_service_account_info(creds)
-        client = bigquery.Client(credentials=creds)
-        job_config = bigquery.QueryJobConfig(dry_run=True)
-
-        query_job = client.query(
-            statement,
-            job_config=job_config,
-        )  # Make an API request.
+            client = cls._get_client(engine)
+            job_config = bigquery.QueryJobConfig(dry_run=True)
+            query_job = client.query(
+                statement,
+                job_config=job_config,
+            )  # Make an API request.
 
         # Format Bytes.
         # TODO: Humanize in case more db engine specs need to be added,
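
The method then formats the resulting byte count (the "Format Bytes"
comment above). For reference, a minimal standalone sketch of the
dry-run pattern it relies on; dry_run_bytes() is an illustrative
helper, not part of the commit, and assumes a client built as in
_get_client():

    from google.cloud import bigquery

    def dry_run_bytes(client: bigquery.Client, statement: str) -> int:
        """Estimate the bytes a statement would scan, without running it."""
        job_config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False)
        # With dry_run=True the job is validated and planned but never
        # executed; BigQuery reports the bytes the query would process.
        query_job = client.query(statement, job_config=job_config)
        return query_job.total_bytes_processed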