Posted to commits@superset.apache.org by be...@apache.org on 2023/03/22 22:33:32 UTC

[superset] branch bigquery_get_catalog_names created (now 3b25de4c88)

This is an automated email from the ASF dual-hosted git repository.

beto pushed a change to branch bigquery_get_catalog_names
in repository https://gitbox.apache.org/repos/asf/superset.git


      at 3b25de4c88 feat(bigquery): get_catalog_names

This branch includes the following new commits:

     new 3b25de4c88 feat(bigquery): get_catalog_names

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[superset] 01/01: feat(bigquery): get_catalog_names

Posted by be...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

beto pushed a commit to branch bigquery_get_catalog_names
in repository https://gitbox.apache.org/repos/asf/superset.git

commit 3b25de4c88461c0370f93b8ebf4d0691e19f36a8
Author: Beto Dealmeida <ro...@dealmeida.net>
AuthorDate: Wed Mar 22 15:33:13 2023 -0700

    feat(bigquery): get_catalog_names
---
 superset/db_engine_specs/bigquery.py | 86 ++++++++++++++++++++++--------------
 1 file changed, 54 insertions(+), 32 deletions(-)

diff --git a/superset/db_engine_specs/bigquery.py b/superset/db_engine_specs/bigquery.py
index 171dad4732..c65d1c2f0a 100644
--- a/superset/db_engine_specs/bigquery.py
+++ b/superset/db_engine_specs/bigquery.py
@@ -28,6 +28,7 @@ from marshmallow import fields, Schema
 from marshmallow.exceptions import ValidationError
 from sqlalchemy import column, types
 from sqlalchemy.engine.base import Engine
+from sqlalchemy.engine.reflection import Inspector
 from sqlalchemy.sql import sqltypes
 from typing_extensions import TypedDict
 
@@ -42,6 +43,18 @@ from superset.sql_parse import Table
 from superset.utils import core as utils
 from superset.utils.hashing import md5_sha_from_str
 
+try:
+    import pandas_gbq
+    from google.cloud import bigquery
+    from google.oauth2 import service_account
+
+    Client = bigquery.Client
+except ModuleNotFoundError:
+    bigquery = None
+    pandas_gbq = None
+    service_account = None  # also reset; the guards below test this name
+    Client = None  # for type checking
+
 if TYPE_CHECKING:
     from superset.models.core import Database  # pragma: no cover
 
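
The hunk above replaces per-call imports with module-level optional imports. A minimal standalone sketch of the same pattern, assuming only that the three packages may or may not be installed (the require_bigquery helper is hypothetical, not part of this commit):

    try:
        import pandas_gbq
        from google.cloud import bigquery
        from google.oauth2 import service_account
    except ModuleNotFoundError:
        # Reset every imported name so "is None" guards at call sites work.
        pandas_gbq = bigquery = service_account = None

    def require_bigquery() -> None:
        # Hypothetical helper: one uniform error when the extras are missing.
        if pandas_gbq is None or bigquery is None or service_account is None:
            raise RuntimeError(
                "pandas_gbq and google-cloud-bigquery must be installed "
                "to use BigQuery features."
            )

The guard only fires when a BigQuery feature is actually exercised, so Superset installs without the extras keep working.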
@@ -327,17 +339,10 @@ class BigQueryEngineSpec(BaseEngineSpec):
         :param df: The dataframe with data to be uploaded
         :param to_sql_kwargs: The kwargs to be passed to the `pandas.DataFrame.to_sql` method
         """
-
-        try:
-            # pylint: disable=import-outside-toplevel
-            import pandas_gbq
-            from google.oauth2 import service_account
-        except ImportError as ex:
+        if pandas_gbq is None or service_account is None:
             raise Exception(
-                "Could not import libraries `pandas_gbq` or `google.oauth2`, which are "
-                "required to be installed in your environment in order "
-                "to upload data to BigQuery"
-            ) from ex
+                "Could not import libraries needed to upload data to BigQuery."
+            )
 
         if not table.schema:
             raise Exception("The table schema must be defined")
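
For reference, this is roughly how the pandas_gbq path above is driven outside Superset; the key file path "key.json" and the destination table are hypothetical placeholders:

    import pandas as pd
    import pandas_gbq
    from google.oauth2 import service_account

    # Hypothetical service-account key file and destination table.
    credentials = service_account.Credentials.from_service_account_file("key.json")
    df = pd.DataFrame({"id": [1, 2], "name": ["alice", "bob"]})
    pandas_gbq.to_gbq(
        df,
        destination_table="my_dataset.my_table",
        credentials=credentials,
        if_exists="replace",  # analogous to the to_sql if_exists kwarg
    )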
@@ -366,6 +371,21 @@ class BigQueryEngineSpec(BaseEngineSpec):
 
         pandas_gbq.to_gbq(df, **to_gbq_kwargs)
 
+    @classmethod
+    def _get_client(cls, engine: Engine) -> Client:
+        """
+        Return the BigQuery client associated with an engine.
+        """
+        if bigquery is None or service_account is None:
+            raise Exception(
+                "Could not import libraries needed to connect to BigQuery."
+            )
+
+        credentials = service_account.Credentials.from_service_account_info(
+            engine.dialect.credentials_info
+        )
+        return bigquery.Client(credentials=credentials)
+
     @classmethod
     def estimate_query_cost(
         cls,
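
The new _get_client helper above builds a client from the dialect's credentials_info dict. A rough standalone equivalent, assuming a service-account key file at a hypothetical path (outside Superset there is no engine to read the credentials from):

    from google.cloud import bigquery
    from google.oauth2 import service_account

    # Superset reads a dict from engine.dialect.credentials_info and calls
    # from_service_account_info(); a key file works the same way here.
    credentials = service_account.Credentials.from_service_account_file("key.json")
    client = bigquery.Client(credentials=credentials)

    # What get_catalog_names (next hunk) does with the client: BigQuery
    # "catalogs" are the projects visible to the credentials.
    print(sorted(project.project_id for project in client.list_projects()))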
@@ -395,35 +415,36 @@ class BigQueryEngineSpec(BaseEngineSpec):
             costs.append(cls.estimate_statement_cost(processed_statement, database))
         return costs
 
+    @classmethod
+    def get_catalog_names(  # pylint: disable=unused-argument
+        cls,
+        database: "Database",
+        inspector: Inspector,
+    ) -> List[str]:
+        """
+        Get all catalogs.
+
+        In BigQuery, a catalog is called a "project".
+        """
+        with database.get_sqla_engine_with_context() as engine:
+            client = cls._get_client(engine)
+            projects = client.list_projects()
+
+        return sorted(project.project_id for project in projects)
+
     @classmethod
     def get_allow_cost_estimate(cls, extra: Dict[str, Any]) -> bool:
         return True
 
     @classmethod
     def estimate_statement_cost(cls, statement: str, cursor: Any) -> Dict[str, Any]:
-        try:
-            # pylint: disable=import-outside-toplevel
-            # It's the only way to perfom a dry-run estimate cost
-            from google.cloud import bigquery
-            from google.oauth2 import service_account
-        except ImportError as ex:
-            raise Exception(
-                "Could not import libraries `pygibquery` or `google.oauth2`, which are "
-                "required to be installed in your environment in order "
-                "to upload data to BigQuery"
-            ) from ex
-
         with cls.get_engine(cursor) as engine:
-            creds = engine.dialect.credentials_info
-
-        creds = service_account.Credentials.from_service_account_info(creds)
-        client = bigquery.Client(credentials=creds)
-        job_config = bigquery.QueryJobConfig(dry_run=True)
-
-        query_job = client.query(
-            statement,
-            job_config=job_config,
-        )  # Make an API request.
+            client = cls._get_client(engine)
+            job_config = bigquery.QueryJobConfig(dry_run=True)
+            query_job = client.query(
+                statement,
+                job_config=job_config,
+            )  # Make an API request.
 
         # Format Bytes.
         # TODO: Humanize in case more db engine specs need to be added,
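
The rewritten estimate_statement_cost relies on BigQuery dry runs. A minimal sketch of that mechanism on its own, using the same hypothetical "key.json" credentials as above:

    from google.cloud import bigquery
    from google.oauth2 import service_account

    credentials = service_account.Credentials.from_service_account_file("key.json")
    client = bigquery.Client(credentials=credentials)

    # dry_run=True validates the statement and reports the bytes it would
    # scan, without executing it or incurring query cost.
    job_config = bigquery.QueryJobConfig(dry_run=True)
    query_job = client.query("SELECT 1", job_config=job_config)
    print(f"Would process {query_job.total_bytes_processed} bytes")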