You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by be...@apache.org on 2023/08/08 23:15:01 UTC
[superset] branch upload_gsheets updated (57adb51050 -> 215094f08a)
This is an automated email from the ASF dual-hosted git repository.
beto pushed a change to branch upload_gsheets
in repository https://gitbox.apache.org/repos/asf/superset.git
discard 57adb51050 feat(gsheets): file upload
new 215094f08a feat(gsheets): file upload
This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version. This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:
* -- * -- B -- O -- O -- O (57adb51050)
\
N -- N -- N refs/heads/upload_gsheets (215094f08a)
You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.
Any revisions marked "omit" are not gone; other references still
refer to them. Any revisions marked "discard" are gone forever.
The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
superset/db_engine_specs/gsheets.py | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
[superset] 01/01: feat(gsheets): file upload
Posted by be...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
beto pushed a commit to branch upload_gsheets
in repository https://gitbox.apache.org/repos/asf/superset.git
commit 215094f08a756964af21110620a633fd2eeb2b50
Author: Beto Dealmeida <ro...@dealmeida.net>
AuthorDate: Tue Aug 8 15:58:56 2023 -0700
feat(gsheets): file upload
---
superset/db_engine_specs/gsheets.py | 157 ++++++++++++++++++++++++++++++++----
1 file changed, 142 insertions(+), 15 deletions(-)
diff --git a/superset/db_engine_specs/gsheets.py b/superset/db_engine_specs/gsheets.py
index 777499a8f9..45ee757289 100644
--- a/superset/db_engine_specs/gsheets.py
+++ b/superset/db_engine_specs/gsheets.py
@@ -14,30 +14,42 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
+
+from __future__ import annotations
+
import json
+import logging
import re
from re import Pattern
-from typing import Any, Optional, TYPE_CHECKING
+from typing import Any, TYPE_CHECKING
+import pandas as pd
from apispec import APISpec
from apispec.ext.marshmallow import MarshmallowPlugin
from flask import g
from flask_babel import gettext as __
from marshmallow import fields, Schema
from marshmallow.exceptions import ValidationError
+from requests import Session
+from shillelagh.backends.apsw.dialects.base import get_adapter_for_table_name
from sqlalchemy.engine import create_engine
from sqlalchemy.engine.url import URL
from typing_extensions import TypedDict
-from superset import security_manager
+from superset import db, security_manager
from superset.constants import PASSWORD_MASK
from superset.databases.schemas import encrypted_field_properties, EncryptedString
from superset.db_engine_specs.shillelagh import ShillelaghEngineSpec
from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
+from superset.exceptions import SupersetException
if TYPE_CHECKING:
from superset.models.core import Database
+ from superset.sql_parse import Table
+_logger = logging.getLogger()
+
+EXAMPLE_GSHEETS_URL = "https://docs.google.com/spreadsheets/d/1LcWZMsdCl92g7nA-D6qGRqg1T5TiHyuKJUY1u9XAnsk/edit#gid=0"
SYNTAX_ERROR_REGEX = re.compile('SQLError: near "(?P<server_error>.*?)": syntax error')
@@ -57,7 +69,7 @@ class GSheetsParametersSchema(Schema):
class GSheetsParametersType(TypedDict):
service_account_info: str
- catalog: Optional[dict[str, str]]
+ catalog: dict[str, str] | None
class GSheetsPropertiesType(TypedDict):
@@ -88,14 +100,14 @@ class GSheetsEngineSpec(ShillelaghEngineSpec):
),
}
- supports_file_upload = False
+ supports_file_upload = True
@classmethod
def get_url_for_impersonation(
cls,
url: URL,
impersonate_user: bool,
- username: Optional[str],
+ username: str | None,
) -> URL:
if impersonate_user and username is not None:
user = security_manager.find_user(username=username)
@@ -107,9 +119,9 @@ class GSheetsEngineSpec(ShillelaghEngineSpec):
@classmethod
def extra_table_metadata(
cls,
- database: "Database",
+ database: Database,
table_name: str,
- schema_name: Optional[str],
+ schema_name: str | None,
) -> dict[str, Any]:
with database.get_raw_connection(schema=schema_name) as conn:
cursor = conn.cursor()
@@ -126,9 +138,8 @@ class GSheetsEngineSpec(ShillelaghEngineSpec):
def build_sqlalchemy_uri(
cls,
_: GSheetsParametersType,
- encrypted_extra: Optional[ # pylint: disable=unused-argument
- dict[str, Any]
- ] = None,
+ encrypted_extra: None
+ | (dict[str, Any]) = None, # pylint: disable=unused-argument
) -> str:
return "gsheets://"
@@ -136,7 +147,7 @@ class GSheetsEngineSpec(ShillelaghEngineSpec):
def get_parameters_from_uri(
cls,
uri: str, # pylint: disable=unused-argument
- encrypted_extra: Optional[dict[str, Any]] = None,
+ encrypted_extra: dict[str, Any] | None = None,
) -> Any:
# Building parameters from encrypted_extra and uri
if encrypted_extra:
@@ -145,7 +156,7 @@ class GSheetsEngineSpec(ShillelaghEngineSpec):
raise ValidationError("Invalid service credentials")
@classmethod
- def mask_encrypted_extra(cls, encrypted_extra: Optional[str]) -> Optional[str]:
+ def mask_encrypted_extra(cls, encrypted_extra: str | None) -> str | None:
if encrypted_extra is None:
return encrypted_extra
@@ -162,9 +173,7 @@ class GSheetsEngineSpec(ShillelaghEngineSpec):
return json.dumps(config)
@classmethod
- def unmask_encrypted_extra(
- cls, old: Optional[str], new: Optional[str]
- ) -> Optional[str]:
+ def unmask_encrypted_extra(cls, old: str | None, new: str | None) -> str | None:
"""
Reuse ``private_key`` if available and unchanged.
"""
@@ -299,3 +308,121 @@ class GSheetsEngineSpec(ShillelaghEngineSpec):
)
idx += 1
return errors
+
+ @staticmethod
+ def _do_post(
+ session: Session,
+ url: str,
+ body: dict[str, Any],
+ **kwargs: Any,
+ ) -> dict[str, Any]:
+ """
+ POST to the Google API.
+
+ Helper function that handles logging and error handling.
+ """
+ _logger.info("POST %s", url)
+ _logger.debug(body)
+ response = session.post(
+ url,
+ json=body,
+ **kwargs,
+ )
+
+ payload = response.json()
+ _logger.debug(payload)
+
+ if "error" in payload:
+ raise SupersetException(payload["error"]["message"])
+
+ return payload
+
+ @classmethod
+ def df_to_sql(
+ cls,
+ database: Database,
+ table: Table,
+ df: pd.DataFrame,
+ to_sql_kwargs: dict[str, Any],
+ ) -> None:
+ """
+ Create a new sheet and update the DB catalog.
+
+ Since Google Sheets is not a database, uploading a file is slightly different
+ from other traditional databases. To create a table with a given name we first
+ create a spreadsheet with the contents of the dataframe, and we later update the
+ database catalog to add a mapping between the desired table name and the URL of
+ the new sheet.
+
+ If the table already exists and the user wants it replaced we clear all the
+ cells in the existing sheet before uploading the new data. Appending to an
+ existing table is not supported because we can't ensure that the schemas match.
+ """
+ # grab the existing catalog, if any
+ extra = database.get_extra()
+ engine_params = extra.setdefault("engine_params", {})
+ catalog = engine_params.setdefault("catalog", {})
+
+ # sanity checks
+ spreadsheet_url = catalog.get(table.table)
+ if spreadsheet_url and "if_exists" in to_sql_kwargs:
+ if to_sql_kwargs["if_exists"] == "append":
+ # no way we're going to append a dataframe to a spreadsheet, that's
+ # never going to work
+ raise SupersetException("Append operation not currently supported")
+ if to_sql_kwargs["if_exists"] == "fail":
+ raise SupersetException("Table already exists")
+ if to_sql_kwargs["if_exists"] == "replace":
+ pass
+
+ # get the Google session from the Shillelagh adapter
+ with cls.get_engine(database) as engine:
+ with engine.connect() as conn:
+ # any GSheets URL will work to get a working session
+ adapter = get_adapter_for_table_name(
+ conn,
+ spreadsheet_url or EXAMPLE_GSHEETS_URL,
+ )
+ session = adapter._get_session() # pylint: disable=protected-access
+
+ # clear existing sheet, or create a new one
+ if spreadsheet_url:
+ spreadsheet_id = adapter._spreadsheet_id
+ range_ = adapter._sheet_name
+ url = (
+ "https://sheets.googleapis.com/v4/spreadsheets/"
+ f"{spreadsheet_id}/values/'{range_}':clear"
+ )
+ cls._do_post(session, url, {})
+ else:
+ payload = cls._do_post(
+ session,
+ "https://sheets.googleapis.com/v4/spreadsheets",
+ {"properties": {"title": table.table}},
+ )
+ spreadsheet_id = payload["spreadsheetId"]
+ range_ = payload["sheets"][0]["properties"]["title"]
+ spreadsheet_url = payload["spreadsheetUrl"]
+
+ # insert data
+ body = {
+ "range": range_,
+ "majorDimension": "ROWS",
+ "values": df.values.tolist(),
+ }
+ url = (
+ "https://sheets.googleapis.com/v4/spreadsheets/"
+ f"{spreadsheet_id}/values/{range_}:append"
+ )
+ cls._do_post(
+ session,
+ url,
+ body,
+ params={"valueInputOption": "USER_ENTERED"},
+ )
+
+ # update catalog
+ catalog[table.table] = spreadsheet_url
+ database.extra = json.dumps(extra)
+ db.session.add(database)
+ db.session.commit()