You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by vi...@apache.org on 2020/10/28 08:30:57 UTC

[incubator-superset] 03/08: chore(sql-lab): catch PyArrow deserialization error (#11201)

This is an automated email from the ASF dual-hosted git repository.

villebro pushed a commit to branch 0.38
in repository https://gitbox.apache.org/repos/asf/incubator-superset.git

commit 9aba60714917fcbf714f9c71f26ce9e3ffad881d
Author: Ville Brofeldt <33...@users.noreply.github.com>
AuthorDate: Mon Oct 12 10:16:00 2020 +0300

    chore(sql-lab): catch PyArrow deserialization error (#11201)
---
 superset/exceptions.py  |  4 ++++
 superset/views/core.py  | 17 +++++++++++++----
 superset/views/utils.py | 11 +++++++++--
 3 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/superset/exceptions.py b/superset/exceptions.py
index 3dca1de..c0d55f8 100644
--- a/superset/exceptions.py
+++ b/superset/exceptions.py
@@ -93,3 +93,7 @@ class QueryObjectValidationError(SupersetException):
 
 class DashboardImportException(SupersetException):
     pass
+
+
+class SerializationError(SupersetException):
+    pass
diff --git a/superset/views/core.py b/superset/views/core.py
index 36668cf..7296017 100755
--- a/superset/views/core.py
+++ b/superset/views/core.py
@@ -82,6 +82,7 @@ from superset.databases.filters import DatabaseFilter
 from superset.exceptions import (
     CertificateException,
     DatabaseNotFound,
+    SerializationError,
     SupersetException,
     SupersetSecurityException,
     SupersetTimeoutException,
@@ -1959,7 +1960,9 @@ class Superset(BaseSupersetView):  # pylint: disable=too-many-public-methods
         return self.results_exec(key)
 
     @staticmethod
-    def results_exec(key: str) -> FlaskResponse:
+    def results_exec(  # pylint: disable=too-many-return-statements
+        key: str,
+    ) -> FlaskResponse:
         """Serves a key off of the results backend
 
         It is possible to pass the `rows` query argument to limit the number
@@ -1993,9 +1996,15 @@ class Superset(BaseSupersetView):  # pylint: disable=too-many-public-methods
             return json_errors_response([ex.error], status=403)
 
         payload = utils.zlib_decompress(blob, decode=not results_backend_use_msgpack)
-        obj = _deserialize_results_payload(
-            payload, query, cast(bool, results_backend_use_msgpack)
-        )
+        try:
+            obj = _deserialize_results_payload(
+                payload, query, cast(bool, results_backend_use_msgpack)
+            )
+        except SerializationError:
+            return json_error_response(
+                __("Data could not be deserialized. You may want to re-run the query."),
+                status=404,
+            )
 
         if "rows" in request.args:
             try:
diff --git a/superset/views/utils.py b/superset/views/utils.py
index dc16494..226d9ef 100644
--- a/superset/views/utils.py
+++ b/superset/views/utils.py
@@ -39,7 +39,11 @@ from superset import (
 )
 from superset.connectors.connector_registry import ConnectorRegistry
 from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
-from superset.exceptions import SupersetException, SupersetSecurityException
+from superset.exceptions import (
+    SerializationError,
+    SupersetException,
+    SupersetSecurityException,
+)
 from superset.legacy import update_time_range
 from superset.models.core import Database
 from superset.models.dashboard import Dashboard
@@ -559,7 +563,10 @@ def _deserialize_results_payload(
             ds_payload = msgpack.loads(payload, raw=False)
 
         with stats_timing("sqllab.query.results_backend_pa_deserialize", stats_logger):
-            pa_table = pa.deserialize(ds_payload["data"])
+            try:
+                pa_table = pa.deserialize(ds_payload["data"])
+            except pa.ArrowSerializationError:
+                raise SerializationError("Unable to deserialize table")
 
         df = result_set.SupersetResultSet.convert_table_to_df(pa_table)
         ds_payload["data"] = dataframe.df_to_records(df) or []