You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by el...@apache.org on 2023/01/06 23:15:23 UTC
[superset] branch master updated: fix: stringify ValueErrors for NaT types (#22628)
This is an automated email from the ASF dual-hosted git repository.
elizabeth pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git
The following commit(s) were added to refs/heads/master by this push:
new 804e89d725 fix: stringify ValueErrors for NaT types (#22628)
804e89d725 is described below
commit 804e89d725cd9202ff595d1fe5b49bdffa235d73
Author: Elizabeth Thompson <es...@gmail.com>
AuthorDate: Fri Jan 6 15:15:12 2023 -0800
fix: stringify ValueErrors for NaT types (#22628)
---
superset/result_set.py | 1 +
tests/unit_tests/dataframe_test.py | 18 ++++++++++++++++++
tests/unit_tests/result_set_test.py | 34 ++++++++++++++++++++++++++++++++++
3 files changed, 53 insertions(+)
diff --git a/superset/result_set.py b/superset/result_set.py
index a6f39f76bf..373aaf1b72 100644
--- a/superset/result_set.py
+++ b/superset/result_set.py
@@ -135,6 +135,7 @@ class SupersetResultSet:
pa.lib.ArrowInvalid,
pa.lib.ArrowTypeError,
pa.lib.ArrowNotImplementedError,
+ ValueError,
TypeError, # this is super hacky,
# https://issues.apache.org/jira/browse/ARROW-7855
):
diff --git a/tests/unit_tests/dataframe_test.py b/tests/unit_tests/dataframe_test.py
index 016d2f4d9b..3d8bd15aef 100644
--- a/tests/unit_tests/dataframe_test.py
+++ b/tests/unit_tests/dataframe_test.py
@@ -19,6 +19,7 @@ from datetime import datetime
import pytest
from pandas import Timestamp
+from pandas._libs.tslibs import NaT
from superset.dataframe import df_to_records
from superset.superset_typing import DbapiDescription
@@ -41,6 +42,23 @@ def test_df_to_records() -> None:
]
+def test_df_to_records_NaT_type() -> None:
+ from superset.db_engine_specs import BaseEngineSpec
+ from superset.result_set import SupersetResultSet
+
+ data = [(NaT,), (Timestamp("2023-01-06 20:50:31.749000+0000", tz="UTC"),)]
+ cursor_descr: DbapiDescription = [
+ ("date", "timestamp with time zone", None, None, None, None, False)
+ ]
+ results = SupersetResultSet(data, cursor_descr, BaseEngineSpec)
+ df = results.to_pandas_df()
+
+ assert df_to_records(df) == [
+ {"date": None},
+ {"date": '"2023-01-06T20:50:31.749000+00:00"'},
+ ]
+
+
def test_js_max_int() -> None:
from superset.db_engine_specs import BaseEngineSpec
from superset.result_set import SupersetResultSet
diff --git a/tests/unit_tests/result_set_test.py b/tests/unit_tests/result_set_test.py
index e7371f5c0f..0a78e0a5ed 100644
--- a/tests/unit_tests/result_set_test.py
+++ b/tests/unit_tests/result_set_test.py
@@ -106,3 +106,37 @@ def test_stringify_with_null_integers():
)
assert np.array_equal(result_set, expected)
+
+
+def test_stringify_with_null_timestamps():
+ """
+ Test that we can safely handle conversion errors (e.g. ValueError) when a timestamp column has a null (NaT) value
+ """
+
+ data = [
+ ("foo", "bar", pd.NaT, None),
+ ("foo", "bar", pd.NaT, True),
+ ("foo", "bar", pd.NaT, None),
+ ]
+ numpy_dtype = [
+ ("id", "object"),
+ ("value", "object"),
+ ("num", "object"),
+ ("bool", "object"),
+ ]
+
+ array2 = np.array(data, dtype=numpy_dtype)
+ column_names = ["id", "value", "num", "bool"]
+
+ result_set = np.array([stringify_values(array2[column]) for column in column_names])
+
+ expected = np.array(
+ [
+ array(['"foo"', '"foo"', '"foo"'], dtype=object),
+ array(['"bar"', '"bar"', '"bar"'], dtype=object),
+ array([None, None, None], dtype=object),
+ array([None, "true", None], dtype=object),
+ ]
+ )
+
+ assert np.array_equal(result_set, expected)