You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by el...@apache.org on 2023/01/06 23:15:23 UTC
[superset] branch master updated: fix: stringify ValueErrors for NaT types (#22628)

This is an automated email from the ASF dual-hosted git repository.

elizabeth pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git


The following commit(s) were added to refs/heads/master by this push:
     new 804e89d725 fix: stringify ValueErrors for NaT types (#22628)
804e89d725 is described below

commit 804e89d725cd9202ff595d1fe5b49bdffa235d73
Author: Elizabeth Thompson <es...@gmail.com>
AuthorDate: Fri Jan 6 15:15:12 2023 -0800

    fix: stringify ValueErrors for NaT types (#22628)
---
 superset/result_set.py              |  1 +
 tests/unit_tests/dataframe_test.py  | 18 ++++++++++++++++++
 tests/unit_tests/result_set_test.py | 34 ++++++++++++++++++++++++++++++++++
 3 files changed, 53 insertions(+)

diff --git a/superset/result_set.py b/superset/result_set.py
index a6f39f76bf..373aaf1b72 100644
--- a/superset/result_set.py
+++ b/superset/result_set.py
@@ -135,6 +135,7 @@ class SupersetResultSet:
                     pa.lib.ArrowInvalid,
                     pa.lib.ArrowTypeError,
                     pa.lib.ArrowNotImplementedError,
+                    ValueError,
                     TypeError,  # this is super hackey,
                     # https://issues.apache.org/jira/browse/ARROW-7855
                 ):
diff --git a/tests/unit_tests/dataframe_test.py b/tests/unit_tests/dataframe_test.py
index 016d2f4d9b..3d8bd15aef 100644
--- a/tests/unit_tests/dataframe_test.py
+++ b/tests/unit_tests/dataframe_test.py
@@ -19,6 +19,7 @@ from datetime import datetime
 
 import pytest
 from pandas import Timestamp
+from pandas._libs.tslibs import NaT
 
 from superset.dataframe import df_to_records
 from superset.superset_typing import DbapiDescription
@@ -41,6 +42,23 @@ def test_df_to_records() -> None:
     ]
 
 
+def test_df_to_records_NaT_type() -> None:
+    from superset.db_engine_specs import BaseEngineSpec
+    from superset.result_set import SupersetResultSet
+
+    data = [(NaT,), (Timestamp("2023-01-06 20:50:31.749000+0000", tz="UTC"),)]
+    cursor_descr: DbapiDescription = [
+        ("date", "timestamp with time zone", None, None, None, None, False)
+    ]
+    results = SupersetResultSet(data, cursor_descr, BaseEngineSpec)
+    df = results.to_pandas_df()
+
+    assert df_to_records(df) == [
+        {"date": None},
+        {"date": '"2023-01-06T20:50:31.749000+00:00"'},
+    ]
+
+
 def test_js_max_int() -> None:
     from superset.db_engine_specs import BaseEngineSpec
     from superset.result_set import SupersetResultSet
diff --git a/tests/unit_tests/result_set_test.py b/tests/unit_tests/result_set_test.py
index e7371f5c0f..0a78e0a5ed 100644
--- a/tests/unit_tests/result_set_test.py
+++ b/tests/unit_tests/result_set_test.py
@@ -106,3 +106,37 @@ def test_stringify_with_null_integers():
     )
 
     assert np.array_equal(result_set, expected)
+
+
+def test_stringify_with_null_timestamps():
+    """
+    Test that we can safely handle type errors when a timestamp column has a null value
+    """
+
+    data = [
+        ("foo", "bar", pd.NaT, None),
+        ("foo", "bar", pd.NaT, True),
+        ("foo", "bar", pd.NaT, None),
+    ]
+    numpy_dtype = [
+        ("id", "object"),
+        ("value", "object"),
+        ("num", "object"),
+        ("bool", "object"),
+    ]
+
+    array2 = np.array(data, dtype=numpy_dtype)
+    column_names = ["id", "value", "num", "bool"]
+
+    result_set = np.array([stringify_values(array2[column]) for column in column_names])
+
+    expected = np.array(
+        [
+            array(['"foo"', '"foo"', '"foo"'], dtype=object),
+            array(['"bar"', '"bar"', '"bar"'], dtype=object),
+            array([None, None, None], dtype=object),
+            array([None, "true", None], dtype=object),
+        ]
+    )
+
+    assert np.array_equal(result_set, expected)