You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by be...@apache.org on 2023/08/10 23:15:58 UTC

[superset] branch sc_73447 updated (4ecf3b9118 -> b497b6e92d)

This is an automated email from the ASF dual-hosted git repository.

beto pushed a change to branch sc_73447
in repository https://gitbox.apache.org/repos/asf/superset.git


 discard 4ecf3b9118 fix: to_datetime in Pandas 2
     new b497b6e92d fix: to_datetime in Pandas 2

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (4ecf3b9118)
            \
             N -- N -- N   refs/heads/sc_73447 (b497b6e92d)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 superset/utils/core.py | 1 -
 1 file changed, 1 deletion(-)


[superset] 01/01: fix: to_datetime in Pandas 2

Posted by be...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

beto pushed a commit to branch sc_73447
in repository https://gitbox.apache.org/repos/asf/superset.git

commit b497b6e92d2d43a7efaf68acec854df9af0b4cc4
Author: Beto Dealmeida <ro...@dealmeida.net>
AuthorDate: Thu Aug 10 16:14:50 2023 -0700

    fix: to_datetime in Pandas 2
---
 superset/utils/core.py              | 10 ++++++++--
 tests/unit_tests/utils/test_core.py | 30 ++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/superset/utils/core.py b/superset/utils/core.py
index cd8c62efe7..8b1cc1a485 100644
--- a/superset/utils/core.py
+++ b/superset/utils/core.py
@@ -1834,7 +1834,12 @@ def normalize_dttm_col(
                 # Column is formatted as a numeric value
                 unit = _col.timestamp_format.replace("epoch_", "")
                 df[_col.col_label] = pd.to_datetime(
-                    dttm_series, utc=False, unit=unit, origin="unix", errors="coerce"
+                    dttm_series,
+                    utc=False,
+                    unit=unit,
+                    origin="unix",
+                    errors="raise",
+                    exact=False,
                 )
             else:
                 # Column has already been formatted as a timestamp.
@@ -1844,7 +1849,8 @@ def normalize_dttm_col(
                 df[_col.col_label],
                 utc=False,
                 format=_col.timestamp_format,
-                errors="coerce",
+                errors="raise",
+                exact=False,
             )
         if _col.offset:
             df[_col.col_label] += timedelta(hours=_col.offset)
diff --git a/tests/unit_tests/utils/test_core.py b/tests/unit_tests/utils/test_core.py
index 568595517c..562ebe582e 100644
--- a/tests/unit_tests/utils/test_core.py
+++ b/tests/unit_tests/utils/test_core.py
@@ -17,11 +17,14 @@
 import os
 from typing import Any, Optional
 
+import pandas as pd
 import pytest
 
 from superset.utils.core import (
     cast_to_boolean,
+    DateColumn,
     is_test,
+    normalize_dttm_col,
     parse_boolean_string,
     QueryObjectFilterClause,
     remove_extra_adhoc_filters,
@@ -171,3 +174,30 @@ def test_other_values():
     assert cast_to_boolean([]) is False
     assert cast_to_boolean({}) is False
     assert cast_to_boolean(object()) is False
+
+
+def test_normalize_dttm_col() -> None:
+    """
+    Tests for the ``normalize_dttm_col`` function.
+
+    In particular, this covers a regression when Pandas was upgraded from 1.5.3 to
+    2.0.3 and the behavior of ``pd.to_datetime`` changed.
+    """
+    df = pd.DataFrame({"__time": ["2017-07-01T00:00:00.000Z"]})
+    assert (
+        df.to_markdown()
+        == """
+|    | __time                   |
+|---:|:-------------------------|
+|  0 | 2017-07-01T00:00:00.000Z |
+    """.strip()
+    )
+
+    # in 1.5.3 this would return a datetime64[ns] dtype, but in 2.0.3 we had to
+    # add ``exact=False`` since there is a leftover after parsing the format
+    dttm_cols = (DateColumn("__time", "%Y-%m-%d"),)
+
+    # the function modifies the dataframe in place
+    normalize_dttm_col(df, dttm_cols)
+
+    assert df["__time"].astype(str).tolist() == ["2017-07-01"]