You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by jo...@apache.org on 2023/06/06 15:24:12 UTC

[arrow] branch main updated: GH-35040: [Python] Skip test_cast_timestamp_to_string on Windows because it requires tz database (#35735)

This is an automated email from the ASF dual-hosted git repository.

jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new c78ef0f1a4 GH-35040: [Python] Skip test_cast_timestamp_to_string on Windows because it requires tz database (#35735)
c78ef0f1a4 is described below

commit c78ef0f1a4adc9211aef78345415faa00cefa0bf
Author: Joris Van den Bossche <jo...@gmail.com>
AuthorDate: Tue Jun 6 17:24:04 2023 +0200

    GH-35040: [Python] Skip test_cast_timestamp_to_string on Windows because it requires tz database (#35735)
    
    ### Rationale for this change
    
    Fix up of https://github.com/apache/arrow/pull/35395, skipping one of the tests added in that PR on Windows, because the test requires access to a tz database.
    
    Authored-by: Joris Van den Bossche <jo...@gmail.com>
    Signed-off-by: Joris Van den Bossche <jo...@gmail.com>
---
 python/pyarrow/tests/test_compute.py | 44 +++++++++++++++++++-----------------
 python/pyarrow/tests/test_scalars.py |  4 ++++
 python/pyarrow/tests/util.py         |  9 ++++++++
 3 files changed, 36 insertions(+), 21 deletions(-)

diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index 875d0e613b..f934edd3c3 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -36,6 +36,8 @@ except ImportError:
 import pyarrow as pa
 import pyarrow.compute as pc
 from pyarrow.lib import ArrowNotImplementedError
+from pyarrow.tests import util
+
 
 all_array_types = [
     ('bool', [True, False, False, True, True]),
@@ -180,17 +182,19 @@ def test_option_class_equality():
         pc.WeekOptions(week_starts_monday=True, count_from_zero=False,
                        first_week_is_fully_in_year=False),
     ]
-    # TODO: We should test on windows once ARROW-13168 is resolved.
-    # Timezone database is not available on Windows yet
-    if sys.platform != 'win32':
+    # Timezone database might not be installed on Windows
+    if sys.platform != "win32" or util.windows_has_tzdata():
         options.append(pc.AssumeTimezoneOptions("Europe/Ljubljana"))
 
     classes = {type(option) for option in options}
 
     for cls in exported_option_classes:
-        # Timezone database is not available on Windows yet
-        if cls not in classes and sys.platform != 'win32' and \
-                cls != pc.AssumeTimezoneOptions:
+        # Timezone database might not be installed on Windows
+        if (
+            cls not in classes
+            and (sys.platform != "win32" or util.windows_has_tzdata())
+            and cls != pc.AssumeTimezoneOptions
+        ):
             try:
                 options.append(cls())
             except TypeError:
@@ -1846,17 +1850,18 @@ def test_strptime():
     assert got == pa.array([None, None, None], type=pa.timestamp('s'))
 
 
-# TODO: We should test on windows once ARROW-13168 is resolved.
 @pytest.mark.pandas
-@pytest.mark.skipif(sys.platform == 'win32',
-                    reason="Timezone database is not available on Windows yet")
+@pytest.mark.skipif(sys.platform == "win32" and not util.windows_has_tzdata(),
+                    reason="Timezone database is not installed on Windows")
 def test_strftime():
     times = ["2018-03-10 09:00", "2038-01-31 12:23", None]
     timezones = ["CET", "UTC", "Europe/Ljubljana"]
 
-    formats = ["%a", "%A", "%w", "%d", "%b", "%B", "%m", "%y", "%Y", "%H",
-               "%I", "%p", "%M", "%z", "%Z", "%j", "%U", "%W", "%c", "%x",
-               "%X", "%%", "%G", "%V", "%u"]
+    formats = ["%a", "%A", "%w", "%d", "%b", "%B", "%m", "%y", "%Y", "%H", "%I",
+               "%p", "%M", "%z", "%Z", "%j", "%U", "%W", "%%", "%G", "%V", "%u"]
+    if sys.platform != "win32":
+        # Locale-dependent formats don't match on Windows
+        formats.extend(["%c", "%x", "%X"])
 
     for timezone in timezones:
         ts = pd.to_datetime(times).tz_localize(timezone)
@@ -2029,18 +2034,16 @@ def test_extract_datetime_components():
     _check_datetime_components(timestamps)
 
     # Test timezone aware timestamp array
-    if sys.platform == 'win32':
-        # TODO: We should test on windows once ARROW-13168 is resolved.
-        pytest.skip('Timezone database is not available on Windows yet')
+    if sys.platform == "win32" and not util.windows_has_tzdata():
+        pytest.skip('Timezone database is not installed on Windows')
     else:
         for timezone in timezones:
             _check_datetime_components(timestamps, timezone)
 
 
-# TODO: We should test on windows once ARROW-13168 is resolved.
 @pytest.mark.pandas
-@pytest.mark.skipif(sys.platform == 'win32',
-                    reason="Timezone database is not available on Windows yet")
+@pytest.mark.skipif(sys.platform == "win32" and not util.windows_has_tzdata(),
+                    reason="Timezone database is not installed on Windows")
 def test_assume_timezone():
     ts_type = pa.timestamp("ns")
     timestamps = pd.to_datetime(["1970-01-01T00:00:59.123456789",
@@ -2235,9 +2238,8 @@ def _check_temporal_rounding(ts, values, unit):
         np.testing.assert_array_equal(result, expected)
 
 
-# TODO: We should test on windows once ARROW-13168 is resolved.
-@pytest.mark.skipif(sys.platform == 'win32',
-                    reason="Timezone database is not available on Windows yet")
+@pytest.mark.skipif(sys.platform == "win32" and not util.windows_has_tzdata(),
+                    reason="Timezone database is not installed on Windows")
 @pytest.mark.parametrize('unit', ("nanosecond", "microsecond", "millisecond",
                                   "second", "minute", "hour", "day"))
 @pytest.mark.pandas
diff --git a/python/pyarrow/tests/test_scalars.py b/python/pyarrow/tests/test_scalars.py
index 1e6a3f29e0..b7180e5250 100644
--- a/python/pyarrow/tests/test_scalars.py
+++ b/python/pyarrow/tests/test_scalars.py
@@ -19,12 +19,14 @@ import datetime
 import decimal
 import pickle
 import pytest
+import sys
 import weakref
 
 import numpy as np
 
 import pyarrow as pa
 import pyarrow.compute as pc
+from pyarrow.tests import util
 
 
 @pytest.mark.parametrize(['value', 'ty', 'klass'], [
@@ -304,6 +306,8 @@ def test_cast():
         pa.scalar('foo').cast('int32')
 
 
+@pytest.mark.skipif(sys.platform == "win32" and not util.windows_has_tzdata(),
+                    reason="Timezone database is not installed on Windows")
 def test_cast_timestamp_to_string():
     # GH-35370
     pytest.importorskip("pytz")
diff --git a/python/pyarrow/tests/util.py b/python/pyarrow/tests/util.py
index df7936371e..0b69deb73b 100644
--- a/python/pyarrow/tests/util.py
+++ b/python/pyarrow/tests/util.py
@@ -448,3 +448,12 @@ def _configure_s3_limited_user(s3_server, policy):
 
     except FileNotFoundError:
         pytest.skip("Configuring limited s3 user failed")
+
+
+def windows_has_tzdata():
+    """
+    Return whether the tz database exists at the default location where
+    tz.cpp looks for it (until we make this configurable at run-time).
+    """
+    tzdata_path = os.path.expandvars(r"%USERPROFILE%\Downloads\tzdata")
+    return os.path.exists(tzdata_path)