You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/01/24 04:44:28 UTC

arrow git commit: ARROW-378: Python: Respect timezone on conversion of Pandas datetime columns

Repository: arrow
Updated Branches:
  refs/heads/master 085c8754b -> c90ca60c1


ARROW-378: Python: Respect timezone on conversion of Pandas datetime columns

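Arrow now respects time zones when converting pandas datetime columns: timezone-aware timestamps are converted to UTC rather than being rejected or misread.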

Author: ahnj <ah...@yahoo.com>

Closes #287 from ahnj/timestamp-aware and squashes the following commits:

0221ed0 [ahnj] ARROW-378: Python: Respect timezone on conversion of Pandas datetime columns


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/c90ca60c
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/c90ca60c
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/c90ca60c

Branch: refs/heads/master
Commit: c90ca60c1859b2b70c4f2dd3fb8c41b0f75f02d0
Parents: 085c875
Author: ahnj <ah...@yahoo.com>
Authored: Mon Jan 23 23:44:22 2017 -0500
Committer: Wes McKinney <we...@twosigma.com>
Committed: Mon Jan 23 23:44:22 2017 -0500

----------------------------------------------------------------------
 python/pyarrow/array.pyx                    |  6 ++++-
 python/pyarrow/tests/test_convert_pandas.py | 29 ++++++++++++++++++++++--
 2 files changed, 32 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/c90ca60c/python/pyarrow/array.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/array.pyx b/python/pyarrow/array.pyx
index 92206f2..c3a5a04 100644
--- a/python/pyarrow/array.pyx
+++ b/python/pyarrow/array.pyx
@@ -71,9 +71,13 @@ cdef class Array:
 
         timestamps_to_ms : bool, optional
             Convert datetime columns to ms resolution. This is needed for
-            compability with other functionality like Parquet I/O which
+            compatibility with other functionality like Parquet I/O which
             only supports milliseconds.
 
+        Notes
+        -----
+        Localized timestamps will currently be returned as UTC (pandas's native representation).
+        Timezone-naive data will be implicitly interpreted as UTC.
 
         Examples
         --------

http://git-wip-us.apache.org/repos/asf/arrow/blob/c90ca60c/python/pyarrow/tests/test_convert_pandas.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py
index 30705c4..674a436 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -63,7 +63,7 @@ class TestPandasConversion(unittest.TestCase):
 
     def _check_pandas_roundtrip(self, df, expected=None, nthreads=1,
                                 timestamps_to_ms=False, expected_schema=None,
-                                schema=None):
+                                check_dtype=True, schema=None):
         table = A.Table.from_pandas(df, timestamps_to_ms=timestamps_to_ms,
                                     schema=schema)
         result = table.to_pandas(nthreads=nthreads)
@@ -71,7 +71,7 @@ class TestPandasConversion(unittest.TestCase):
             assert table.schema.equals(expected_schema)
         if expected is None:
             expected = df
-        tm.assert_frame_equal(result, expected)
+        tm.assert_frame_equal(result, expected, check_dtype=check_dtype)
 
     def _check_array_roundtrip(self, values, expected=None,
                                timestamps_to_ms=False, field=None):
@@ -284,6 +284,31 @@ class TestPandasConversion(unittest.TestCase):
         self._check_pandas_roundtrip(df, timestamps_to_ms=False,
                                      expected_schema=schema)
 
+    def test_timestamps_with_timezone(self):
+        df = pd.DataFrame({
+            'datetime64': np.array([
+                '2007-07-13T01:23:34.123',
+                '2006-01-13T12:34:56.432',
+                '2010-08-13T05:46:57.437'],
+                dtype='datetime64[ms]')
+            })
+        df_est = df['datetime64'].dt.tz_localize('US/Eastern').to_frame()
+        df_utc = df_est['datetime64'].dt.tz_convert('UTC').to_frame()
+        self._check_pandas_roundtrip(df_est, expected=df_utc, timestamps_to_ms=True, check_dtype=False)
+
+        # drop-in a null and ns instead of ms
+        df = pd.DataFrame({
+            'datetime64': np.array([
+                '2007-07-13T01:23:34.123456789',
+                None,
+                '2006-01-13T12:34:56.432539784',
+                '2010-08-13T05:46:57.437699912'],
+                dtype='datetime64[ns]')
+            })
+        df_est = df['datetime64'].dt.tz_localize('US/Eastern').to_frame()
+        df_utc = df_est['datetime64'].dt.tz_convert('UTC').to_frame()
+        self._check_pandas_roundtrip(df_est, expected=df_utc, timestamps_to_ms=False, check_dtype=False)
+
     def test_date(self):
         df = pd.DataFrame({
             'date': [datetime.date(2000, 1, 1),