You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/01/24 04:44:28 UTC
arrow git commit: ARROW-378: Python: Respect timezone on conversion of Pandas datetime columns
Repository: arrow
Updated Branches:
refs/heads/master 085c8754b -> c90ca60c1
ARROW-378: Python: Respect timezone on conversion of Pandas datetime columns
Arrow is now timezone-aware when converting pandas datetime columns.
Author: ahnj <ah...@yahoo.com>
Closes #287 from ahnj/timestamp-aware and squashes the following commits:
0221ed0 [ahnj] ARROW-378: Python: Respect timezone on conversion of Pandas datetime columns
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/c90ca60c
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/c90ca60c
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/c90ca60c
Branch: refs/heads/master
Commit: c90ca60c1859b2b70c4f2dd3fb8c41b0f75f02d0
Parents: 085c875
Author: ahnj <ah...@yahoo.com>
Authored: Mon Jan 23 23:44:22 2017 -0500
Committer: Wes McKinney <we...@twosigma.com>
Committed: Mon Jan 23 23:44:22 2017 -0500
----------------------------------------------------------------------
python/pyarrow/array.pyx | 6 ++++-
python/pyarrow/tests/test_convert_pandas.py | 29 ++++++++++++++++++++++--
2 files changed, 32 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/c90ca60c/python/pyarrow/array.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/array.pyx b/python/pyarrow/array.pyx
index 92206f2..c3a5a04 100644
--- a/python/pyarrow/array.pyx
+++ b/python/pyarrow/array.pyx
@@ -71,9 +71,13 @@ cdef class Array:
timestamps_to_ms : bool, optional
Convert datetime columns to ms resolution. This is needed for
- compability with other functionality like Parquet I/O which
+ compatibility with other functionality like Parquet I/O which
only supports milliseconds.
+ Notes
+ -----
+ Localized timestamps will currently be returned as UTC (pandas's native representation).
+ Timezone-naive data will be implicitly interpreted as UTC.
Examples
--------
http://git-wip-us.apache.org/repos/asf/arrow/blob/c90ca60c/python/pyarrow/tests/test_convert_pandas.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py
index 30705c4..674a436 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -63,7 +63,7 @@ class TestPandasConversion(unittest.TestCase):
def _check_pandas_roundtrip(self, df, expected=None, nthreads=1,
timestamps_to_ms=False, expected_schema=None,
- schema=None):
+ check_dtype=True, schema=None):
table = A.Table.from_pandas(df, timestamps_to_ms=timestamps_to_ms,
schema=schema)
result = table.to_pandas(nthreads=nthreads)
@@ -71,7 +71,7 @@ class TestPandasConversion(unittest.TestCase):
assert table.schema.equals(expected_schema)
if expected is None:
expected = df
- tm.assert_frame_equal(result, expected)
+ tm.assert_frame_equal(result, expected, check_dtype=check_dtype)
def _check_array_roundtrip(self, values, expected=None,
timestamps_to_ms=False, field=None):
@@ -284,6 +284,31 @@ class TestPandasConversion(unittest.TestCase):
self._check_pandas_roundtrip(df, timestamps_to_ms=False,
expected_schema=schema)
+ def test_timestamps_with_timezone(self):
+ df = pd.DataFrame({
+ 'datetime64': np.array([
+ '2007-07-13T01:23:34.123',
+ '2006-01-13T12:34:56.432',
+ '2010-08-13T05:46:57.437'],
+ dtype='datetime64[ms]')
+ })
+ df_est = df['datetime64'].dt.tz_localize('US/Eastern').to_frame()
+ df_utc = df_est['datetime64'].dt.tz_convert('UTC').to_frame()
+ self._check_pandas_roundtrip(df_est, expected=df_utc, timestamps_to_ms=True, check_dtype=False)
+
+ # drop-in a null and ns instead of ms
+ df = pd.DataFrame({
+ 'datetime64': np.array([
+ '2007-07-13T01:23:34.123456789',
+ None,
+ '2006-01-13T12:34:56.432539784',
+ '2010-08-13T05:46:57.437699912'],
+ dtype='datetime64[ns]')
+ })
+ df_est = df['datetime64'].dt.tz_localize('US/Eastern').to_frame()
+ df_utc = df_est['datetime64'].dt.tz_convert('UTC').to_frame()
+ self._check_pandas_roundtrip(df_est, expected=df_utc, timestamps_to_ms=False, check_dtype=False)
+
def test_date(self):
df = pd.DataFrame({
'date': [datetime.date(2000, 1, 1),