You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by uw...@apache.org on 2017/06/23 13:51:36 UTC
arrow git commit: ARROW-1039: Python: Remove duplicate column
Repository: arrow
Updated Branches:
refs/heads/master a16c1246e -> c1ec0c723
ARROW-1039: Python: Remove duplicate column
Note that part of this problem was related to the fix I made in https://github.com/apache/parquet-cpp/pull/358/files#diff-2f5ceebd1726b16db561185cc620d18e
Author: Uwe L. Korn <uw...@xhochy.com>
Closes #773 from xhochy/ARROW-1039 and squashes the following commits:
44a002a [Uwe L. Korn] ARROW-1039: Python: Remove duplicate column
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/c1ec0c72
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/c1ec0c72
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/c1ec0c72
Branch: refs/heads/master
Commit: c1ec0c723e43b9450ef2655e6415eb40301c4ce8
Parents: a16c124
Author: Uwe L. Korn <uw...@xhochy.com>
Authored: Fri Jun 23 15:49:55 2017 +0200
Committer: Uwe L. Korn <uw...@xhochy.com>
Committed: Fri Jun 23 15:49:55 2017 +0200
----------------------------------------------------------------------
python/pyarrow/array.pxi | 29 +++++++++++++++++++++++++++++
python/pyarrow/includes/libarrow.pxd | 6 ++++++
python/pyarrow/tests/test_parquet.py | 4 ++--
3 files changed, 37 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/c1ec0c72/python/pyarrow/array.pxi
----------------------------------------------------------------------
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 5930de3..c7563c8 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -432,6 +432,7 @@ cdef set PRIMITIVE_TYPES = set([
_Type_UINT32, _Type_INT32,
_Type_UINT64, _Type_INT64,
_Type_TIMESTAMP, _Type_DATE32,
+ _Type_TIME32, _Type_TIME64,
_Type_DATE64,
_Type_HALF_FLOAT,
_Type_FLOAT,
@@ -816,6 +817,32 @@ cdef class Date64Value(ArrayValue):
ap.Value(self.index) / 1000).date()
+cdef class Time32Value(ArrayValue):
+
+ def as_py(self):
+ cdef:
+ CTime32Array* ap = <CTime32Array*> self.sp_array.get()
+ CTime32Type* dtype = <CTime32Type*> ap.type().get()
+
+ if dtype.unit() == TimeUnit_SECOND:
+ return (datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=ap.Value(self.index))).time()
+ else:
+ return (datetime.datetime(1970, 1, 1) + datetime.timedelta(milliseconds=ap.Value(self.index))).time()
+
+
+cdef class Time64Value(ArrayValue):
+
+ def as_py(self):
+ cdef:
+ CTime64Array* ap = <CTime64Array*> self.sp_array.get()
+ CTime64Type* dtype = <CTime64Type*> ap.type().get()
+
+ if dtype.unit() == TimeUnit_MICRO:
+ return (datetime.datetime(1970, 1, 1) + datetime.timedelta(microseconds=ap.Value(self.index))).time()
+ else:
+ return (datetime.datetime(1970, 1, 1) + datetime.timedelta(microseconds=ap.Value(self.index) / 1000)).time()
+
+
cdef dict DATETIME_CONVERSION_FUNCTIONS
try:
@@ -975,6 +1002,8 @@ cdef dict _scalar_classes = {
_Type_INT64: Int64Value,
_Type_DATE32: Date32Value,
_Type_DATE64: Date64Value,
+ _Type_TIME32: Time32Value,
+ _Type_TIME64: Time64Value,
_Type_TIMESTAMP: TimestampValue,
_Type_FLOAT: FloatValue,
_Type_DOUBLE: DoubleValue,
http://git-wip-us.apache.org/repos/asf/arrow/blob/c1ec0c72/python/pyarrow/includes/libarrow.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 9df31c8..f712274 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -249,6 +249,12 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
cdef cppclass CDate64Array" arrow::Date64Array"(CArray):
int64_t Value(int i)
+ cdef cppclass CTime32Array" arrow::Time32Array"(CArray):
+ int32_t Value(int i)
+
+ cdef cppclass CTime64Array" arrow::Time64Array"(CArray):
+ int64_t Value(int i)
+
cdef cppclass CTimestampArray" arrow::TimestampArray"(CArray):
int64_t Value(int i)
http://git-wip-us.apache.org/repos/asf/arrow/blob/c1ec0c72/python/pyarrow/tests/test_parquet.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
index 052d395..3c2b73e 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -449,13 +449,13 @@ def test_date_time_types():
table = pa.Table.from_arrays([a1, a2, a3, a4, a5, a6],
['date32', 'date64', 'timestamp[us]',
- 'time32[s]', 'time64[us]', 'time32[s]'])
+ 'time32[s]', 'time64[us]', 'time32_from64[s]'])
# date64 as date32
# time32[s] to time32[ms]
expected = pa.Table.from_arrays([a1, a1, a3, a4, a5, ex_a6],
['date32', 'date64', 'timestamp[us]',
- 'time32[s]', 'time64[us]', 'time32[s]'])
+ 'time32[s]', 'time64[us]', 'time32_from64[s]'])
_check_roundtrip(table, expected=expected, version='2.0')