You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by uw...@apache.org on 2017/06/23 13:51:36 UTC

arrow git commit: ARROW-1039: Python: Remove duplicate column

Repository: arrow
Updated Branches:
  refs/heads/master a16c1246e -> c1ec0c723


ARROW-1039: Python: Remove duplicate column

Note that part of this problem was related to the fix I made in https://github.com/apache/parquet-cpp/pull/358/files#diff-2f5ceebd1726b16db561185cc620d18e

Author: Uwe L. Korn <uw...@xhochy.com>

Closes #773 from xhochy/ARROW-1039 and squashes the following commits:

44a002a [Uwe L. Korn] ARROW-1039: Python: Remove duplicate column


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/c1ec0c72
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/c1ec0c72
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/c1ec0c72

Branch: refs/heads/master
Commit: c1ec0c723e43b9450ef2655e6415eb40301c4ce8
Parents: a16c124
Author: Uwe L. Korn <uw...@xhochy.com>
Authored: Fri Jun 23 15:49:55 2017 +0200
Committer: Uwe L. Korn <uw...@xhochy.com>
Committed: Fri Jun 23 15:49:55 2017 +0200

----------------------------------------------------------------------
 python/pyarrow/array.pxi             | 29 +++++++++++++++++++++++++++++
 python/pyarrow/includes/libarrow.pxd |  6 ++++++
 python/pyarrow/tests/test_parquet.py |  4 ++--
 3 files changed, 37 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/c1ec0c72/python/pyarrow/array.pxi
----------------------------------------------------------------------
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 5930de3..c7563c8 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -432,6 +432,7 @@ cdef set PRIMITIVE_TYPES = set([
     _Type_UINT32, _Type_INT32,
     _Type_UINT64, _Type_INT64,
     _Type_TIMESTAMP, _Type_DATE32,
+    _Type_TIME32, _Type_TIME64,
     _Type_DATE64,
     _Type_HALF_FLOAT,
     _Type_FLOAT,
@@ -816,6 +817,32 @@ cdef class Date64Value(ArrayValue):
             ap.Value(self.index) / 1000).date()
 
 
+cdef class Time32Value(ArrayValue):
+
+    def as_py(self):
+        cdef:
+            CTime32Array* ap = <CTime32Array*> self.sp_array.get()
+            CTime32Type* dtype = <CTime32Type*> ap.type().get()
+
+        if dtype.unit() == TimeUnit_SECOND:
+            return (datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=ap.Value(self.index))).time()
+        else:
+            return (datetime.datetime(1970, 1, 1) + datetime.timedelta(milliseconds=ap.Value(self.index))).time()
+
+
+cdef class Time64Value(ArrayValue):
+
+    def as_py(self):
+        cdef:
+            CTime64Array* ap = <CTime64Array*> self.sp_array.get()
+            CTime64Type* dtype = <CTime64Type*> ap.type().get()
+
+        if dtype.unit() == TimeUnit_MICRO:
+            return (datetime.datetime(1970, 1, 1) + datetime.timedelta(microseconds=ap.Value(self.index))).time()
+        else:
+            return (datetime.datetime(1970, 1, 1) + datetime.timedelta(microseconds=ap.Value(self.index) / 1000)).time()
+
+
 cdef dict DATETIME_CONVERSION_FUNCTIONS
 
 try:
@@ -975,6 +1002,8 @@ cdef dict _scalar_classes = {
     _Type_INT64: Int64Value,
     _Type_DATE32: Date32Value,
     _Type_DATE64: Date64Value,
+    _Type_TIME32: Time32Value,
+    _Type_TIME64: Time64Value,
     _Type_TIMESTAMP: TimestampValue,
     _Type_FLOAT: FloatValue,
     _Type_DOUBLE: DoubleValue,

http://git-wip-us.apache.org/repos/asf/arrow/blob/c1ec0c72/python/pyarrow/includes/libarrow.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 9df31c8..f712274 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -249,6 +249,12 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
     cdef cppclass CDate64Array" arrow::Date64Array"(CArray):
         int64_t Value(int i)
 
+    cdef cppclass CTime32Array" arrow::Time32Array"(CArray):
+        int32_t Value(int i)
+
+    cdef cppclass CTime64Array" arrow::Time64Array"(CArray):
+        int64_t Value(int i)
+
     cdef cppclass CTimestampArray" arrow::TimestampArray"(CArray):
         int64_t Value(int i)
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/c1ec0c72/python/pyarrow/tests/test_parquet.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
index 052d395..3c2b73e 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -449,13 +449,13 @@ def test_date_time_types():
 
     table = pa.Table.from_arrays([a1, a2, a3, a4, a5, a6],
                                  ['date32', 'date64', 'timestamp[us]',
-                                  'time32[s]', 'time64[us]', 'time32[s]'])
+                                  'time32[s]', 'time64[us]', 'time32_from64[s]'])
 
     # date64 as date32
     # time32[s] to time32[ms]
     expected = pa.Table.from_arrays([a1, a1, a3, a4, a5, ex_a6],
                                     ['date32', 'date64', 'timestamp[us]',
-                                     'time32[s]', 'time64[us]', 'time32[s]'])
+                                     'time32[s]', 'time64[us]', 'time32_from64[s]'])
 
     _check_roundtrip(table, expected=expected, version='2.0')