You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2016/12/19 23:44:15 UTC

arrow git commit: ARROW-420: Align DATE type with Java implementation

Repository: arrow
Updated Branches:
  refs/heads/master cfde4607d -> d7845fcd8


ARROW-420: Align DATE type with Java implementation

Author: Uwe L. Korn <uw...@xhochy.com>

Closes #238 from xhochy/ARROW-420 and squashes the following commits:

e497d9f [Uwe L. Korn] Add datetime.date parsing for numpy conversion
5c21453 [Uwe L. Korn] Add support for datetime.datetime
6bf346f [Uwe L. Korn] Add datetime.date conversions
6fca4da [Uwe L. Korn] ARROW-420: Align DATE type with Java implementation


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/d7845fcd
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/d7845fcd
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/d7845fcd

Branch: refs/heads/master
Commit: d7845fcd8b8a06248e42ca083c6460c43723c154
Parents: cfde460
Author: Uwe L. Korn <uw...@xhochy.com>
Authored: Mon Dec 19 18:44:09 2016 -0500
Committer: Wes McKinney <we...@twosigma.com>
Committed: Mon Dec 19 18:44:09 2016 -0500

----------------------------------------------------------------------
 cpp/src/arrow/array.cc                       |   1 +
 cpp/src/arrow/array.h                        |   1 +
 cpp/src/arrow/builder.cc                     |   2 +
 cpp/src/arrow/builder.h                      |   1 +
 cpp/src/arrow/type.cc                        |   4 +
 cpp/src/arrow/type.h                         |   4 +-
 cpp/src/arrow/type_fwd.h                     |   4 +-
 cpp/src/arrow/type_traits.h                  |   8 ++
 python/pyarrow/__init__.py                   |   1 +
 python/pyarrow/array.pyx                     |   7 +-
 python/pyarrow/includes/libarrow.pxd         |  16 ++++
 python/pyarrow/scalar.pyx                    |  31 +++++++
 python/pyarrow/schema.pyx                    |   7 ++
 python/pyarrow/tests/test_convert_builtin.py |  28 ++++++
 python/pyarrow/tests/test_convert_pandas.py  |  15 ++++
 python/src/pyarrow/adapters/builtin.cc       |  69 +++++++++++++++
 python/src/pyarrow/adapters/pandas.cc        | 103 ++++++++++++++++++----
 python/src/pyarrow/helpers.cc                |   6 ++
 python/src/pyarrow/helpers.h                 |   2 +
 python/src/pyarrow/util/datetime.h           |  40 +++++++++
 20 files changed, 330 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/d7845fcd/cpp/src/arrow/array.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array.cc b/cpp/src/arrow/array.cc
index 7ab61f5..d13fa1e 100644
--- a/cpp/src/arrow/array.cc
+++ b/cpp/src/arrow/array.cc
@@ -148,6 +148,7 @@ template class NumericArray<Int16Type>;
 template class NumericArray<Int32Type>;
 template class NumericArray<Int64Type>;
 template class NumericArray<TimestampType>;
+template class NumericArray<DateType>;
 template class NumericArray<HalfFloatType>;
 template class NumericArray<FloatType>;
 template class NumericArray<DoubleType>;

http://git-wip-us.apache.org/repos/asf/arrow/blob/d7845fcd/cpp/src/arrow/array.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/array.h b/cpp/src/arrow/array.h
index 1a4a923..26d53f7 100644
--- a/cpp/src/arrow/array.h
+++ b/cpp/src/arrow/array.h
@@ -468,6 +468,7 @@ extern template class ARROW_EXPORT NumericArray<HalfFloatType>;
 extern template class ARROW_EXPORT NumericArray<FloatType>;
 extern template class ARROW_EXPORT NumericArray<DoubleType>;
 extern template class ARROW_EXPORT NumericArray<TimestampType>;
+extern template class ARROW_EXPORT NumericArray<DateType>;
 
 #if defined(__GNUC__) && !defined(__clang__)
 #pragma GCC diagnostic pop

http://git-wip-us.apache.org/repos/asf/arrow/blob/d7845fcd/cpp/src/arrow/builder.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc
index 493b5e7..1d94dba 100644
--- a/cpp/src/arrow/builder.cc
+++ b/cpp/src/arrow/builder.cc
@@ -199,6 +199,7 @@ template class PrimitiveBuilder<Int8Type>;
 template class PrimitiveBuilder<Int16Type>;
 template class PrimitiveBuilder<Int32Type>;
 template class PrimitiveBuilder<Int64Type>;
+template class PrimitiveBuilder<DateType>;
 template class PrimitiveBuilder<TimestampType>;
 template class PrimitiveBuilder<HalfFloatType>;
 template class PrimitiveBuilder<FloatType>;
@@ -411,6 +412,7 @@ Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
     BUILDER_CASE(INT32, Int32Builder);
     BUILDER_CASE(UINT64, UInt64Builder);
     BUILDER_CASE(INT64, Int64Builder);
+    BUILDER_CASE(DATE, DateBuilder);
     BUILDER_CASE(TIMESTAMP, TimestampBuilder);
 
     BUILDER_CASE(BOOL, BooleanBuilder);

http://git-wip-us.apache.org/repos/asf/arrow/blob/d7845fcd/cpp/src/arrow/builder.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h
index 7162d31..2051398 100644
--- a/cpp/src/arrow/builder.h
+++ b/cpp/src/arrow/builder.h
@@ -220,6 +220,7 @@ using Int16Builder = NumericBuilder<Int16Type>;
 using Int32Builder = NumericBuilder<Int32Type>;
 using Int64Builder = NumericBuilder<Int64Type>;
 using TimestampBuilder = NumericBuilder<TimestampType>;
+using DateBuilder = NumericBuilder<DateType>;
 
 using HalfFloatBuilder = NumericBuilder<HalfFloatType>;
 using FloatBuilder = NumericBuilder<FloatType>;

http://git-wip-us.apache.org/repos/asf/arrow/blob/d7845fcd/cpp/src/arrow/type.cc
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index 5b172e4..4748cc3 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -88,6 +88,10 @@ std::string StructType::ToString() const {
   return s.str();
 }
 
+// Human-readable form of the type; reuses the static DateType::name() so the
+// two spellings cannot drift apart.
+std::string DateType::ToString() const {
+  return name();
+}
+
 std::string UnionType::ToString() const {
   std::stringstream s;
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/d7845fcd/cpp/src/arrow/type.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index 8637081..7300570 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -413,14 +413,14 @@ struct ARROW_EXPORT UnionType : public DataType {
 struct ARROW_EXPORT DateType : public FixedWidthType {
   static constexpr Type::type type_id = Type::DATE;
 
-  using c_type = int32_t;
+  using c_type = int64_t;
 
   DateType() : FixedWidthType(Type::DATE) {}
 
   int bit_width() const override { return sizeof(c_type) * 8; }
 
   Status Accept(TypeVisitor* visitor) const override;
-  std::string ToString() const override { return name(); }
+  std::string ToString() const override;
   static std::string name() { return "date"; }
 };
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/d7845fcd/cpp/src/arrow/type_fwd.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type_fwd.h b/cpp/src/arrow/type_fwd.h
index 6d660f4..a9db32d 100644
--- a/cpp/src/arrow/type_fwd.h
+++ b/cpp/src/arrow/type_fwd.h
@@ -87,13 +87,15 @@ _NUMERIC_TYPE_DECL(Double);
 #undef _NUMERIC_TYPE_DECL
 
 struct DateType;
-class DateArray;
+using DateArray = NumericArray<DateType>;
+using DateBuilder = NumericBuilder<DateType>;
 
 struct TimeType;
 class TimeArray;
 
 struct TimestampType;
 using TimestampArray = NumericArray<TimestampType>;
+using TimestampBuilder = NumericBuilder<TimestampType>;
 
 struct IntervalType;
 using IntervalArray = NumericArray<IntervalType>;

http://git-wip-us.apache.org/repos/asf/arrow/blob/d7845fcd/cpp/src/arrow/type_traits.h
----------------------------------------------------------------------
diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h
index 3aaec0b..5616018 100644
--- a/cpp/src/arrow/type_traits.h
+++ b/cpp/src/arrow/type_traits.h
@@ -91,6 +91,14 @@ struct TypeTraits<Int64Type> {
 };
 
 template <>
+// Compile-time traits for DateType: names the array class used for DATE
+// values and computes the size of a value buffer.
+struct TypeTraits<DateType> {
+  using ArrayType = DateArray;
+  // using BuilderType = DateBuilder;
+
+  // Bytes needed to hold `elements` values, each sized as an int64_t.
+  // NOTE(review): the size_t product is narrowed to int on return.
+  static inline int bytes_required(int elements) { return elements * sizeof(int64_t); }
+};
+
+template <>
 struct TypeTraits<TimestampType> {
   using ArrayType = TimestampArray;
   // using BuilderType = TimestampBuilder;

http://git-wip-us.apache.org/repos/asf/arrow/blob/d7845fcd/python/pyarrow/__init__.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index b9d3861..a42e39c 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -37,6 +37,7 @@ from pyarrow.scalar import (ArrayValue, Scalar, NA, NAType,
 from pyarrow.schema import (null, bool_,
                             int8, int16, int32, int64,
                             uint8, uint16, uint32, uint64,
+                            timestamp, date,
                             float_, double, string,
                             list_, struct, field,
                             DataType, Field, Schema, schema)

http://git-wip-us.apache.org/repos/asf/arrow/blob/d7845fcd/python/pyarrow/array.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/array.pyx b/python/pyarrow/array.pyx
index d44212f..84f1705 100644
--- a/python/pyarrow/array.pyx
+++ b/python/pyarrow/array.pyx
@@ -218,6 +218,10 @@ cdef class UInt64Array(NumericArray):
     pass
 
 
+# Python-side array class registered for Type_DATE in _array_classes; it adds
+# nothing on top of NumericArray.
+cdef class DateArray(NumericArray):
+    pass
+
+
 cdef class FloatArray(NumericArray):
     pass
 
@@ -245,6 +249,7 @@ cdef dict _array_classes = {
     Type_INT16: Int16Array,
     Type_INT32: Int32Array,
     Type_INT64: Int64Array,
+    Type_DATE: DateArray,
     Type_FLOAT: FloatArray,
     Type_DOUBLE: DoubleArray,
     Type_LIST: ListArray,
@@ -284,7 +289,7 @@ def from_pylist(object list_obj, DataType type=None):
     if type is None:
         check_status(pyarrow.ConvertPySequence(list_obj, &sp_array))
     else:
-        raise NotImplementedError
+        raise NotImplementedError()
 
     return box_arrow_array(sp_array)
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/d7845fcd/python/pyarrow/includes/libarrow.pxd
----------------------------------------------------------------------
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 15781ce..419dd74 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -39,11 +39,18 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
         Type_DOUBLE" arrow::Type::DOUBLE"
 
         Type_TIMESTAMP" arrow::Type::TIMESTAMP"
+        Type_DATE" arrow::Type::DATE"
         Type_STRING" arrow::Type::STRING"
 
         Type_LIST" arrow::Type::LIST"
         Type_STRUCT" arrow::Type::STRUCT"
 
+    enum TimeUnit" arrow::TimeUnit":
+        TimeUnit_SECOND" arrow::TimeUnit::SECOND"
+        TimeUnit_MILLI" arrow::TimeUnit::MILLI"
+        TimeUnit_MICRO" arrow::TimeUnit::MICRO"
+        TimeUnit_NANO" arrow::TimeUnit::NANO"
+
     cdef cppclass CDataType" arrow::DataType":
         Type type
 
@@ -74,6 +81,9 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
     cdef cppclass CStringType" arrow::StringType"(CDataType):
         pass
 
+    cdef cppclass CTimestampType" arrow::TimestampType"(CDataType):
+        TimeUnit unit
+
     cdef cppclass CField" arrow::Field":
         c_string name
         shared_ptr[CDataType] type
@@ -132,6 +142,12 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
     cdef cppclass CInt64Array" arrow::Int64Array"(CArray):
         int64_t Value(int i)
 
+    cdef cppclass CDateArray" arrow::DateArray"(CArray):
+        int64_t Value(int i)
+
+    cdef cppclass CTimestampArray" arrow::TimestampArray"(CArray):
+        int64_t Value(int i)
+
     cdef cppclass CFloatArray" arrow::FloatArray"(CArray):
         float Value(int i)
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/d7845fcd/python/pyarrow/scalar.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/scalar.pyx b/python/pyarrow/scalar.pyx
index c2d20e4..09f60e2 100644
--- a/python/pyarrow/scalar.pyx
+++ b/python/pyarrow/scalar.pyx
@@ -20,6 +20,9 @@ from pyarrow.schema cimport DataType, box_data_type
 from pyarrow.compat import frombytes
 import pyarrow.schema as schema
 
+import datetime
+
+
 NA = None
 
 cdef class NAType(Scalar):
@@ -120,6 +123,32 @@ cdef class UInt64Value(ArrayValue):
         return ap.Value(self.index)
 
 
+cdef class DateValue(ArrayValue):
+    """Scalar wrapper around one element of a DATE array."""
+
+    def as_py(self):
+        """Return the element as a ``datetime.date``.
+
+        The stored integer is read as milliseconds since 1970-01-01. The
+        result is computed by adding a timedelta to the epoch date, so it
+        does not depend on the local time zone of the process (unlike
+        ``date.fromtimestamp``) and also works for values before 1970.
+        """
+        cdef CDateArray* ap = <CDateArray*> self.sp_array.get()
+        # date + timedelta only uses the (floored) whole-day part of the
+        # timedelta, so any sub-day remainder is dropped.
+        return (datetime.date(1970, 1, 1) +
+                datetime.timedelta(milliseconds=ap.Value(self.index)))
+
+
+cdef class TimestampValue(ArrayValue):
+    """Scalar wrapper around one element of a TIMESTAMP array."""
+
+    def as_py(self):
+        """Return the element as a naive ``datetime.datetime``.
+
+        The stored integer is read as a count of the type's time unit since
+        1970-01-01 00:00:00. The result is built with integer timedelta
+        arithmetic instead of a float division followed by
+        ``utcfromtimestamp``, so microseconds are exact and values before
+        the epoch are handled the same way on every platform.
+
+        Raises NotImplementedError for nanosecond-unit arrays.
+        """
+        cdef:
+            CTimestampArray* ap = <CTimestampArray*> self.sp_array.get()
+            CTimestampType* dtype = <CTimestampType*>ap.type().get()
+            int64_t val = ap.Value(self.index)
+
+        epoch = datetime.datetime(1970, 1, 1)
+        if dtype.unit == TimeUnit_SECOND:
+            return epoch + datetime.timedelta(seconds=val)
+        elif dtype.unit == TimeUnit_MILLI:
+            return epoch + datetime.timedelta(milliseconds=val)
+        elif dtype.unit == TimeUnit_MICRO:
+            return epoch + datetime.timedelta(microseconds=val)
+        else:
+            # TimeUnit_NANO
+            raise NotImplementedError("Cannot convert nanosecond timestamps to datetime.datetime")
+
+
 cdef class FloatValue(ArrayValue):
 
     def as_py(self):
@@ -184,6 +213,8 @@ cdef dict _scalar_classes = {
     Type_INT16: Int16Value,
     Type_INT32: Int32Value,
     Type_INT64: Int64Value,
+    Type_DATE: DateValue,
+    Type_TIMESTAMP: TimestampValue,
     Type_FLOAT: FloatValue,
     Type_DOUBLE: DoubleValue,
     Type_LIST: ListValue,

http://git-wip-us.apache.org/repos/asf/arrow/blob/d7845fcd/python/pyarrow/schema.pyx
----------------------------------------------------------------------
diff --git a/python/pyarrow/schema.pyx b/python/pyarrow/schema.pyx
index e0badb9..d05ac9e 100644
--- a/python/pyarrow/schema.pyx
+++ b/python/pyarrow/schema.pyx
@@ -164,6 +164,7 @@ cdef set PRIMITIVE_TYPES = set([
     Type_UINT16, Type_INT16,
     Type_UINT32, Type_INT32,
     Type_UINT64, Type_INT64,
+    Type_TIMESTAMP, Type_DATE,
     Type_FLOAT, Type_DOUBLE])
 
 def null():
@@ -196,6 +197,12 @@ def uint64():
 def int64():
     return primitive_type(Type_INT64)
 
+def timestamp():
+    """Return the type object produced by ``primitive_type(Type_TIMESTAMP)``.
+
+    NOTE(review): the time unit is not chosen here; presumably it is decided
+    by the C++ primitive-type lookup -- confirm.
+    """
+    return primitive_type(Type_TIMESTAMP)
+
+def date():
+    """Return the type object produced by ``primitive_type(Type_DATE)``."""
+    return primitive_type(Type_DATE)
+
 def float_():
     return primitive_type(Type_FLOAT)
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/d7845fcd/python/pyarrow/tests/test_convert_builtin.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py
index 34371b0..7dc1c1b 100644
--- a/python/pyarrow/tests/test_convert_builtin.py
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -18,6 +18,7 @@
 from pyarrow.compat import unittest
 import pyarrow
 
+import datetime
 
 class TestConvertList(unittest.TestCase):
 
@@ -78,6 +79,33 @@ class TestConvertList(unittest.TestCase):
         assert arr.type == pyarrow.string()
         assert arr.to_pylist() == ['foo', 'bar', None, 'arrow']
 
+    def test_date(self):
+        # A list of dates containing one None should become a DATE array
+        # whose elements convert back to the same Python objects.
+        y2k = datetime.date(2000, 1, 1)
+        epoch = datetime.date(1970, 1, 1)
+        future = datetime.date(2040, 2, 26)
+        arr = pyarrow.from_pylist([y2k, None, epoch, future])
+        assert len(arr) == 4
+        assert arr.type == pyarrow.date()
+        assert arr.null_count == 1
+        assert arr[0].as_py() == y2k
+        assert arr[1].as_py() is None
+        assert arr[2].as_py() == epoch
+        assert arr[3].as_py() == future
+
+    def test_timestamp(self):
+        # Naive datetimes with microsecond precision, plus one null slot,
+        # should become a TIMESTAMP array that round-trips through as_py().
+        first = datetime.datetime(2007, 7, 13, 1, 23, 34, 123456)
+        second = datetime.datetime(2006, 1, 13, 12, 34, 56, 432539)
+        third = datetime.datetime(2010, 8, 13, 5, 46, 57, 437699)
+        arr = pyarrow.from_pylist([first, None, second, third])
+        assert len(arr) == 4
+        assert arr.type == pyarrow.timestamp()
+        assert arr.null_count == 1
+        assert arr[0].as_py() == first
+        assert arr[1].as_py() is None
+        assert arr[2].as_py() == second
+        assert arr[3].as_py() == third
+
     def test_mixed_nesting_levels(self):
         pyarrow.from_pylist([1, 2, None])
         pyarrow.from_pylist([[1], [2], None])

http://git-wip-us.apache.org/repos/asf/arrow/blob/d7845fcd/python/pyarrow/tests/test_convert_pandas.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py
index b527ca7..cf50f3d 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import datetime
 import unittest
 
 import numpy as np
@@ -204,6 +205,20 @@ class TestPandasConversion(unittest.TestCase):
             })
         self._check_pandas_roundtrip(df, timestamps_to_ms=False)
 
+    def test_date(self):
+        # An object column of datetime.date values (with one None) should
+        # come back from Arrow as what pd.to_datetime produces for it.
+        dates = [
+            datetime.date(2000, 1, 1),
+            None,
+            datetime.date(1970, 1, 1),
+            datetime.date(2040, 2, 26),
+        ]
+        frame = pd.DataFrame({'date': dates})
+        roundtripped = A.from_pandas_dataframe(frame).to_pandas()
+        expected = frame.copy()
+        expected['date'] = pd.to_datetime(frame['date'])
+        tm.assert_frame_equal(roundtripped, expected)
+
     # def test_category(self):
     #     repeats = 1000
     #     values = [b'foo', None, u'bar', 'qux', np.nan]

http://git-wip-us.apache.org/repos/asf/arrow/blob/d7845fcd/python/src/pyarrow/adapters/builtin.cc
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/adapters/builtin.cc b/python/src/pyarrow/adapters/builtin.cc
index ac2f533..e0cb7c2 100644
--- a/python/src/pyarrow/adapters/builtin.cc
+++ b/python/src/pyarrow/adapters/builtin.cc
@@ -16,6 +16,7 @@
 // under the License.
 
 #include <Python.h>
+#include <datetime.h>
 #include <sstream>
 
 #include "pyarrow/adapters/builtin.h"
@@ -24,6 +25,7 @@
 #include "arrow/status.h"
 
 #include "pyarrow/helpers.h"
+#include "pyarrow/util/datetime.h"
 
 using arrow::ArrayBuilder;
 using arrow::DataType;
@@ -55,6 +57,8 @@ class ScalarVisitor {
       none_count_(0),
       bool_count_(0),
       int_count_(0),
+      date_count_(0),
+      timestamp_count_(0),
       float_count_(0),
       string_count_(0) {}
 
@@ -68,6 +72,10 @@ class ScalarVisitor {
       ++float_count_;
     } else if (IsPyInteger(obj)) {
       ++int_count_;
+    } else if (PyDate_CheckExact(obj)) {
+      ++date_count_;
+    } else if (PyDateTime_CheckExact(obj)) {
+      ++timestamp_count_;
     } else if (IsPyBaseString(obj)) {
       ++string_count_;
     } else {
@@ -82,6 +90,10 @@ class ScalarVisitor {
     } else if (int_count_) {
       // TODO(wesm): tighter type later
       return INT64;
+    } else if (date_count_) {
+      return DATE;
+    } else if (timestamp_count_) {
+      return TIMESTAMP_US;
     } else if (bool_count_) {
       return BOOL;
     } else if (string_count_) {
@@ -100,6 +112,8 @@ class ScalarVisitor {
   int64_t none_count_;
   int64_t bool_count_;
   int64_t int_count_;
+  int64_t date_count_;
+  int64_t timestamp_count_;
   int64_t float_count_;
   int64_t string_count_;
 
@@ -297,6 +311,56 @@ class Int64Converter : public TypedConverter<arrow::Int64Builder> {
   }
 };
 
+// Converts a Python sequence of datetime.date objects (or None) into a DATE
+// array. None becomes a null slot; every other element is passed to
+// PyDate_to_ms and the returned millisecond value is appended.
+class DateConverter : public TypedConverter<arrow::DateBuilder> {
+ public:
+  Status AppendData(PyObject* seq) override {
+    Py_ssize_t size = PySequence_Size(seq);
+    RETURN_NOT_OK(typed_builder_->Reserve(size));
+    for (int64_t i = 0; i < size; ++i) {
+      OwnedRef item(PySequence_GetItem(seq, i));
+      // PySequence_GetItem returns NULL with a Python exception set on
+      // failure; bail out here instead of dereferencing a null item below.
+      // NOTE(review): relies on RETURN_IF_PYERROR returning a failed Status
+      // whenever a Python error is pending -- confirm against its definition.
+      RETURN_IF_PYERROR();
+      if (item.obj() == Py_None) {
+        typed_builder_->AppendNull();
+      } else {
+        PyDateTime_Date* pydate = reinterpret_cast<PyDateTime_Date*>(item.obj());
+        typed_builder_->Append(PyDate_to_ms(pydate));
+      }
+    }
+    return Status::OK();
+  }
+};
+
+// Converts a Python sequence of datetime.datetime objects (or None) into a
+// timestamp array holding microseconds since 1970-01-01 00:00:00.
+//
+// Only the calendar and clock fields of each datetime are read; tzinfo is
+// not consulted. The value is computed with 64-bit integer arithmetic rather
+// than mktime()/lrint(), so it does not depend on the time zone of the
+// process and cannot overflow where `long` is 32 bits wide.
+class TimestampConverter : public TypedConverter<arrow::TimestampBuilder> {
+ public:
+  Status AppendData(PyObject* seq) override {
+    Py_ssize_t size = PySequence_Size(seq);
+    RETURN_NOT_OK(typed_builder_->Reserve(size));
+    for (int64_t i = 0; i < size; ++i) {
+      OwnedRef item(PySequence_GetItem(seq, i));
+      // PySequence_GetItem returns NULL with a Python exception set on
+      // failure; bail out here instead of dereferencing a null item below.
+      // NOTE(review): relies on RETURN_IF_PYERROR returning a failed Status
+      // whenever a Python error is pending -- confirm against its definition.
+      RETURN_IF_PYERROR();
+      if (item.obj() == Py_None) {
+        typed_builder_->AppendNull();
+      } else {
+        PyDateTime_DateTime* pydatetime = reinterpret_cast<PyDateTime_DateTime*>(item.obj());
+        const int64_t days = DaysSinceEpoch(PyDateTime_GET_YEAR(pydatetime),
+            PyDateTime_GET_MONTH(pydatetime), PyDateTime_GET_DAY(pydatetime));
+        const int64_t hours = days * 24 + PyDateTime_DATE_GET_HOUR(pydatetime);
+        const int64_t minutes = hours * 60 + PyDateTime_DATE_GET_MINUTE(pydatetime);
+        const int64_t seconds = minutes * 60 + PyDateTime_DATE_GET_SECOND(pydatetime);
+        const int64_t us = PyDateTime_DATE_GET_MICROSECOND(pydatetime);
+        // Microseconds since the epoch
+        typed_builder_->Append(seconds * 1000000 + us);
+      }
+    }
+    return Status::OK();
+  }
+
+ private:
+  // Number of days from 1970-01-01 to the given proleptic Gregorian date
+  // (negative before the epoch). `month` is 1..12 and `day` is 1..31.
+  static int64_t DaysSinceEpoch(int64_t year, int64_t month, int64_t day) {
+    // Count years from March so the leap day is the last day of the year.
+    if (month <= 2) { --year; }
+    // Python years start at 1, so `year` is >= 0 and `/` already floors.
+    const int64_t era = year / 400;
+    const int64_t year_of_era = year - era * 400;                          // [0, 399]
+    const int64_t shifted_month = month > 2 ? month - 3 : month + 9;       // Mar=0..Feb=11
+    const int64_t day_of_year = (153 * shifted_month + 2) / 5 + day - 1;   // [0, 365]
+    const int64_t day_of_era =
+        year_of_era * 365 + year_of_era / 4 - year_of_era / 100 + day_of_year;
+    // 146097 days per 400-year era; 719468 days from 0000-03-01 to 1970-01-01.
+    return era * 146097 + day_of_era - 719468;
+  }
+};
+
 class DoubleConverter : public TypedConverter<arrow::DoubleBuilder> {
  public:
   Status AppendData(PyObject* seq) override {
@@ -379,6 +443,10 @@ std::shared_ptr<SeqConverter> GetConverter(const std::shared_ptr<DataType>& type
       return std::make_shared<BoolConverter>();
     case Type::INT64:
       return std::make_shared<Int64Converter>();
+    case Type::DATE:
+      return std::make_shared<DateConverter>();
+    case Type::TIMESTAMP:
+      return std::make_shared<TimestampConverter>();
     case Type::DOUBLE:
       return std::make_shared<DoubleConverter>();
     case Type::STRING:
@@ -409,6 +477,7 @@ Status ListConverter::Init(const std::shared_ptr<ArrayBuilder>& builder) {
 Status ConvertPySequence(PyObject* obj, std::shared_ptr<arrow::Array>* out) {
   std::shared_ptr<DataType> type;
   int64_t size;
+  PyDateTime_IMPORT;
   RETURN_NOT_OK(InferArrowType(obj, &size, &type));
 
   // Handle NA / NullType case

http://git-wip-us.apache.org/repos/asf/arrow/blob/d7845fcd/python/src/pyarrow/adapters/pandas.cc
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/adapters/pandas.cc b/python/src/pyarrow/adapters/pandas.cc
index 64b7086..f8dff6d 100644
--- a/python/src/pyarrow/adapters/pandas.cc
+++ b/python/src/pyarrow/adapters/pandas.cc
@@ -35,6 +35,7 @@
 
 #include "pyarrow/common.h"
 #include "pyarrow/config.h"
+#include "pyarrow/util/datetime.h"
 
 namespace pyarrow {
 
@@ -167,6 +168,28 @@ class ArrowSerializer {
  private:
   Status ConvertData();
 
+  // Build a DATE array from the object array `arr_`.
+  //
+  // Elements that are exactly datetime.date are converted with PyDate_to_ms;
+  // every other element (None, NaN, or any other Python object) is written
+  // as a null. The GIL is held for the duration of the conversion.
+  Status ConvertDates(std::shared_ptr<Array>* out) {
+    PyAcquireGIL lock;
+
+    PyObject** objects = reinterpret_cast<PyObject**>(PyArray_DATA(arr_));
+    arrow::TypePtr date_type(new arrow::DateType());
+    arrow::DateBuilder date_builder(pool_, date_type);
+    RETURN_NOT_OK(date_builder.Resize(length_));
+
+    for (int64_t i = 0; i < length_; ++i) {
+      PyObject* obj = objects[i];
+      if (PyDate_CheckExact(obj)) {
+        PyDateTime_Date* pydate = reinterpret_cast<PyDateTime_Date*>(obj);
+        date_builder.Append(PyDate_to_ms(pydate));
+      } else {
+        date_builder.AppendNull();
+      }
+    }
+    return date_builder.Finish(out);
+  }
+
   Status ConvertObjectStrings(std::shared_ptr<Array>* out) {
     PyAcquireGIL lock;
 
@@ -369,6 +392,10 @@ inline Status ArrowSerializer<NPY_OBJECT>::Convert(std::shared_ptr<Array>* out)
 
   // TODO: mask not supported here
   const PyObject** objects = reinterpret_cast<const PyObject**>(PyArray_DATA(arr_));
+  {
+    PyAcquireGIL lock;
+    PyDateTime_IMPORT;
+  }
 
   for (int64_t i = 0; i < length_; ++i) {
     if (PyObject_is_null(objects[i])) {
@@ -377,6 +404,8 @@ inline Status ArrowSerializer<NPY_OBJECT>::Convert(std::shared_ptr<Array>* out)
       return ConvertObjectStrings(out);
     } else if (PyBool_Check(objects[i])) {
       return ConvertBooleans(out);
+    } else if (PyDate_CheckExact(objects[i])) {
+      return ConvertDates(out);
     } else {
       return Status::TypeError("unhandled python type");
     }
@@ -548,6 +577,17 @@ struct arrow_traits<arrow::Type::TIMESTAMP> {
 };
 
 template <>
+// NumPy/pandas conversion traits for arrow::Type::DATE columns.
+struct arrow_traits<arrow::Type::DATE> {
+  // Dates are exposed through the NPY_DATETIME dtype and its value type.
+  using T = npy_traits<NPY_DATETIME>::value_type;
+  static constexpr int npy_type = NPY_DATETIME;
+
+  // Null slots are written as the smallest int64 value.
+  static constexpr bool supports_nulls = true;
+  static constexpr int64_t na_value = std::numeric_limits<int64_t>::min();
+
+  // Flags used to select the matching ConvertValues overload.
+  static constexpr bool is_boolean = false;
+  static constexpr bool is_pandas_numeric_not_nullable = false;
+  static constexpr bool is_pandas_numeric_nullable = true;
+};
+
+template <>
 struct arrow_traits<arrow::Type::STRING> {
   static constexpr int npy_type = NPY_OBJECT;
   static constexpr bool supports_nulls = true;
@@ -567,24 +607,28 @@ static inline PyObject* make_pystring(const uint8_t* data, int32_t length) {
 
 inline void set_numpy_metadata(int type, DataType* datatype, PyArrayObject* out) {
   if (type == NPY_DATETIME) {
-    auto timestamp_type = static_cast<arrow::TimestampType*>(datatype);
-    // We only support ms resolution at the moment
     PyArray_Descr* descr = PyArray_DESCR(out);
     auto date_dtype = reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(descr->c_metadata);
+    if (datatype->type == arrow::Type::TIMESTAMP) {
+      auto timestamp_type = static_cast<arrow::TimestampType*>(datatype);
 
-    switch (timestamp_type->unit) {
-      case arrow::TimestampType::Unit::SECOND:
-        date_dtype->meta.base = NPY_FR_s;
-        break;
-      case arrow::TimestampType::Unit::MILLI:
-        date_dtype->meta.base = NPY_FR_ms;
-        break;
-      case arrow::TimestampType::Unit::MICRO:
-        date_dtype->meta.base = NPY_FR_us;
-        break;
-      case arrow::TimestampType::Unit::NANO:
-        date_dtype->meta.base = NPY_FR_ns;
-        break;
+      switch (timestamp_type->unit) {
+        case arrow::TimestampType::Unit::SECOND:
+          date_dtype->meta.base = NPY_FR_s;
+          break;
+        case arrow::TimestampType::Unit::MILLI:
+          date_dtype->meta.base = NPY_FR_ms;
+          break;
+        case arrow::TimestampType::Unit::MICRO:
+          date_dtype->meta.base = NPY_FR_us;
+          break;
+        case arrow::TimestampType::Unit::NANO:
+          date_dtype->meta.base = NPY_FR_ns;
+          break;
+      }
+    } else {
+      // datatype->type == arrow::Type::DATE
+      date_dtype->meta.base = NPY_FR_D;
     }
   }
 }
@@ -666,7 +710,7 @@ class ArrowDeserializer {
 
   template <int T2>
   inline typename std::enable_if<
-    arrow_traits<T2>::is_pandas_numeric_nullable, Status>::type
+    (T2 != arrow::Type::DATE) & arrow_traits<T2>::is_pandas_numeric_nullable, Status>::type
   ConvertValues(const std::shared_ptr<arrow::ChunkedArray>& data) {
     typedef typename arrow_traits<T2>::T T;
     size_t chunk_offset = 0;
@@ -697,6 +741,32 @@ class ArrowDeserializer {
     return Status::OK();
   }
 
+  // DATE specialization: converts millisecond date values into whole days
+  // for the NumPy output array, writing na_value for null slots.
+  //
+  // The day number is obtained with floor division, so a negative value that
+  // is not an exact multiple of one day maps to the earlier calendar day
+  // instead of being truncated toward zero.
+  template <int T2>
+  inline typename std::enable_if<
+    T2 == arrow::Type::DATE, Status>::type
+  ConvertValues(const std::shared_ptr<arrow::ChunkedArray>& data) {
+    typedef typename arrow_traits<T2>::T T;
+    // There are 1000 * 60 * 60 * 24 = 86400000ms in a day
+    constexpr int64_t kMillisecondsPerDay = 86400000;
+    size_t chunk_offset = 0;
+
+    RETURN_NOT_OK(AllocateOutput(arrow_traits<T2>::npy_type));
+
+    for (int c = 0; c < data->num_chunks(); c++) {
+      const std::shared_ptr<Array> arr = data->chunk(c);
+      auto prim_arr = static_cast<arrow::PrimitiveArray*>(arr.get());
+      auto in_values = reinterpret_cast<const T*>(prim_arr->data()->data());
+      auto out_values = reinterpret_cast<T*>(PyArray_DATA(out_)) + chunk_offset;
+
+      for (int64_t i = 0; i < arr->length(); ++i) {
+        if (arr->IsNull(i)) {
+          out_values[i] = arrow_traits<T2>::na_value;
+          continue;
+        }
+        const T ms = in_values[i];
+        T days = ms / kMillisecondsPerDay;
+        // C++ division truncates toward zero; step back one day when a
+        // negative value leaves a remainder.
+        if (ms % kMillisecondsPerDay < 0) { --days; }
+        out_values[i] = days;
+      }
+
+      chunk_offset += arr->length();
+    }
+
+    return Status::OK();
+  }
+
   // Integer specialization
   template <int T2>
   inline typename std::enable_if<
@@ -879,6 +949,7 @@ Status ConvertColumnToPandas(const std::shared_ptr<Column>& col, PyObject* py_re
     FROM_ARROW_CASE(FLOAT);
     FROM_ARROW_CASE(DOUBLE);
     FROM_ARROW_CASE(STRING);
+    FROM_ARROW_CASE(DATE);
     FROM_ARROW_CASE(TIMESTAMP);
     default:
       return Status::NotImplemented("Arrow type reading not implemented");

http://git-wip-us.apache.org/repos/asf/arrow/blob/d7845fcd/python/src/pyarrow/helpers.cc
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/helpers.cc b/python/src/pyarrow/helpers.cc
index 08003aa..af92744 100644
--- a/python/src/pyarrow/helpers.cc
+++ b/python/src/pyarrow/helpers.cc
@@ -33,6 +33,8 @@ const std::shared_ptr<Int8Type> INT8 = std::make_shared<Int8Type>();
 const std::shared_ptr<Int16Type> INT16 = std::make_shared<Int16Type>();
 const std::shared_ptr<Int32Type> INT32 = std::make_shared<Int32Type>();
 const std::shared_ptr<Int64Type> INT64 = std::make_shared<Int64Type>();
+const std::shared_ptr<DateType> DATE = std::make_shared<DateType>();
+const std::shared_ptr<TimestampType> TIMESTAMP_US = std::make_shared<TimestampType>(TimeUnit::MICRO);
 const std::shared_ptr<FloatType> FLOAT = std::make_shared<FloatType>();
 const std::shared_ptr<DoubleType> DOUBLE = std::make_shared<DoubleType>();
 const std::shared_ptr<StringType> STRING = std::make_shared<StringType>();
@@ -54,6 +56,10 @@ std::shared_ptr<DataType> GetPrimitiveType(Type::type type) {
     GET_PRIMITIVE_TYPE(INT32, Int32Type);
     GET_PRIMITIVE_TYPE(UINT64, UInt64Type);
     GET_PRIMITIVE_TYPE(INT64, Int64Type);
+    GET_PRIMITIVE_TYPE(DATE, DateType);
+    case Type::TIMESTAMP:
+      return TIMESTAMP_US;
+      break;
     GET_PRIMITIVE_TYPE(BOOL, BooleanType);
     GET_PRIMITIVE_TYPE(FLOAT, FloatType);
     GET_PRIMITIVE_TYPE(DOUBLE, DoubleType);

http://git-wip-us.apache.org/repos/asf/arrow/blob/d7845fcd/python/src/pyarrow/helpers.h
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/helpers.h b/python/src/pyarrow/helpers.h
index fa9c713..e714bba 100644
--- a/python/src/pyarrow/helpers.h
+++ b/python/src/pyarrow/helpers.h
@@ -38,6 +38,8 @@ extern const std::shared_ptr<arrow::Int8Type> INT8;
 extern const std::shared_ptr<arrow::Int16Type> INT16;
 extern const std::shared_ptr<arrow::Int32Type> INT32;
 extern const std::shared_ptr<arrow::Int64Type> INT64;
+extern const std::shared_ptr<arrow::DateType> DATE;
+extern const std::shared_ptr<arrow::TimestampType> TIMESTAMP_US;
 extern const std::shared_ptr<arrow::FloatType> FLOAT;
 extern const std::shared_ptr<arrow::DoubleType> DOUBLE;
 extern const std::shared_ptr<arrow::StringType> STRING;

http://git-wip-us.apache.org/repos/asf/arrow/blob/d7845fcd/python/src/pyarrow/util/datetime.h
----------------------------------------------------------------------
diff --git a/python/src/pyarrow/util/datetime.h b/python/src/pyarrow/util/datetime.h
new file mode 100644
index 0000000..b67accc
--- /dev/null
+++ b/python/src/pyarrow/util/datetime.h
@@ -0,0 +1,40 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef PYARROW_UTIL_DATETIME_H
+#define PYARROW_UTIL_DATETIME_H
+
+#include <Python.h>
+#include <datetime.h>
+
+namespace pyarrow {
+  
+/// Convert a Python datetime.date to milliseconds since 1970-01-01,
+/// treating the date as midnight of that day.
+///
+/// The day count is computed with 64-bit integer arithmetic on the proleptic
+/// Gregorian calendar instead of mktime()/difftime()/lrint(). The result
+/// therefore does not depend on the time zone of the process, is always an
+/// exact multiple of 86400000, and is not narrowed through `long`.
+///
+/// \param pydate a datetime.date (or datetime.datetime) object; only the
+///        year, month and day fields are read
+/// \return milliseconds since the epoch (negative for dates before 1970)
+inline int64_t PyDate_to_ms(PyDateTime_Date* pydate) {
+  int64_t year = PyDateTime_GET_YEAR(pydate);
+  const int64_t month = PyDateTime_GET_MONTH(pydate);  // 1..12
+  const int64_t day = PyDateTime_GET_DAY(pydate);      // 1..31
+  // Count years from March so the leap day is the last day of the year.
+  if (month <= 2) { --year; }
+  // Python years start at 1, so `year` is >= 0 and `/` already floors.
+  const int64_t era = year / 400;
+  const int64_t year_of_era = year - era * 400;                          // [0, 399]
+  const int64_t shifted_month = month > 2 ? month - 3 : month + 9;       // Mar=0..Feb=11
+  const int64_t day_of_year = (153 * shifted_month + 2) / 5 + day - 1;   // [0, 365]
+  const int64_t day_of_era =
+      year_of_era * 365 + year_of_era / 4 - year_of_era / 100 + day_of_year;
+  // 146097 days per 400-year era; 719468 days from 0000-03-01 to 1970-01-01.
+  const int64_t days_since_epoch = era * 146097 + day_of_era - 719468;
+  // Milliseconds since the epoch
+  return days_since_epoch * 86400000LL;
+}
+
+} // namespace pyarrow
+
+#endif // PYARROW_UTIL_DATETIME_H