You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by uw...@apache.org on 2018/01/28 16:31:07 UTC

[arrow] branch master updated: ARROW-1646: [Python] Handle NumPy scalar types

This is an automated email from the ASF dual-hosted git repository.

uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 0543953  ARROW-1646: [Python] Handle NumPy scalar types
0543953 is described below

commit 05439532e70c105f8f282e2963dc31e0340ec503
Author: Korn, Uwe <Uw...@blue-yonder.com>
AuthorDate: Sun Jan 28 17:30:59 2018 +0100

    ARROW-1646: [Python] Handle NumPy scalar types
    
    Author: Korn, Uwe <Uw...@blue-yonder.com>
    Author: Uwe L. Korn <uw...@xhochy.com>
    
    Closes #1475 from xhochy/ARROW-1646 and squashes the following commits:
    
    7d85879 [Uwe L. Korn] flake8
    eb4c08d [Korn, Uwe] ARROW-1646: [Python] pyarrow.array cannot handle NumPy scalar types
---
 cpp/src/arrow/python/builtin_convert.cc      |  34 ++
 cpp/src/arrow/python/numpy_convert.cc        |   3 +
 cpp/src/arrow/python/numpy_convert.h         |   2 +
 cpp/src/arrow/python/numpy_interop.h         |   1 +
 python/pyarrow/tests/test_convert_builtin.py | 805 +++++++++++++++------------
 5 files changed, 492 insertions(+), 353 deletions(-)

diff --git a/cpp/src/arrow/python/builtin_convert.cc b/cpp/src/arrow/python/builtin_convert.cc
index 71f2fde..f7a370c 100644
--- a/cpp/src/arrow/python/builtin_convert.cc
+++ b/cpp/src/arrow/python/builtin_convert.cc
@@ -32,6 +32,7 @@
 #include "arrow/util/logging.h"
 
 #include "arrow/python/helpers.h"
+#include "arrow/python/numpy_convert.h"
 #include "arrow/python/util/datetime.h"
 
 namespace arrow {
@@ -93,6 +94,21 @@ class ScalarVisitor {
       ++binary_count_;
     } else if (PyUnicode_Check(obj)) {
       ++unicode_count_;
+    } else if (PyArray_CheckAnyScalarExact(obj)) {
+      std::shared_ptr<DataType> type;
+      RETURN_NOT_OK(NumPyDtypeToArrow(PyArray_DescrFromScalar(obj), &type));
+      if (is_integer(type->id())) {
+        ++int_count_;
+      } else if (is_floating(type->id())) {
+        ++float_count_;
+      } else if (type->id() == Type::TIMESTAMP) {
+        ++timestamp_count_;
+      } else {
+        std::ostringstream ss;
+        ss << "Found a NumPy scalar with Arrow dtype that we cannot handle: ";
+        ss << type->ToString();
+        return Status::Invalid(ss.str());
+      }
     } else {
       // TODO(wesm): accumulate error information somewhere
       static std::string supported_types =
@@ -575,6 +591,24 @@ class TimestampConverter
           t = PyDateTime_to_ns(pydatetime);
           break;
       }
+    } else if (PyArray_CheckAnyScalarExact(item.obj())) {
+      // numpy.datetime64
+      std::shared_ptr<DataType> type;
+      RETURN_NOT_OK(NumPyDtypeToArrow(PyArray_DescrFromScalar(item.obj()), &type));
+      if (type->id() != Type::TIMESTAMP) {
+        std::ostringstream ss;
+        ss << "Expected np.datetime64 but got: ";
+        ss << type->ToString();
+        return Status::Invalid(ss.str());
+      }
+      const TimestampType& ttype = static_cast<const TimestampType&>(*type);
+      if (unit_ != ttype.unit()) {
+        return Status::NotImplemented(
+            "Cannot convert NumPy datetime64 objects with differing unit");
+      }
+
+      PyDatetimeScalarObject* obj = reinterpret_cast<PyDatetimeScalarObject*>(item.obj());
+      t = obj->obval;
     } else {
       t = static_cast<int64_t>(PyLong_AsLongLong(item.obj()));
       RETURN_IF_PYERROR();
diff --git a/cpp/src/arrow/python/numpy_convert.cc b/cpp/src/arrow/python/numpy_convert.cc
index 124745e..c2d055f 100644
--- a/cpp/src/arrow/python/numpy_convert.cc
+++ b/cpp/src/arrow/python/numpy_convert.cc
@@ -152,7 +152,10 @@ Status NumPyDtypeToArrow(PyObject* dtype, std::shared_ptr<DataType>* out) {
     return Status::TypeError("Did not pass numpy.dtype object");
   }
   PyArray_Descr* descr = reinterpret_cast<PyArray_Descr*>(dtype);
+  return NumPyDtypeToArrow(descr, out);
+}
 
+Status NumPyDtypeToArrow(PyArray_Descr* descr, std::shared_ptr<DataType>* out) {
   int type_num = cast_npy_type_compat(descr->type_num);
 
   switch (type_num) {
diff --git a/cpp/src/arrow/python/numpy_convert.h b/cpp/src/arrow/python/numpy_convert.h
index 93c4848..220e38f 100644
--- a/cpp/src/arrow/python/numpy_convert.h
+++ b/cpp/src/arrow/python/numpy_convert.h
@@ -56,6 +56,8 @@ bool is_contiguous(PyObject* array);
 
 ARROW_EXPORT
 Status NumPyDtypeToArrow(PyObject* dtype, std::shared_ptr<DataType>* out);
+ARROW_EXPORT
+Status NumPyDtypeToArrow(PyArray_Descr* descr, std::shared_ptr<DataType>* out);
 
 Status GetTensorType(PyObject* dtype, std::shared_ptr<DataType>* out);
 Status GetNumPyType(const DataType& type, int* type_num);
diff --git a/cpp/src/arrow/python/numpy_interop.h b/cpp/src/arrow/python/numpy_interop.h
index b93200c..8c569e2 100644
--- a/cpp/src/arrow/python/numpy_interop.h
+++ b/cpp/src/arrow/python/numpy_interop.h
@@ -40,6 +40,7 @@
 #endif
 
 #include <numpy/arrayobject.h>
+#include <numpy/arrayscalars.h>
 #include <numpy/ufuncobject.h>
 
 namespace arrow {
diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py
index d7760da..fa603b1 100644
--- a/python/pyarrow/tests/test_convert_builtin.py
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -23,6 +23,8 @@ import pyarrow as pa
 
 import datetime
 import decimal
+import numpy as np
+import six
 
 
 class StrangeIterable:
@@ -33,356 +35,453 @@ class StrangeIterable:
         return self.lst.__iter__()
 
 
-class TestConvertIterable(unittest.TestCase):
-
-    def test_iterable_types(self):
-        arr1 = pa.array(StrangeIterable([0, 1, 2, 3]))
-        arr2 = pa.array((0, 1, 2, 3))
-
-        assert arr1.equals(arr2)
-
-    def test_empty_iterable(self):
-        arr = pa.array(StrangeIterable([]))
-        assert len(arr) == 0
-        assert arr.null_count == 0
-        assert arr.type == pa.null()
-        assert arr.to_pylist() == []
-
-
-class TestLimitedConvertIterator(unittest.TestCase):
-    def test_iterator_types(self):
-        arr1 = pa.array(iter(range(3)), type=pa.int64(), size=3)
-        arr2 = pa.array((0, 1, 2))
-        assert arr1.equals(arr2)
-
-    def test_iterator_size_overflow(self):
-        arr1 = pa.array(iter(range(3)), type=pa.int64(), size=2)
-        arr2 = pa.array((0, 1))
-        assert arr1.equals(arr2)
-
-    def test_iterator_size_underflow(self):
-        arr1 = pa.array(iter(range(3)), type=pa.int64(), size=10)
-        arr2 = pa.array((0, 1, 2))
-        assert arr1.equals(arr2)
-
-
-class TestConvertSequence(unittest.TestCase):
-
-    def test_sequence_types(self):
-        arr1 = pa.array([1, 2, 3])
-        arr2 = pa.array((1, 2, 3))
-
-        assert arr1.equals(arr2)
-
-    def test_boolean(self):
-        expected = [True, None, False, None]
-        arr = pa.array(expected)
-        assert len(arr) == 4
-        assert arr.null_count == 2
-        assert arr.type == pa.bool_()
-        assert arr.to_pylist() == expected
-
-    def test_empty_list(self):
-        arr = pa.array([])
-        assert len(arr) == 0
-        assert arr.null_count == 0
-        assert arr.type == pa.null()
-        assert arr.to_pylist() == []
-
-    def test_all_none(self):
-        arr = pa.array([None, None])
-        assert len(arr) == 2
-        assert arr.null_count == 2
-        assert arr.type == pa.null()
-        assert arr.to_pylist() == [None, None]
-
-    def test_integer(self):
-        expected = [1, None, 3, None]
-        arr = pa.array(expected)
-        assert len(arr) == 4
-        assert arr.null_count == 2
-        assert arr.type == pa.int64()
-        assert arr.to_pylist() == expected
-
-    def test_garbage_collection(self):
-        import gc
-
-        # Force the cyclic garbage collector to run
-        gc.collect()
-
-        bytes_before = pa.total_allocated_bytes()
-        pa.array([1, None, 3, None])
-        gc.collect()
-        assert pa.total_allocated_bytes() == bytes_before
-
-    def test_double(self):
-        data = [1.5, 1, None, 2.5, None, None]
-        arr = pa.array(data)
-        assert len(arr) == 6
-        assert arr.null_count == 3
-        assert arr.type == pa.float64()
-        assert arr.to_pylist() == data
-
-    def test_unicode(self):
-        data = [u'foo', u'bar', None, u'mañana']
-        arr = pa.array(data)
-        assert len(arr) == 4
-        assert arr.null_count == 1
-        assert arr.type == pa.string()
-        assert arr.to_pylist() == data
-
-    def test_bytes(self):
-        u1 = b'ma\xc3\xb1ana'
-        data = [b'foo',
-                u1.decode('utf-8'),  # unicode gets encoded,
-                None]
-        arr = pa.array(data)
-        assert len(arr) == 3
-        assert arr.null_count == 1
-        assert arr.type == pa.binary()
-        assert arr.to_pylist() == [b'foo', u1, None]
-
-    def test_utf8_to_unicode(self):
-        # ARROW-1225
-        data = [b'foo', None, b'bar']
-        arr = pa.array(data, type=pa.string())
-        assert arr[0].as_py() == u'foo'
-
-        # test a non-utf8 unicode string
-        val = (u'mañana').encode('utf-16-le')
-        with pytest.raises(pa.ArrowException):
-            pa.array([val], type=pa.string())
-
-    def test_fixed_size_bytes(self):
-        data = [b'foof', None, b'barb', b'2346']
-        arr = pa.array(data, type=pa.binary(4))
-        assert len(arr) == 4
-        assert arr.null_count == 1
-        assert arr.type == pa.binary(4)
-        assert arr.to_pylist() == data
-
-    def test_fixed_size_bytes_does_not_accept_varying_lengths(self):
-        data = [b'foo', None, b'barb', b'2346']
-        with self.assertRaises(pa.ArrowInvalid):
-            pa.array(data, type=pa.binary(4))
-
-    def test_date(self):
-        data = [datetime.date(2000, 1, 1), None, datetime.date(1970, 1, 1),
-                datetime.date(2040, 2, 26)]
-        arr = pa.array(data)
-        assert len(arr) == 4
-        assert arr.type == pa.date64()
-        assert arr.null_count == 1
-        assert arr[0].as_py() == datetime.date(2000, 1, 1)
-        assert arr[1].as_py() is None
-        assert arr[2].as_py() == datetime.date(1970, 1, 1)
-        assert arr[3].as_py() == datetime.date(2040, 2, 26)
-
-    def test_date32(self):
-        data = [datetime.date(2000, 1, 1), None]
-        arr = pa.array(data, type=pa.date32())
-
-        data2 = [10957, None]
-        arr2 = pa.array(data2, type=pa.date32())
-
-        for x in [arr, arr2]:
-            assert len(x) == 2
-            assert x.type == pa.date32()
-            assert x.null_count == 1
-            assert x[0].as_py() == datetime.date(2000, 1, 1)
-            assert x[1] is pa.NA
-
-        # Overflow
-        data3 = [2**32, None]
-        with pytest.raises(pa.ArrowException):
-            pa.array(data3, type=pa.date32())
-
-    def test_timestamp(self):
-        data = [
-            datetime.datetime(2007, 7, 13, 1, 23, 34, 123456),
-            None,
-            datetime.datetime(2006, 1, 13, 12, 34, 56, 432539),
-            datetime.datetime(2010, 8, 13, 5, 46, 57, 437699)
-        ]
-        arr = pa.array(data)
-        assert len(arr) == 4
-        assert arr.type == pa.timestamp('us')
-        assert arr.null_count == 1
-        assert arr[0].as_py() == datetime.datetime(2007, 7, 13, 1,
-                                                   23, 34, 123456)
-        assert arr[1].as_py() is None
-        assert arr[2].as_py() == datetime.datetime(2006, 1, 13, 12,
-                                                   34, 56, 432539)
-        assert arr[3].as_py() == datetime.datetime(2010, 8, 13, 5,
-                                                   46, 57, 437699)
-
-    def test_timestamp_with_unit(self):
-        data = [
-            datetime.datetime(2007, 7, 13, 1, 23, 34, 123456),
-        ]
-
-        s = pa.timestamp('s')
-        ms = pa.timestamp('ms')
-        us = pa.timestamp('us')
-        ns = pa.timestamp('ns')
-
-        arr_s = pa.array(data, type=s)
-        assert len(arr_s) == 1
-        assert arr_s.type == s
-        assert arr_s[0].as_py() == datetime.datetime(2007, 7, 13, 1,
-                                                     23, 34, 0)
-
-        arr_ms = pa.array(data, type=ms)
-        assert len(arr_ms) == 1
-        assert arr_ms.type == ms
-        assert arr_ms[0].as_py() == datetime.datetime(2007, 7, 13, 1,
-                                                      23, 34, 123000)
-
-        arr_us = pa.array(data, type=us)
-        assert len(arr_us) == 1
-        assert arr_us.type == us
-        assert arr_us[0].as_py() == datetime.datetime(2007, 7, 13, 1,
-                                                      23, 34, 123456)
-
-        arr_ns = pa.array(data, type=ns)
-        assert len(arr_ns) == 1
-        assert arr_ns.type == ns
-        assert arr_ns[0].as_py() == datetime.datetime(2007, 7, 13, 1,
-                                                      23, 34, 123456)
-
-    def test_timestamp_from_int_with_unit(self):
-        data = [1]
-
-        s = pa.timestamp('s')
-        ms = pa.timestamp('ms')
-        us = pa.timestamp('us')
-        ns = pa.timestamp('ns')
-
-        arr_s = pa.array(data, type=s)
-        assert len(arr_s) == 1
-        assert arr_s.type == s
-        assert str(arr_s[0]) == "Timestamp('1970-01-01 00:00:01')"
-
-        arr_ms = pa.array(data, type=ms)
-        assert len(arr_ms) == 1
-        assert arr_ms.type == ms
-        assert str(arr_ms[0]) == "Timestamp('1970-01-01 00:00:00.001000')"
-
-        arr_us = pa.array(data, type=us)
-        assert len(arr_us) == 1
-        assert arr_us.type == us
-        assert str(arr_us[0]) == "Timestamp('1970-01-01 00:00:00.000001')"
-
-        arr_ns = pa.array(data, type=ns)
-        assert len(arr_ns) == 1
-        assert arr_ns.type == ns
-        assert str(arr_ns[0]) == "Timestamp('1970-01-01 00:00:00.000000001')"
-
-        with pytest.raises(pa.ArrowException):
-            class CustomClass():
-                pass
-            pa.array([1, CustomClass()], type=ns)
-            pa.array([1, CustomClass()], type=pa.date32())
-            pa.array([1, CustomClass()], type=pa.date64())
-
-    def test_mixed_nesting_levels(self):
-        pa.array([1, 2, None])
-        pa.array([[1], [2], None])
-        pa.array([[1], [2], [None]])
-
-        with self.assertRaises(pa.ArrowInvalid):
-            pa.array([1, 2, [1]])
-
-        with self.assertRaises(pa.ArrowInvalid):
-            pa.array([1, 2, []])
-
-        with self.assertRaises(pa.ArrowInvalid):
-            pa.array([[1], [2], [None, [1]]])
-
-    def test_list_of_int(self):
-        data = [[1, 2, 3], [], None, [1, 2]]
-        arr = pa.array(data)
-        assert len(arr) == 4
-        assert arr.null_count == 1
-        assert arr.type == pa.list_(pa.int64())
-        assert arr.to_pylist() == data
-
-    def test_mixed_types_fails(self):
-        data = ['a', 1, 2.0]
-        with self.assertRaises(pa.ArrowException):
-            pa.array(data)
-
-    def test_mixed_types_with_specified_type_fails(self):
-        data = ['-10', '-5', {'a': 1}, '0', '5', '10']
-
-        type = pa.string()
-        with self.assertRaises(pa.ArrowInvalid):
-            pa.array(data, type=type)
-
-    def test_decimal(self):
-        data = [decimal.Decimal('1234.183'), decimal.Decimal('8094.234')]
-        type = pa.decimal128(precision=7, scale=3)
-        arr = pa.array(data, type=type)
-        assert arr.to_pylist() == data
-
-    def test_decimal_different_precisions(self):
-        data = [
-            decimal.Decimal('1234234983.183'), decimal.Decimal('80943244.234')
-        ]
-        type = pa.decimal128(precision=13, scale=3)
-        arr = pa.array(data, type=type)
-        assert arr.to_pylist() == data
-
-    def test_decimal_no_scale(self):
-        data = [decimal.Decimal('1234234983'), decimal.Decimal('8094324')]
-        type = pa.decimal128(precision=10)
-        arr = pa.array(data, type=type)
-        assert arr.to_pylist() == data
-
-    def test_decimal_negative(self):
-        data = [decimal.Decimal('-1234.234983'), decimal.Decimal('-8.094324')]
-        type = pa.decimal128(precision=10, scale=6)
-        arr = pa.array(data, type=type)
-        assert arr.to_pylist() == data
-
-    def test_decimal_no_whole_part(self):
-        data = [decimal.Decimal('-.4234983'), decimal.Decimal('.0103943')]
-        type = pa.decimal128(precision=7, scale=7)
-        arr = pa.array(data, type=type)
-        assert arr.to_pylist() == data
-
-    def test_decimal_large_integer(self):
-        data = [decimal.Decimal('-394029506937548693.42983'),
-                decimal.Decimal('32358695912932.01033')]
-        type = pa.decimal128(precision=23, scale=5)
-        arr = pa.array(data, type=type)
-        assert arr.to_pylist() == data
-
-    def test_range_types(self):
-        arr1 = pa.array(range(3))
-        arr2 = pa.array((0, 1, 2))
-        assert arr1.equals(arr2)
-
-    def test_empty_range(self):
-        arr = pa.array(range(0))
-        assert len(arr) == 0
-        assert arr.null_count == 0
-        assert arr.type == pa.null()
-        assert arr.to_pylist() == []
-
-    def test_structarray(self):
-        ints = pa.array([None, 2, 3], type=pa.int64())
-        strs = pa.array([u'a', None, u'c'], type=pa.string())
-        bools = pa.array([True, False, None], type=pa.bool_())
-        arr = pa.StructArray.from_arrays(
-            ['ints', 'strs', 'bools'],
-            [ints, strs, bools])
-
-        expected = [
-            {'ints': None, 'strs': u'a', 'bools': True},
-            {'ints': 2, 'strs': None, 'bools': False},
-            {'ints': 3, 'strs': u'c', 'bools': None},
-        ]
-
-        pylist = arr.to_pylist()
-        assert pylist == expected, (pylist, expected)
+def test_iterable_types():
+    arr1 = pa.array(StrangeIterable([0, 1, 2, 3]))
+    arr2 = pa.array((0, 1, 2, 3))
+
+    assert arr1.equals(arr2)
+
+
+def test_empty_iterable():
+    arr = pa.array(StrangeIterable([]))
+    assert len(arr) == 0
+    assert arr.null_count == 0
+    assert arr.type == pa.null()
+    assert arr.to_pylist() == []
+
+
+def test_limited_iterator_types():
+    arr1 = pa.array(iter(range(3)), type=pa.int64(), size=3)
+    arr2 = pa.array((0, 1, 2))
+    assert arr1.equals(arr2)
+
+
+def test_limited_iterator_size_overflow():
+    arr1 = pa.array(iter(range(3)), type=pa.int64(), size=2)
+    arr2 = pa.array((0, 1))
+    assert arr1.equals(arr2)
+
+
+def test_limited_iterator_size_underflow():
+    arr1 = pa.array(iter(range(3)), type=pa.int64(), size=10)
+    arr2 = pa.array((0, 1, 2))
+    assert arr1.equals(arr2)
+
+
+def _as_list(xs):
+    return xs
+
+
+def _as_tuple(xs):
+    return tuple(xs)
+
+
+def _as_dict_values(xs):
+    dct = {k: v for k, v in enumerate(xs)}
+    return six.viewvalues(dct)
+
+
+@pytest.mark.parametrize("seq", [_as_list, _as_tuple, _as_dict_values])
+def test_sequence_types(seq):
+    arr1 = pa.array(seq([1, 2, 3]))
+    arr2 = pa.array([1, 2, 3])
+
+    assert arr1.equals(arr2)
+
+
+@pytest.mark.parametrize("seq", [_as_list, _as_tuple, _as_dict_values])
+def test_sequence_boolean(seq):
+    expected = [True, None, False, None]
+    arr = pa.array(seq(expected))
+    assert len(arr) == 4
+    assert arr.null_count == 2
+    assert arr.type == pa.bool_()
+    assert arr.to_pylist() == expected
+
+
+@pytest.mark.parametrize("seq", [_as_list, _as_tuple, _as_dict_values])
+def test_sequence_numpy_boolean(seq):
+    expected = [np.bool(True), None, np.bool(False), None]
+    arr = pa.array(seq(expected))
+    assert len(arr) == 4
+    assert arr.null_count == 2
+    assert arr.type == pa.bool_()
+    assert arr.to_pylist() == expected
+
+
+@pytest.mark.parametrize("seq", [_as_list, _as_tuple, _as_dict_values])
+def test_empty_list(seq):
+    arr = pa.array(seq([]))
+    assert len(arr) == 0
+    assert arr.null_count == 0
+    assert arr.type == pa.null()
+    assert arr.to_pylist() == []
+
+
+def test_sequence_all_none():
+    arr = pa.array([None, None])
+    assert len(arr) == 2
+    assert arr.null_count == 2
+    assert arr.type == pa.null()
+    assert arr.to_pylist() == [None, None]
+
+
+@pytest.mark.parametrize("seq", [_as_list, _as_tuple, _as_dict_values])
+def test_sequence_integer(seq):
+    expected = [1, None, 3, None]
+    arr = pa.array(seq(expected))
+    assert len(arr) == 4
+    assert arr.null_count == 2
+    assert arr.type == pa.int64()
+    assert arr.to_pylist() == expected
+
+
+@pytest.mark.parametrize("seq", [_as_list, _as_tuple, _as_dict_values])
+@pytest.mark.parametrize("np_scalar", [np.int16, np.int32, np.int64, np.uint16,
+                                       np.uint32, np.uint64])
+def test_sequence_numpy_integer(seq, np_scalar):
+    expected = [np_scalar(1), None, np_scalar(3), None]
+    arr = pa.array(seq(expected))
+    assert len(arr) == 4
+    assert arr.null_count == 2
+    assert arr.type == pa.int64()
+    assert arr.to_pylist() == expected
+
+
+def test_garbage_collection():
+    import gc
+
+    # Force the cyclic garbage collector to run
+    gc.collect()
+
+    bytes_before = pa.total_allocated_bytes()
+    pa.array([1, None, 3, None])
+    gc.collect()
+    assert pa.total_allocated_bytes() == bytes_before
+
+
+def test_sequence_double():
+    data = [1.5, 1, None, 2.5, None, None]
+    arr = pa.array(data)
+    assert len(arr) == 6
+    assert arr.null_count == 3
+    assert arr.type == pa.float64()
+    assert arr.to_pylist() == data
+
+
+@pytest.mark.parametrize("seq", [_as_list, _as_tuple, _as_dict_values])
+@pytest.mark.parametrize("np_scalar", [np.float16, np.float32, np.float64])
+def test_sequence_numpy_double(seq, np_scalar):
+    data = [np_scalar(1.5), np_scalar(1), None, np_scalar(2.5), None, None]
+    arr = pa.array(seq(data))
+    assert len(arr) == 6
+    assert arr.null_count == 3
+    assert arr.type == pa.float64()
+    assert arr.to_pylist() == data
+
+
+def test_sequence_unicode():
+    data = [u'foo', u'bar', None, u'mañana']
+    arr = pa.array(data)
+    assert len(arr) == 4
+    assert arr.null_count == 1
+    assert arr.type == pa.string()
+    assert arr.to_pylist() == data
+
+
+def test_sequence_bytes():
+    u1 = b'ma\xc3\xb1ana'
+    data = [b'foo',
+            u1.decode('utf-8'),  # unicode gets encoded,
+            None]
+    arr = pa.array(data)
+    assert len(arr) == 3
+    assert arr.null_count == 1
+    assert arr.type == pa.binary()
+    assert arr.to_pylist() == [b'foo', u1, None]
+
+
+def test_sequence_utf8_to_unicode():
+    # ARROW-1225
+    data = [b'foo', None, b'bar']
+    arr = pa.array(data, type=pa.string())
+    assert arr[0].as_py() == u'foo'
+
+    # test a non-utf8 unicode string
+    val = (u'mañana').encode('utf-16-le')
+    with pytest.raises(pa.ArrowException):
+        pa.array([val], type=pa.string())
+
+
+def test_sequence_fixed_size_bytes():
+    data = [b'foof', None, b'barb', b'2346']
+    arr = pa.array(data, type=pa.binary(4))
+    assert len(arr) == 4
+    assert arr.null_count == 1
+    assert arr.type == pa.binary(4)
+    assert arr.to_pylist() == data
+
+
+def test_fixed_size_bytes_does_not_accept_varying_lengths():
+    data = [b'foo', None, b'barb', b'2346']
+    with pytest.raises(pa.ArrowInvalid):
+        pa.array(data, type=pa.binary(4))
+
+
+def test_sequence_date():
+    data = [datetime.date(2000, 1, 1), None, datetime.date(1970, 1, 1),
+            datetime.date(2040, 2, 26)]
+    arr = pa.array(data)
+    assert len(arr) == 4
+    assert arr.type == pa.date64()
+    assert arr.null_count == 1
+    assert arr[0].as_py() == datetime.date(2000, 1, 1)
+    assert arr[1].as_py() is None
+    assert arr[2].as_py() == datetime.date(1970, 1, 1)
+    assert arr[3].as_py() == datetime.date(2040, 2, 26)
+
+
+def test_sequence_date32():
+    data = [datetime.date(2000, 1, 1), None]
+    arr = pa.array(data, type=pa.date32())
+
+    data2 = [10957, None]
+    arr2 = pa.array(data2, type=pa.date32())
+
+    for x in [arr, arr2]:
+        assert len(x) == 2
+        assert x.type == pa.date32()
+        assert x.null_count == 1
+        assert x[0].as_py() == datetime.date(2000, 1, 1)
+        assert x[1] is pa.NA
+
+    # Overflow
+    data3 = [2**32, None]
+    with pytest.raises(pa.ArrowException):
+        pa.array(data3, type=pa.date32())
+
+
+def test_sequence_timestamp():
+    data = [
+        datetime.datetime(2007, 7, 13, 1, 23, 34, 123456),
+        None,
+        datetime.datetime(2006, 1, 13, 12, 34, 56, 432539),
+        datetime.datetime(2010, 8, 13, 5, 46, 57, 437699)
+    ]
+    arr = pa.array(data)
+    assert len(arr) == 4
+    assert arr.type == pa.timestamp('us')
+    assert arr.null_count == 1
+    assert arr[0].as_py() == datetime.datetime(2007, 7, 13, 1,
+                                               23, 34, 123456)
+    assert arr[1].as_py() is None
+    assert arr[2].as_py() == datetime.datetime(2006, 1, 13, 12,
+                                               34, 56, 432539)
+    assert arr[3].as_py() == datetime.datetime(2010, 8, 13, 5,
+                                               46, 57, 437699)
+
+
+def test_sequence_numpy_timestamp():
+    data = [
+        np.datetime64(datetime.datetime(2007, 7, 13, 1, 23, 34, 123456)),
+        None,
+        np.datetime64(datetime.datetime(2006, 1, 13, 12, 34, 56, 432539)),
+        np.datetime64(datetime.datetime(2010, 8, 13, 5, 46, 57, 437699))
+    ]
+    arr = pa.array(data)
+    assert len(arr) == 4
+    assert arr.type == pa.timestamp('us')
+    assert arr.null_count == 1
+    assert arr[0].as_py() == datetime.datetime(2007, 7, 13, 1,
+                                               23, 34, 123456)
+    assert arr[1].as_py() is None
+    assert arr[2].as_py() == datetime.datetime(2006, 1, 13, 12,
+                                               34, 56, 432539)
+    assert arr[3].as_py() == datetime.datetime(2010, 8, 13, 5,
+                                               46, 57, 437699)
+
+
+def test_sequence_timestamp_with_unit():
+    data = [
+        datetime.datetime(2007, 7, 13, 1, 23, 34, 123456),
+    ]
+
+    s = pa.timestamp('s')
+    ms = pa.timestamp('ms')
+    us = pa.timestamp('us')
+    ns = pa.timestamp('ns')
+
+    arr_s = pa.array(data, type=s)
+    assert len(arr_s) == 1
+    assert arr_s.type == s
+    assert arr_s[0].as_py() == datetime.datetime(2007, 7, 13, 1,
+                                                 23, 34, 0)
+
+    arr_ms = pa.array(data, type=ms)
+    assert len(arr_ms) == 1
+    assert arr_ms.type == ms
+    assert arr_ms[0].as_py() == datetime.datetime(2007, 7, 13, 1,
+                                                  23, 34, 123000)
+
+    arr_us = pa.array(data, type=us)
+    assert len(arr_us) == 1
+    assert arr_us.type == us
+    assert arr_us[0].as_py() == datetime.datetime(2007, 7, 13, 1,
+                                                  23, 34, 123456)
+
+    arr_ns = pa.array(data, type=ns)
+    assert len(arr_ns) == 1
+    assert arr_ns.type == ns
+    assert arr_ns[0].as_py() == datetime.datetime(2007, 7, 13, 1,
+                                                  23, 34, 123456)
+
+
+def test_sequence_timestamp_from_int_with_unit():
+    data = [1]
+
+    s = pa.timestamp('s')
+    ms = pa.timestamp('ms')
+    us = pa.timestamp('us')
+    ns = pa.timestamp('ns')
+
+    arr_s = pa.array(data, type=s)
+    assert len(arr_s) == 1
+    assert arr_s.type == s
+    assert str(arr_s[0]) == "Timestamp('1970-01-01 00:00:01')"
+
+    arr_ms = pa.array(data, type=ms)
+    assert len(arr_ms) == 1
+    assert arr_ms.type == ms
+    assert str(arr_ms[0]) == "Timestamp('1970-01-01 00:00:00.001000')"
+
+    arr_us = pa.array(data, type=us)
+    assert len(arr_us) == 1
+    assert arr_us.type == us
+    assert str(arr_us[0]) == "Timestamp('1970-01-01 00:00:00.000001')"
+
+    arr_ns = pa.array(data, type=ns)
+    assert len(arr_ns) == 1
+    assert arr_ns.type == ns
+    assert str(arr_ns[0]) == "Timestamp('1970-01-01 00:00:00.000000001')"
+
+    with pytest.raises(pa.ArrowException):
+        class CustomClass():
+            pass
+        pa.array([1, CustomClass()], type=ns)
+        pa.array([1, CustomClass()], type=pa.date32())
+        pa.array([1, CustomClass()], type=pa.date64())
+
+
+def test_sequence_mixed_nesting_levels():
+    pa.array([1, 2, None])
+    pa.array([[1], [2], None])
+    pa.array([[1], [2], [None]])
+
+    with pytest.raises(pa.ArrowInvalid):
+        pa.array([1, 2, [1]])
+
+    with pytest.raises(pa.ArrowInvalid):
+        pa.array([1, 2, []])
+
+    with pytest.raises(pa.ArrowInvalid):
+        pa.array([[1], [2], [None, [1]]])
+
+
+def test_sequence_list_of_int():
+    data = [[1, 2, 3], [], None, [1, 2]]
+    arr = pa.array(data)
+    assert len(arr) == 4
+    assert arr.null_count == 1
+    assert arr.type == pa.list_(pa.int64())
+    assert arr.to_pylist() == data
+
+
+def test_sequence_mixed_types_fails():
+    data = ['a', 1, 2.0]
+    with pytest.raises(pa.ArrowException):
+        pa.array(data)
+
+
+def test_sequence_mixed_types_with_specified_type_fails():
+    data = ['-10', '-5', {'a': 1}, '0', '5', '10']
+
+    type = pa.string()
+    with pytest.raises(pa.ArrowInvalid):
+        pa.array(data, type=type)
+
+
+def test_sequence_decimal():
+    data = [decimal.Decimal('1234.183'), decimal.Decimal('8094.234')]
+    type = pa.decimal128(precision=7, scale=3)
+    arr = pa.array(data, type=type)
+    assert arr.to_pylist() == data
+
+
+def test_sequence_decimal_different_precisions():
+    data = [
+        decimal.Decimal('1234234983.183'), decimal.Decimal('80943244.234')
+    ]
+    type = pa.decimal128(precision=13, scale=3)
+    arr = pa.array(data, type=type)
+    assert arr.to_pylist() == data
+
+
+def test_sequence_decimal_no_scale():
+    data = [decimal.Decimal('1234234983'), decimal.Decimal('8094324')]
+    type = pa.decimal128(precision=10)
+    arr = pa.array(data, type=type)
+    assert arr.to_pylist() == data
+
+
+def test_sequence_decimal_negative():
+    data = [decimal.Decimal('-1234.234983'), decimal.Decimal('-8.094324')]
+    type = pa.decimal128(precision=10, scale=6)
+    arr = pa.array(data, type=type)
+    assert arr.to_pylist() == data
+
+
+def test_sequence_decimal_no_whole_part():
+    data = [decimal.Decimal('-.4234983'), decimal.Decimal('.0103943')]
+    type = pa.decimal128(precision=7, scale=7)
+    arr = pa.array(data, type=type)
+    assert arr.to_pylist() == data
+
+
+def test_sequence_decimal_large_integer():
+    data = [decimal.Decimal('-394029506937548693.42983'),
+            decimal.Decimal('32358695912932.01033')]
+    type = pa.decimal128(precision=23, scale=5)
+    arr = pa.array(data, type=type)
+    assert arr.to_pylist() == data
+
+
+def test_range_types():
+    arr1 = pa.array(range(3))
+    arr2 = pa.array((0, 1, 2))
+    assert arr1.equals(arr2)
+
+
+def test_empty_range():
+    arr = pa.array(range(0))
+    assert len(arr) == 0
+    assert arr.null_count == 0
+    assert arr.type == pa.null()
+    assert arr.to_pylist() == []
+
+
+def test_structarray():
+    ints = pa.array([None, 2, 3], type=pa.int64())
+    strs = pa.array([u'a', None, u'c'], type=pa.string())
+    bools = pa.array([True, False, None], type=pa.bool_())
+    arr = pa.StructArray.from_arrays(
+        ['ints', 'strs', 'bools'],
+        [ints, strs, bools])
+
+    expected = [
+        {'ints': None, 'strs': u'a', 'bools': True},
+        {'ints': 2, 'strs': None, 'bools': False},
+        {'ints': 3, 'strs': u'c', 'bools': None},
+    ]
+
+    pylist = arr.to_pylist()
+    assert pylist == expected, (pylist, expected)

-- 
To stop receiving notification emails like this one, please contact
uwe@apache.org.