You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by uw...@apache.org on 2018/07/17 12:54:31 UTC

[arrow] branch master updated: ARROW-2806: [C++/Python] More consistent null/nan handling

This is an automated email from the ASF dual-hosted git repository.

uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 3419058  ARROW-2806: [C++/Python] More consistent null/nan handling
3419058 is described below

commit 34190587a6d5c1fa49c2d6025b1f305b4f169d75
Author: Korn, Uwe <Uw...@blue-yonder.com>
AuthorDate: Tue Jul 17 14:54:21 2018 +0200

    ARROW-2806: [C++/Python] More consistent null/nan handling
    
    I'll take care of the cast issues mentioned in the ticket in a separate PR; I have already opened https://issues.apache.org/jira/browse/ARROW-2854 for them.
    
    Author: Korn, Uwe <Uw...@blue-yonder.com>
    
    Closes #2270 from xhochy/ARROW-2806 and squashes the following commits:
    
    418f3fb0 <Korn, Uwe> ARROW-2806:  More consistent null/nan handling
---
 cpp/cmake_modules/FindClangTools.cmake       |   6 +-
 cpp/src/arrow/python/builtin_convert.cc      | 179 +++++++++++++++++++++------
 cpp/src/arrow/python/builtin_convert.h       |  12 +-
 cpp/src/arrow/python/numpy_to_arrow.cc       |   9 +-
 cpp/src/arrow/python/python-test.cc          |  10 +-
 python/doc/source/data.rst                   |  15 +++
 python/pyarrow/array.pxi                     |  17 ++-
 python/pyarrow/includes/libarrow.pxd         |   7 +-
 python/pyarrow/tests/test_convert_builtin.py |  74 ++++++++++-
 9 files changed, 262 insertions(+), 67 deletions(-)

diff --git a/cpp/cmake_modules/FindClangTools.cmake b/cpp/cmake_modules/FindClangTools.cmake
index 7d1d2d1..215a5cd 100644
--- a/cpp/cmake_modules/FindClangTools.cmake
+++ b/cpp/cmake_modules/FindClangTools.cmake
@@ -86,12 +86,10 @@ if (CLANG_FORMAT_VERSION)
 
         if ("${CLANG_FORMAT_BIN}" STREQUAL "CLANG_FORMAT_BIN-NOTFOUND")
           # binary was still not found, look into Cellar
-          # TODO: This currently only works for '.0' patch releases as
-          #       find_program does not support regular expressions
-          #       in the paths.
+          file(GLOB CLANG_FORMAT_PATH "${HOMEBREW_PREFIX}/Cellar/llvm/${CLANG_FORMAT_VERSION}.*")
           find_program(CLANG_FORMAT_BIN
             NAMES clang-format
-            PATHS "${HOMEBREW_PREFIX}/Cellar/llvm/${CLANG_FORMAT_VERSION}.0/bin"
+            PATHS "${CLANG_FORMAT_PATH}/bin"
                   NO_DEFAULT_PATH
           )
         endif()
diff --git a/cpp/src/arrow/python/builtin_convert.cc b/cpp/src/arrow/python/builtin_convert.cc
index 49f2b31..f193961 100644
--- a/cpp/src/arrow/python/builtin_convert.cc
+++ b/cpp/src/arrow/python/builtin_convert.cc
@@ -427,7 +427,7 @@ class BoolConverter : public TypedConverterVisitor<BooleanBuilder, BoolConverter
   }
 };
 
-template <typename IntType>
+template <typename IntType, bool from_pandas = true>
 class TypedIntConverter
     : public TypedConverterVisitor<NumericBuilder<IntType>, TypedIntConverter<IntType>> {
  public:
@@ -439,6 +439,23 @@ class TypedIntConverter
   }
 };
 
+template <typename IntType>
+class TypedIntConverter<IntType, false>
+    : public TypedConverterVisitor<NumericBuilder<IntType>,
+                                   TypedIntConverter<IntType, false>> {
+ public:
+  Status AppendSingle(PyObject* obj) {
+    return (obj == Py_None) ? this->AppendNull() : this->AppendItem(obj);
+  }
+
+  // Append a non-missing item
+  Status AppendItem(PyObject* obj) {
+    typename IntType::c_type value;
+    RETURN_NOT_OK(internal::CIntFromPython(obj, &value));
+    return this->typed_builder_->Append(value);
+  }
+};
+
 class Date32Converter : public TypedConverterVisitor<Date32Builder, Date32Converter> {
  public:
   // Append a non-missing item
@@ -523,18 +540,37 @@ class TimestampConverter
   TimeUnit::type unit_;
 };
 
+template <bool from_pandas = true>
 class Float16Converter
-    : public TypedConverterVisitor<HalfFloatBuilder, Float16Converter> {
+    : public TypedConverterVisitor<HalfFloatBuilder, Float16Converter<from_pandas>> {
  public:
   // Append a non-missing item
   Status AppendItem(PyObject* obj) {
     npy_half val;
     RETURN_NOT_OK(PyFloat_AsHalf(obj, &val));
-    return typed_builder_->Append(val);
+    return this->typed_builder_->Append(val);
+  }
+};
+
+template <>
+class Float16Converter<false>
+    : public TypedConverterVisitor<HalfFloatBuilder, Float16Converter<false>> {
+ public:
+  Status AppendSingle(PyObject* obj) override {
+    return (obj == Py_None) ? this->AppendNull() : this->AppendItem(obj);
+  }
+
+  // Append a non-missing item
+  Status AppendItem(PyObject* obj) {
+    npy_half val;
+    RETURN_NOT_OK(PyFloat_AsHalf(obj, &val));
+    return this->typed_builder_->Append(val);
   }
 };
 
-class Float32Converter : public TypedConverterVisitor<FloatBuilder, Float32Converter> {
+template <bool from_pandas = true>
+class Float32Converter
+    : public TypedConverterVisitor<FloatBuilder, Float32Converter<true>> {
  public:
   // Append a non-missing item
   Status AppendItem(PyObject* obj) {
@@ -544,7 +580,25 @@ class Float32Converter : public TypedConverterVisitor<FloatBuilder, Float32Conve
   }
 };
 
-class DoubleConverter : public TypedConverterVisitor<DoubleBuilder, DoubleConverter> {
+template <>
+class Float32Converter<false>
+    : public TypedConverterVisitor<FloatBuilder, Float32Converter<false>> {
+ public:
+  Status AppendSingle(PyObject* obj) override {
+    return (obj == Py_None) ? this->AppendNull() : this->AppendItem(obj);
+  }
+
+  // Append a non-missing item
+  Status AppendItem(PyObject* obj) {
+    float val = static_cast<float>(PyFloat_AsDouble(obj));
+    RETURN_IF_PYERROR();
+    return this->typed_builder_->Append(val);
+  }
+};
+
+template <bool from_pandas = true>
+class DoubleConverter
+    : public TypedConverterVisitor<DoubleBuilder, DoubleConverter<true>> {
  public:
   // Append a non-missing item
   Status AppendItem(PyObject* obj) {
@@ -554,6 +608,22 @@ class DoubleConverter : public TypedConverterVisitor<DoubleBuilder, DoubleConver
   }
 };
 
+template <>
+class DoubleConverter<false>
+    : public TypedConverterVisitor<DoubleBuilder, DoubleConverter<false>> {
+ public:
+  Status AppendSingle(PyObject* obj) override {
+    return (obj == Py_None) ? this->AppendNull() : this->AppendItem(obj);
+  }
+
+  // Append a non-missing item
+  Status AppendItem(PyObject* obj) {
+    double val = PyFloat_AsDouble(obj);
+    RETURN_IF_PYERROR();
+    return this->typed_builder_->Append(val);
+  }
+};
+
 class BytesConverter : public TypedConverterVisitor<BinaryBuilder, BytesConverter> {
  public:
   // Append a non-missing item
@@ -581,6 +651,8 @@ class UTF8Converter : public TypedConverterVisitor<StringBuilder, UTF8Converter>
 
 class ListConverter : public TypedConverterVisitor<ListBuilder, ListConverter> {
  public:
+  explicit ListConverter(bool from_pandas) : from_pandas_(from_pandas) {}
+
   Status Init(ArrayBuilder* builder) override;
 
   // Append a non-missing item
@@ -595,10 +667,13 @@ class ListConverter : public TypedConverterVisitor<ListBuilder, ListConverter> {
 
  protected:
   std::unique_ptr<SeqConverter> value_converter_;
+  bool from_pandas_;
 };
 
 class StructConverter : public TypedConverterVisitor<StructBuilder, StructConverter> {
  public:
+  explicit StructConverter(bool from_pandas) : from_pandas_(from_pandas) {}
+
   Status Init(ArrayBuilder* builder) override;
 
   // Append a non-missing item
@@ -660,6 +735,7 @@ class StructConverter : public TypedConverterVisitor<StructBuilder, StructConver
   int num_fields_;
   // Whether we're converting from a sequence of dicts or tuples
   enum { UNKNOWN, DICTS, TUPLES } source_kind_ = UNKNOWN;
+  bool from_pandas_;
 };
 
 class DecimalConverter
@@ -674,29 +750,39 @@ class DecimalConverter
   }
 };
 
+#define INT_CONVERTER(ArrowType)                                                     \
+  {                                                                                  \
+    if (from_pandas) {                                                               \
+      return std::unique_ptr<SeqConverter>(new TypedIntConverter<ArrowType, true>);  \
+    } else {                                                                         \
+      return std::unique_ptr<SeqConverter>(new TypedIntConverter<ArrowType, false>); \
+    }                                                                                \
+  }
+
 // Dynamic constructor for sequence converters
-std::unique_ptr<SeqConverter> GetConverter(const std::shared_ptr<DataType>& type) {
+std::unique_ptr<SeqConverter> GetConverter(const std::shared_ptr<DataType>& type,
+                                           bool from_pandas) {
   switch (type->id()) {
     case Type::NA:
       return std::unique_ptr<SeqConverter>(new NullConverter);
     case Type::BOOL:
       return std::unique_ptr<SeqConverter>(new BoolConverter);
     case Type::INT8:
-      return std::unique_ptr<SeqConverter>(new TypedIntConverter<Int8Type>);
+      INT_CONVERTER(Int8Type)
     case Type::INT16:
-      return std::unique_ptr<SeqConverter>(new TypedIntConverter<Int16Type>);
+      INT_CONVERTER(Int16Type)
     case Type::INT32:
-      return std::unique_ptr<SeqConverter>(new TypedIntConverter<Int32Type>);
+      INT_CONVERTER(Int32Type)
     case Type::INT64:
-      return std::unique_ptr<SeqConverter>(new TypedIntConverter<Int64Type>);
+      INT_CONVERTER(Int64Type)
     case Type::UINT8:
-      return std::unique_ptr<SeqConverter>(new TypedIntConverter<UInt8Type>);
+      INT_CONVERTER(UInt8Type)
     case Type::UINT16:
-      return std::unique_ptr<SeqConverter>(new TypedIntConverter<UInt16Type>);
+      INT_CONVERTER(UInt16Type)
     case Type::UINT32:
-      return std::unique_ptr<SeqConverter>(new TypedIntConverter<UInt32Type>);
+      INT_CONVERTER(UInt32Type)
     case Type::UINT64:
-      return std::unique_ptr<SeqConverter>(new TypedIntConverter<UInt64Type>);
+      INT_CONVERTER(UInt64Type)
     case Type::DATE32:
       return std::unique_ptr<SeqConverter>(new Date32Converter);
     case Type::DATE64:
@@ -704,12 +790,27 @@ std::unique_ptr<SeqConverter> GetConverter(const std::shared_ptr<DataType>& type
     case Type::TIMESTAMP:
       return std::unique_ptr<SeqConverter>(
           new TimestampConverter(checked_cast<const TimestampType&>(*type).unit()));
-    case Type::HALF_FLOAT:
-      return std::unique_ptr<SeqConverter>(new Float16Converter);
-    case Type::FLOAT:
-      return std::unique_ptr<SeqConverter>(new Float32Converter);
-    case Type::DOUBLE:
-      return std::unique_ptr<SeqConverter>(new DoubleConverter);
+    case Type::HALF_FLOAT: {
+      if (from_pandas) {
+        return std::unique_ptr<SeqConverter>(new Float16Converter<true>);
+      } else {
+        return std::unique_ptr<SeqConverter>(new Float16Converter<false>);
+      }
+    }
+    case Type::FLOAT: {
+      if (from_pandas) {
+        return std::unique_ptr<SeqConverter>(new Float32Converter<true>);
+      } else {
+        return std::unique_ptr<SeqConverter>(new Float32Converter<false>);
+      }
+    }
+    case Type::DOUBLE: {
+      if (from_pandas) {
+        return std::unique_ptr<SeqConverter>(new DoubleConverter<true>);
+      } else {
+        return std::unique_ptr<SeqConverter>(new DoubleConverter<false>);
+      }
+    }
     case Type::BINARY:
       return std::unique_ptr<SeqConverter>(new BytesConverter);
     case Type::FIXED_SIZE_BINARY:
@@ -717,9 +818,9 @@ std::unique_ptr<SeqConverter> GetConverter(const std::shared_ptr<DataType>& type
     case Type::STRING:
       return std::unique_ptr<SeqConverter>(new UTF8Converter);
     case Type::LIST:
-      return std::unique_ptr<SeqConverter>(new ListConverter);
+      return std::unique_ptr<SeqConverter>(new ListConverter(from_pandas));
     case Type::STRUCT:
-      return std::unique_ptr<SeqConverter>(new StructConverter);
+      return std::unique_ptr<SeqConverter>(new StructConverter(from_pandas));
     case Type::DECIMAL:
       return std::unique_ptr<SeqConverter>(new DecimalConverter);
     default:
@@ -731,8 +832,8 @@ Status ListConverter::Init(ArrayBuilder* builder) {
   builder_ = builder;
   typed_builder_ = checked_cast<ListBuilder*>(builder);
 
-  value_converter_ =
-      GetConverter(checked_cast<const ListType&>(*builder->type()).value_type());
+  value_converter_ = GetConverter(
+      checked_cast<const ListType&>(*builder->type()).value_type(), from_pandas_);
   if (value_converter_ == nullptr) {
     return Status::NotImplemented("value type not implemented");
   }
@@ -756,7 +857,7 @@ Status StructConverter::Init(ArrayBuilder* builder) {
     const std::string& field_name(struct_type.child(i)->name());
     std::shared_ptr<DataType> field_type(struct_type.child(i)->type());
 
-    auto value_converter = GetConverter(field_type);
+    auto value_converter = GetConverter(field_type, from_pandas_);
     if (value_converter == nullptr) {
       return Status::NotImplemented("value type not implemented");
     }
@@ -774,9 +875,10 @@ Status StructConverter::Init(ArrayBuilder* builder) {
 }
 
 Status AppendPySequence(PyObject* obj, int64_t size,
-                        const std::shared_ptr<DataType>& type, ArrayBuilder* builder) {
+                        const std::shared_ptr<DataType>& type, ArrayBuilder* builder,
+                        bool from_pandas) {
   PyDateTime_IMPORT;
-  auto converter = GetConverter(type);
+  auto converter = GetConverter(type, from_pandas);
   if (converter == nullptr) {
     std::stringstream ss;
     ss << "No type converter implemented for " << type->ToString();
@@ -788,7 +890,8 @@ Status AppendPySequence(PyObject* obj, int64_t size,
 
 static Status ConvertPySequenceReal(PyObject* obj, int64_t size,
                                     const std::shared_ptr<DataType>* type,
-                                    MemoryPool* pool, std::shared_ptr<Array>* out) {
+                                    MemoryPool* pool, bool from_pandas,
+                                    std::shared_ptr<Array>* out) {
   PyAcquireGIL lock;
 
   PyObject* seq;
@@ -814,28 +917,30 @@ static Status ConvertPySequenceReal(PyObject* obj, int64_t size,
   // Give the sequence converter an array builder
   std::unique_ptr<ArrayBuilder> builder;
   RETURN_NOT_OK(MakeBuilder(pool, real_type, &builder));
-  RETURN_NOT_OK(AppendPySequence(seq, size, real_type, builder.get()));
+  RETURN_NOT_OK(AppendPySequence(seq, size, real_type, builder.get(), from_pandas));
   return builder->Finish(out);
 }
 
-Status ConvertPySequence(PyObject* obj, MemoryPool* pool, std::shared_ptr<Array>* out) {
-  return ConvertPySequenceReal(obj, -1, nullptr, pool, out);
+Status ConvertPySequence(PyObject* obj, MemoryPool* pool, bool from_pandas,
+                         std::shared_ptr<Array>* out) {
+  return ConvertPySequenceReal(obj, -1, nullptr, pool, from_pandas, out);
 }
 
 Status ConvertPySequence(PyObject* obj, const std::shared_ptr<DataType>& type,
-                         MemoryPool* pool, std::shared_ptr<Array>* out) {
-  return ConvertPySequenceReal(obj, -1, &type, pool, out);
+                         MemoryPool* pool, bool from_pandas,
+                         std::shared_ptr<Array>* out) {
+  return ConvertPySequenceReal(obj, -1, &type, pool, from_pandas, out);
 }
 
-Status ConvertPySequence(PyObject* obj, int64_t size, MemoryPool* pool,
+Status ConvertPySequence(PyObject* obj, int64_t size, MemoryPool* pool, bool from_pandas,
                          std::shared_ptr<Array>* out) {
-  return ConvertPySequenceReal(obj, size, nullptr, pool, out);
+  return ConvertPySequenceReal(obj, size, nullptr, pool, from_pandas, out);
 }
 
 Status ConvertPySequence(PyObject* obj, int64_t size,
                          const std::shared_ptr<DataType>& type, MemoryPool* pool,
-                         std::shared_ptr<Array>* out) {
-  return ConvertPySequenceReal(obj, size, &type, pool, out);
+                         bool from_pandas, std::shared_ptr<Array>* out) {
+  return ConvertPySequenceReal(obj, size, &type, pool, from_pandas, out);
 }
 
 }  // namespace py
diff --git a/cpp/src/arrow/python/builtin_convert.h b/cpp/src/arrow/python/builtin_convert.h
index 7a32bec..d9b5ecd 100644
--- a/cpp/src/arrow/python/builtin_convert.h
+++ b/cpp/src/arrow/python/builtin_convert.h
@@ -47,27 +47,29 @@ ARROW_EXPORT arrow::Status InferArrowTypeAndSize(
 
 ARROW_EXPORT arrow::Status AppendPySequence(PyObject* obj, int64_t size,
                                             const std::shared_ptr<arrow::DataType>& type,
-                                            arrow::ArrayBuilder* builder);
+                                            arrow::ArrayBuilder* builder,
+                                            bool from_pandas);
 
 // Type and size inference
 ARROW_EXPORT
-Status ConvertPySequence(PyObject* obj, MemoryPool* pool, std::shared_ptr<Array>* out);
+Status ConvertPySequence(PyObject* obj, MemoryPool* pool, bool from_pandas,
+                         std::shared_ptr<Array>* out);
 
 // Type inference only
 ARROW_EXPORT
-Status ConvertPySequence(PyObject* obj, int64_t size, MemoryPool* pool,
+Status ConvertPySequence(PyObject* obj, int64_t size, MemoryPool* pool, bool from_pandas,
                          std::shared_ptr<Array>* out);
 
 // Size inference only
 ARROW_EXPORT
 Status ConvertPySequence(PyObject* obj, const std::shared_ptr<DataType>& type,
-                         MemoryPool* pool, std::shared_ptr<Array>* out);
+                         MemoryPool* pool, bool from_pandas, std::shared_ptr<Array>* out);
 
 // No inference
 ARROW_EXPORT
 Status ConvertPySequence(PyObject* obj, int64_t size,
                          const std::shared_ptr<DataType>& type, MemoryPool* pool,
-                         std::shared_ptr<Array>* out);
+                         bool from_pandas, std::shared_ptr<Array>* out);
 
 ARROW_EXPORT
 Status InvalidConversion(PyObject* obj, const std::string& expected_type_name,
diff --git a/cpp/src/arrow/python/numpy_to_arrow.cc b/cpp/src/arrow/python/numpy_to_arrow.cc
index e18cced..09926ba 100644
--- a/cpp/src/arrow/python/numpy_to_arrow.cc
+++ b/cpp/src/arrow/python/numpy_to_arrow.cc
@@ -1273,7 +1273,8 @@ inline Status NumPyConverter::ConvertTypedLists(const std::shared_ptr<DataType>&
         ss << inferred_type->ToString() << " cannot be converted to " << type->ToString();
         return Status::TypeError(ss.str());
       }
-      return AppendPySequence(object, size, type, value_builder);
+      return AppendPySequence(object, size, type, value_builder,
+                              use_pandas_null_sentinels_);
     } else {
       return Status::TypeError("Unsupported Python type for list items");
     }
@@ -1368,7 +1369,8 @@ inline Status NumPyConverter::ConvertTypedLists<NPY_OBJECT, BinaryType>(
         ss << inferred_type->ToString() << " cannot be converted to BINARY.";
         return Status::TypeError(ss.str());
       }
-      return AppendPySequence(object, size, type, value_builder);
+      return AppendPySequence(object, size, type, value_builder,
+                              use_pandas_null_sentinels_);
     } else {
       return Status::TypeError("Unsupported Python type for list items");
     }
@@ -1425,7 +1427,8 @@ inline Status NumPyConverter::ConvertTypedLists<NPY_OBJECT, StringType>(
         ss << inferred_type->ToString() << " cannot be converted to STRING.";
         return Status::TypeError(ss.str());
       }
-      return AppendPySequence(object, size, type, value_builder);
+      return AppendPySequence(object, size, type, value_builder,
+                              use_pandas_null_sentinels_);
     } else {
       return Status::TypeError("Unsupported Python type for list items");
     }
diff --git a/cpp/src/arrow/python/python-test.cc b/cpp/src/arrow/python/python-test.cc
index abe93b0..9763bef 100644
--- a/cpp/src/arrow/python/python-test.cc
+++ b/cpp/src/arrow/python/python-test.cc
@@ -269,7 +269,7 @@ TEST(BuiltinConversionTest, TestMixedTypeFails) {
   ASSERT_EQ(PyList_SetItem(list, 1, integer), 0);
   ASSERT_EQ(PyList_SetItem(list, 2, doub), 0);
 
-  ASSERT_RAISES(TypeError, ConvertPySequence(list, pool, &arr));
+  ASSERT_RAISES(TypeError, ConvertPySequence(list, pool, false, &arr));
 }
 
 TEST_F(DecimalTest, FromPythonDecimalRescaleNotTruncateable) {
@@ -349,7 +349,7 @@ TEST_F(DecimalTest, TestNoneAndNaN) {
 
   MemoryPool* pool = default_memory_pool();
   std::shared_ptr<Array> arr;
-  ASSERT_OK(ConvertPySequence(list, pool, &arr));
+  ASSERT_OK(ConvertPySequence(list, pool, false, &arr));
   ASSERT_TRUE(arr->IsValid(0));
   ASSERT_TRUE(arr->IsNull(1));
   ASSERT_TRUE(arr->IsNull(2));
@@ -374,7 +374,7 @@ TEST_F(DecimalTest, TestMixedPrecisionAndScale) {
 
   MemoryPool* pool = default_memory_pool();
   std::shared_ptr<Array> arr;
-  ASSERT_OK(ConvertPySequence(list, pool, &arr));
+  ASSERT_OK(ConvertPySequence(list, pool, false, &arr));
   const auto& type = checked_cast<const DecimalType&>(*arr->type());
 
   int32_t expected_precision = 9;
@@ -402,7 +402,7 @@ TEST_F(DecimalTest, TestMixedPrecisionAndScaleSequenceConvert) {
   ASSERT_EQ(PyList_SetItem(list, 0, value1), 0);
   ASSERT_EQ(PyList_SetItem(list, 1, value2), 0);
 
-  ASSERT_OK(ConvertPySequence(list, pool, &arr));
+  ASSERT_OK(ConvertPySequence(list, pool, false, &arr));
 
   const auto& type = checked_cast<const Decimal128Type&>(*arr->type());
   ASSERT_EQ(3, type.precision());
@@ -438,7 +438,7 @@ TEST(PythonTest, ConstructStringArrayWithLeadingZeros) {
 
   std::shared_ptr<Array> out;
   auto pool = default_memory_pool();
-  ASSERT_OK(ConvertPySequence(list, pool, &out));
+  ASSERT_OK(ConvertPySequence(list, pool, false, &out));
 }
 
 }  // namespace py
diff --git a/python/doc/source/data.rst b/python/doc/source/data.rst
index 0717260..3f4169c 100644
--- a/python/doc/source/data.rst
+++ b/python/doc/source/data.rst
@@ -198,6 +198,21 @@ Arrays can be sliced without copying:
 
    arr[1:3]
 
+None values and NAN handling
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+As mentioned in the above section, the Python object ``None`` is always
+converted to an Arrow null element on the conversion to ``pyarrow.Array``. For
+the float NaN value which is either represented by the Python object
+``float('nan')`` or ``numpy.nan`` we normally convert it to a *valid* float
+value during the conversion. If an integer input is supplied to
+``pyarrow.array`` that contains ``np.nan``, ``ValueError`` is raised.
+
+For better compatibility with Pandas, we support interpreting NaN values as
+null elements. This is enabled automatically on all ``from_pandas`` functions
+and can be enabled on the other conversion functions by passing
+``from_pandas=True`` as a function parameter.
+
 List arrays
 ~~~~~~~~~~~
 
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 9d14e1e..de59509 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -17,25 +17,29 @@
 
 
 cdef _sequence_to_array(object sequence, object size, DataType type,
-                        CMemoryPool* pool):
+                        CMemoryPool* pool, c_bool from_pandas):
     cdef shared_ptr[CArray] out
     cdef int64_t c_size
     if type is None:
         if size is None:
             with nogil:
-                check_status(ConvertPySequence(sequence, pool, &out))
+                check_status(
+                    ConvertPySequence(sequence, pool, from_pandas, &out)
+                )
         else:
             c_size = size
             with nogil:
                 check_status(
-                    ConvertPySequence(sequence, c_size, pool, &out)
+                    ConvertPySequence(
+                        sequence, c_size, pool, from_pandas, &out
+                    )
                 )
     else:
         if size is None:
             with nogil:
                 check_status(
                     ConvertPySequence(
-                        sequence, type.sp_type, pool, &out,
+                        sequence, type.sp_type, pool, from_pandas, &out,
                     )
                 )
         else:
@@ -43,7 +47,8 @@ cdef _sequence_to_array(object sequence, object size, DataType type,
             with nogil:
                 check_status(
                     ConvertPySequence(
-                        sequence, c_size, type.sp_type, pool, &out,
+                        sequence, c_size, type.sp_type, pool, from_pandas,
+                        &out,
                     )
                 )
 
@@ -178,7 +183,7 @@ def array(object obj, type=None, mask=None,
     else:
         if mask is not None:
             raise ValueError("Masks only supported with ndarray-like inputs")
-        return _sequence_to_array(obj, size, type, pool)
+        return _sequence_to_array(obj, size, type, pool, from_pandas)
 
 
 def asarray(values, type=None):
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index d617986..dc8470f 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -938,14 +938,17 @@ cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil:
     object PyHalf_FromHalf(npy_half value)
 
     CStatus ConvertPySequence(object obj, CMemoryPool* pool,
+                              c_bool from_pandas,
                               shared_ptr[CArray]* out)
     CStatus ConvertPySequence(object obj, const shared_ptr[CDataType]& type,
-                              CMemoryPool* pool, shared_ptr[CArray]* out)
-    CStatus ConvertPySequence(object obj, int64_t size, CMemoryPool* pool,
+                              CMemoryPool* pool, c_bool from_pandas,
                               shared_ptr[CArray]* out)
+    CStatus ConvertPySequence(object obj, int64_t size, CMemoryPool* pool,
+                              c_bool from_pandas, shared_ptr[CArray]* out)
     CStatus ConvertPySequence(object obj, int64_t size,
                               const shared_ptr[CDataType]& type,
                               CMemoryPool* pool,
+                              c_bool from_pandas,
                               shared_ptr[CArray]* out)
 
     CStatus NumPyDtypeToArrow(object dtype, shared_ptr[CDataType]* type)
diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py
index 31228b4..7c7918e 100644
--- a/python/pyarrow/tests/test_convert_builtin.py
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -226,6 +226,40 @@ def test_sequence_integer(seq, np_scalar_pa_type):
 
 
 @parametrize_with_iterable_types
+@pytest.mark.parametrize("np_scalar_pa_type", int_type_pairs)
+def test_sequence_integer_np_nan(seq, np_scalar_pa_type):
+    # ARROW-2806: numpy.nan is a double value and thus should produce
+    # a double array.
+    _, pa_type = np_scalar_pa_type
+    with pytest.raises(ValueError):
+        pa.array(seq([np.nan]), type=pa_type, from_pandas=False)
+
+    arr = pa.array(seq([np.nan]), type=pa_type, from_pandas=True)
+    expected = [None]
+    assert len(arr) == 1
+    assert arr.null_count == 1
+    assert arr.type == pa_type
+    assert arr.to_pylist() == expected
+
+
+@parametrize_with_iterable_types
+@pytest.mark.parametrize("np_scalar_pa_type", int_type_pairs)
+def test_sequence_integer_nested_np_nan(seq, np_scalar_pa_type):
+    # ARROW-2806: numpy.nan is a double value and thus should produce
+    # a double array.
+    _, pa_type = np_scalar_pa_type
+    with pytest.raises(ValueError):
+        pa.array(seq([[np.nan]]), type=pa.list_(pa_type), from_pandas=False)
+
+    arr = pa.array(seq([[np.nan]]), type=pa.list_(pa_type), from_pandas=True)
+    expected = [[None]]
+    assert len(arr) == 1
+    assert arr.null_count == 0
+    assert arr.type == pa.list_(pa_type)
+    assert arr.to_pylist() == expected
+
+
+@parametrize_with_iterable_types
 def test_sequence_integer_inferred(seq):
     expected = [1, None, 3, None]
     arr = pa.array(seq(expected))
@@ -310,13 +344,43 @@ def test_sequence_double():
 
 @parametrize_with_iterable_types
 @pytest.mark.parametrize("np_scalar", [np.float16, np.float32, np.float64])
-def test_sequence_numpy_double(seq, np_scalar):
-    data = [np_scalar(1.5), np_scalar(1), None, np_scalar(2.5), None, None]
-    arr = pa.array(seq(data))
+@pytest.mark.parametrize("from_pandas", [True, False])
+def test_sequence_numpy_double(seq, np_scalar, from_pandas):
+    data = [np_scalar(1.5), np_scalar(1), None, np_scalar(2.5), None, np.nan]
+    arr = pa.array(seq(data), from_pandas=from_pandas)
     assert len(arr) == 6
-    assert arr.null_count == 3
+    if from_pandas:
+        assert arr.null_count == 3
+    else:
+        assert arr.null_count == 2
     assert arr.type == pa.float64()
-    assert arr.to_pylist() == data
+
+    assert arr.to_pylist()[:4] == data[:4]
+    if from_pandas:
+        assert arr.to_pylist()[5] is None
+    else:
+        assert np.isnan(arr.to_pylist()[5])
+
+
+@pytest.mark.parametrize("from_pandas", [True, False])
+@pytest.mark.parametrize("inner_seq", [np.array, list])
+def test_ndarray_nested_numpy_double(from_pandas, inner_seq):
+    # ARROW-2806
+    data = np.array([
+        inner_seq([1., 2.]),
+        inner_seq([1., 2., 3.]),
+        inner_seq([np.nan]),
+        None
+    ])
+    arr = pa.array(data, from_pandas=from_pandas)
+    assert len(arr) == 4
+    assert arr.null_count == 1
+    assert arr.type == pa.list_(pa.float64())
+    if from_pandas:
+        assert arr.to_pylist() == [[1.0, 2.0], [1.0, 2.0, 3.0], [None], None]
+    else:
+        np.testing.assert_equal(arr.to_pylist(),
+                                [[1., 2.], [1., 2., 3.], [np.nan], None])
 
 
 def test_sequence_unicode():