You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2019/06/11 18:00:56 UTC

[arrow] branch master updated: ARROW-5554: [Python] Added a python wrapper for arrow::Concatenate()

This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new c7b5656  ARROW-5554: [Python] Added a python wrapper for arrow::Concatenate()
c7b5656 is described below

commit c7b56568e88c4065413d34f577bdc69290f19de6
Author: Zhuo Peng <18...@users.noreply.github.com>
AuthorDate: Tue Jun 11 20:00:43 2019 +0200

    ARROW-5554: [Python] Added a python wrapper for arrow::Concatenate()
    
    Author: Zhuo Peng <18...@users.noreply.github.com>
    
    Closes #4519 from brills/conc-wrap and squashes the following commits:
    
    c3f45b42c <Zhuo Peng> doc
    1576859d7 <Zhuo Peng> Added a python wrapper for arrow::Concatenate().
---
 python/pyarrow/__init__.py           |  2 +-
 python/pyarrow/array.pxi             | 27 +++++++++++++++++++++++++++
 python/pyarrow/includes/libarrow.pxd |  4 ++++
 python/pyarrow/tests/test_array.py   | 11 +++++++++++
 4 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 117b1d7..f9ba819 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -109,7 +109,7 @@ from pyarrow.lib import (HdfsFile, NativeFile, PythonFile,
                          MockOutputStream, input_stream, output_stream)
 
 from pyarrow.lib import (ChunkedArray, Column, RecordBatch, Table,
-                         concat_tables)
+                         concat_arrays, concat_tables)
 
 # Exceptions
 from pyarrow.lib import (ArrowException,
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 957c875..e504806 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -1472,3 +1472,30 @@ cdef object get_series_values(object obj):
         result = pandas_api.make_series(obj).values
 
     return result
+
+
+def concat_arrays(arrays, MemoryPool memory_pool=None):
+    """
+    Returns a concatenation of the given arrays. The contents of those arrays
+    are copied into the returned array. Raises exception if all of the arrays
+    are not of the same type.
+
+    Parameters
+    ----------
+    arrays : iterable of pyarrow.Array objects
+    memory_pool : MemoryPool, default None
+        For memory allocations. If None, the default pool is used.
+    """
+    cdef:
+        vector[shared_ptr[CArray]] c_arrays
+        shared_ptr[CArray] c_result
+        Array array
+        CMemoryPool* pool = maybe_unbox_memory_pool(memory_pool)
+
+    for array in arrays:
+        c_arrays.push_back(array.sp_array)
+
+    with nogil:
+        check_status(Concatenate(c_arrays, pool, &c_result))
+
+    return pyarrow_wrap_array(c_result)
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 8443c0c..1e32b87 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -1297,3 +1297,7 @@ cdef extern from 'arrow/util/compression.h' namespace 'arrow' nogil:
 cdef extern from 'arrow/util/thread-pool.h' namespace 'arrow' nogil:
     int GetCpuThreadPoolCapacity()
     CStatus SetCpuThreadPoolCapacity(int threads)
+
+cdef extern from 'arrow/array/concatenate.h' namespace 'arrow' nogil:
+    CStatus Concatenate(const vector[shared_ptr[CArray]]& arrays,
+                        CMemoryPool* pool, shared_ptr[CArray]* result)
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 9e4132a..07164e4 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -1381,3 +1381,14 @@ def test_array_from_large_pyints():
     with pytest.raises(pa.ArrowInvalid):
         # too large for int64 so dtype must be explicitly provided
         pa.array([int(2 ** 63)])
+
+
+def test_concat_array():
+    concatenated = pa.concat_arrays(
+        [pa.array([1, 2]), pa.array([3, 4])])
+    assert concatenated.equals(pa.array([1, 2, 3, 4]))
+
+
+def test_concat_array_different_types():
+    with pytest.raises(pa.ArrowInvalid):
+        pa.concat_arrays([pa.array([1]), pa.array([2.])])