You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2019/06/11 18:00:56 UTC
[arrow] branch master updated: ARROW-5554: [Python] Added a python
wrapper for arrow::Concatenate()
This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new c7b5656 ARROW-5554: [Python] Added a python wrapper for arrow::Concatenate()
c7b5656 is described below
commit c7b56568e88c4065413d34f577bdc69290f19de6
Author: Zhuo Peng <18...@users.noreply.github.com>
AuthorDate: Tue Jun 11 20:00:43 2019 +0200
ARROW-5554: [Python] Added a python wrapper for arrow::Concatenate()
Author: Zhuo Peng <18...@users.noreply.github.com>
Closes #4519 from brills/conc-wrap and squashes the following commits:
c3f45b42c <Zhuo Peng> doc
1576859d7 <Zhuo Peng> Added a python wrapper for arrow::Concatenate().
---
python/pyarrow/__init__.py | 2 +-
python/pyarrow/array.pxi | 27 +++++++++++++++++++++++++++
python/pyarrow/includes/libarrow.pxd | 4 ++++
python/pyarrow/tests/test_array.py | 11 +++++++++++
4 files changed, 43 insertions(+), 1 deletion(-)
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 117b1d7..f9ba819 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -109,7 +109,7 @@ from pyarrow.lib import (HdfsFile, NativeFile, PythonFile,
MockOutputStream, input_stream, output_stream)
from pyarrow.lib import (ChunkedArray, Column, RecordBatch, Table,
- concat_tables)
+ concat_arrays, concat_tables)
# Exceptions
from pyarrow.lib import (ArrowException,
diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 957c875..e504806 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -1472,3 +1472,30 @@ cdef object get_series_values(object obj):
result = pandas_api.make_series(obj).values
return result
+
+
+def concat_arrays(arrays, MemoryPool memory_pool=None):
+ """
+ Returns a concatenation of the given arrays. The contents of those arrays
+ are copied into the returned array. Raises an exception if the arrays
+ are not all of the same type.
+
+ Parameters
+ ----------
+ arrays : iterable of pyarrow.Array objects
+ memory_pool : MemoryPool, default None
+ For memory allocations. If None, the default pool is used.
+ """
+ cdef:
+ vector[shared_ptr[CArray]] c_arrays
+ shared_ptr[CArray] c_result
+ Array array
+ CMemoryPool* pool = maybe_unbox_memory_pool(memory_pool)
+
+ for array in arrays:
+ c_arrays.push_back(array.sp_array)
+
+ with nogil:
+ check_status(Concatenate(c_arrays, pool, &c_result))
+
+ return pyarrow_wrap_array(c_result)
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 8443c0c..1e32b87 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -1297,3 +1297,7 @@ cdef extern from 'arrow/util/compression.h' namespace 'arrow' nogil:
cdef extern from 'arrow/util/thread-pool.h' namespace 'arrow' nogil:
int GetCpuThreadPoolCapacity()
CStatus SetCpuThreadPoolCapacity(int threads)
+
+cdef extern from 'arrow/array/concatenate.h' namespace 'arrow' nogil:
+ CStatus Concatenate(const vector[shared_ptr[CArray]]& arrays,
+ CMemoryPool* pool, shared_ptr[CArray]* result)
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 9e4132a..07164e4 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -1381,3 +1381,14 @@ def test_array_from_large_pyints():
with pytest.raises(pa.ArrowInvalid):
# too large for int64 so dtype must be explicitly provided
pa.array([int(2 ** 63)])
+
+
+def test_concat_array():
+ concatenated = pa.concat_arrays(
+ [pa.array([1, 2]), pa.array([3, 4])])
+ assert concatenated.equals(pa.array([1, 2, 3, 4]))
+
+
+def test_concat_array_different_types():
+ with pytest.raises(pa.ArrowInvalid):
+ pa.concat_arrays([pa.array([1]), pa.array([2.])])