You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by fs...@apache.org on 2019/06/27 14:05:14 UTC
[arrow] branch master updated: ARROW-5749: [Python] Added python
binding for Table::CombineChunks
This is an automated email from the ASF dual-hosted git repository.
fsaintjacques pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 1deef81 ARROW-5749: [Python] Added python binding for Table::CombineChunks
1deef81 is described below
commit 1deef812bd84f94e214cd64fa8df9adeb3d17bfe
Author: Zhuo Peng <18...@users.noreply.github.com>
AuthorDate: Thu Jun 27 10:04:58 2019 -0400
ARROW-5749: [Python] Added python binding for Table::CombineChunks
Table::CombineChunks was added in https://github.com/apache/arrow/pull/4598 — this PR adds the Python binding for it.
Author: Zhuo Peng <18...@users.noreply.github.com>
Author: François Saint-Jacques <fs...@gmail.com>
Closes #4712 from brills/combine-chunks-py and squashes the following commits:
6ae43f9e6 <François Saint-Jacques> Fix test indentation
6200ab440 <Zhuo Peng> also test number of chunks after combining
fd0d2152c <Zhuo Peng> Added python binding for Table::CombineChunks.
---
python/pyarrow/includes/libarrow.pxd | 2 ++
python/pyarrow/table.pxi | 25 +++++++++++++++++++++++++
python/pyarrow/tests/test_table.py | 13 +++++++++++++
3 files changed, 40 insertions(+)
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 305055c..79389ca 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -558,6 +558,8 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
CStatus Flatten(CMemoryPool* pool, shared_ptr[CTable]* out)
+ CStatus CombineChunks(CMemoryPool* pool, shared_ptr[CTable]* out)
+
CStatus Validate()
shared_ptr[CTable] ReplaceSchemaMetadata(
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index 97eed36..0a76ddb 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -1029,6 +1029,31 @@ cdef class Table(_PandasConvertible):
return pyarrow_wrap_table(flattened)
+ def combine_chunks(self, MemoryPool memory_pool=None):
+ """
+ Make a new table by combining the chunks this table has.
+
+ All the underlying chunks in the ChunkedArray of each column are
+ concatenated into zero or one chunk.
+
+ Parameters
+ ----------
+ memory_pool : MemoryPool, default None
+ For memory allocations, if required, otherwise use default pool
+
+ Returns
+ -------
+ result : Table
+ """
+ cdef:
+ shared_ptr[CTable] combined  # output slot filled in by CombineChunks
+ CMemoryPool* pool = maybe_unbox_memory_pool(memory_pool)  # presumably maps None to the default pool -- confirm
+
+ with nogil:  # the C++ call below runs with the GIL released
+ check_status(self.table.CombineChunks(pool, &combined))  # presumably raises on a non-OK status -- confirm
+
+ return pyarrow_wrap_table(combined)  # hand the combined C++ table back as a Python-level object
+
def __eq__(self, other):
try:
return self.equals(other)
diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
index 7106a3f..c7216ea 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -781,6 +781,19 @@ def test_table_flatten():
assert t2.equals(expected)
+def test_table_combine_chunks():  # combine_chunks() must keep the data and leave one chunk per column
+ batch1 = pa.RecordBatch.from_arrays([pa.array([1]), pa.array(["a"])],
+ names=['f1', 'f2'])
+ batch2 = pa.RecordBatch.from_arrays([pa.array([2]), pa.array(["b"])],
+ names=['f1', 'f2'])
+ table = pa.Table.from_batches([batch1, batch2])  # built from two batches; presumably two chunks per column -- confirm
+ combined = table.combine_chunks()
+ combined._validate()
+ assert combined.equals(table)  # combining must not change the table's contents
+ for c in combined.columns:
+ assert c.data.num_chunks == 1  # each column ends up with exactly one chunk
+
+
def test_concat_tables():
data = [
list(range(5)),