You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by fs...@apache.org on 2019/06/27 14:05:14 UTC

[arrow] branch master updated: ARROW-5749: [Python] Added python binding for Table::CombineChunks

This is an automated email from the ASF dual-hosted git repository.

fsaintjacques pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 1deef81  ARROW-5749: [Python] Added python binding for Table::CombineChunks
1deef81 is described below

commit 1deef812bd84f94e214cd64fa8df9adeb3d17bfe
Author: Zhuo Peng <18...@users.noreply.github.com>
AuthorDate: Thu Jun 27 10:04:58 2019 -0400

    ARROW-5749: [Python] Added python binding for Table::CombineChunks
    
    Table::CombineChunks was added in https://github.com/apache/arrow/pull/4598 . This PR adds the python binding for it.
    
    Author: Zhuo Peng <18...@users.noreply.github.com>
    Author: François Saint-Jacques <fs...@gmail.com>
    
    Closes #4712 from brills/combine-chunks-py and squashes the following commits:
    
    6ae43f9e6 <François Saint-Jacques> Fix test indentation
    6200ab440 <Zhuo Peng> also test number of chunks after combining
    fd0d2152c <Zhuo Peng> Added python binding for Table::CombineChunks.
---
 python/pyarrow/includes/libarrow.pxd |  2 ++
 python/pyarrow/table.pxi             | 25 +++++++++++++++++++++++++
 python/pyarrow/tests/test_table.py   | 13 +++++++++++++
 3 files changed, 40 insertions(+)

diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 305055c..79389ca 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -558,6 +558,8 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
 
         CStatus Flatten(CMemoryPool* pool, shared_ptr[CTable]* out)
 
+        CStatus CombineChunks(CMemoryPool* pool, shared_ptr[CTable]* out)
+
         CStatus Validate()
 
         shared_ptr[CTable] ReplaceSchemaMetadata(
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index 97eed36..0a76ddb 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -1029,6 +1029,31 @@ cdef class Table(_PandasConvertible):
 
         return pyarrow_wrap_table(flattened)
 
+    def combine_chunks(self, MemoryPool memory_pool=None):
+        """
+        Make a new table by combining the chunks this table has.
+
+        All the underlying chunks in the ChunkedArray of each column are
+        concatenated into zero or one chunk.
+
+        Parameters
+        ----------
+        memory_pool : MemoryPool, default None
+            For memory allocations, if required, otherwise use default pool
+
+        Returns
+        -------
+        result : Table
+        """
+        cdef:
+            shared_ptr[CTable] combined  # out-parameter for CombineChunks
+            CMemoryPool* pool = maybe_unbox_memory_pool(memory_pool)
+
+        with nogil:  # the GIL is released while the C++ call runs
+            check_status(self.table.CombineChunks(pool, &combined))
+
+        return pyarrow_wrap_table(combined)
+
     def __eq__(self, other):
         try:
             return self.equals(other)
diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
index 7106a3f..c7216ea 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -781,6 +781,19 @@ def test_table_flatten():
     assert t2.equals(expected)
 
 
+def test_table_combine_chunks():
+    batch1 = pa.RecordBatch.from_arrays([pa.array([1]), pa.array(["a"])],
+                                        names=['f1', 'f2'])
+    batch2 = pa.RecordBatch.from_arrays([pa.array([2]), pa.array(["b"])],
+                                        names=['f1', 'f2'])
+    table = pa.Table.from_batches([batch1, batch2])  # one table, two batches
+    combined = table.combine_chunks()
+    combined._validate()
+    assert combined.equals(table)  # combining must not change the contents
+    for c in combined.columns:
+        assert c.data.num_chunks == 1  # each column is now a single chunk
+
+
 def test_concat_tables():
     data = [
         list(range(5)),