You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by uw...@apache.org on 2018/07/11 11:40:20 UTC

[arrow] branch master updated: ARROW-2820: [Python] Check that array lengths in RecordBatch.from_arrays are all the same

This is an automated email from the ASF dual-hosted git repository.

uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 35e85b8  ARROW-2820: [Python] Check that array lengths in RecordBatch.from_arrays are all the same
35e85b8 is described below

commit 35e85b847df32193c204f598d98fe4e7e4e1f058
Author: Wes McKinney <we...@apache.org>
AuthorDate: Wed Jul 11 13:40:10 2018 +0200

    ARROW-2820: [Python] Check that array lengths in RecordBatch.from_arrays are all the same
    
    Failing to validate could cause a segfault
    
    Author: Wes McKinney <we...@apache.org>
    
    Closes #2249 from wesm/ARROW-2820 and squashes the following commits:
    
    29c63b67 <Wes McKinney> Check that array lengths in RecordBatch.from_arrays are all the same
---
 python/pyarrow/table.pxi           | 3 +++
 python/pyarrow/tests/test_table.py | 9 +++++++++
 2 files changed, 12 insertions(+)

diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index f393bda..963444d 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -818,6 +818,9 @@ cdef class RecordBatch:
 
         c_arrays.reserve(len(arrays))
         for arr in arrays:
+            if len(arr) != num_rows:
+                raise ValueError('Arrays were not all the same length: '
+                                 '{0} vs {1}'.format(len(arr), num_rows))
             c_arrays.push_back(arr.sp_array)
 
         return pyarrow_wrap_batch(
diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
index 1b4e44b..c445434 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -170,6 +170,15 @@ def test_recordbatch_basics():
         batch[2]
 
 
+def test_recordbatch_from_arrays_validate_lengths():
+    # ARROW-2820
+    data = [pa.array([1]), pa.array(["tokyo", "like", "happy"]),
+            pa.array(["derek"])]
+
+    with pytest.raises(ValueError):
+        pa.RecordBatch.from_arrays(data, ['id', 'tags', 'name'])
+
+
 def test_recordbatch_no_fields():
     batch = pa.RecordBatch.from_arrays([], [])