You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/09/20 17:36:34 UTC

arrow git commit: ARROW-1557 [Python] Validate names length in Table.from_arrays

Repository: arrow
Updated Branches:
  refs/heads/master 903d03b0b -> 9997a1a3b


ARROW-1557 [Python] Validate names length in Table.from_arrays

We now raise a ValueError when the number of names passed doesn't match
the number of arrays passed.

```python
In [1]: import pyarrow as pa

In [2]: pa.Table.from_arrays([pa.array([1, 2]), pa.array([3, 4])], names=['a', 'b', 'c'])
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-2-cda803f3f774> in <module>()
----> 1 pa.Table.from_arrays([pa.array([1, 2]), pa.array([3, 4])], names=['a', 'b', 'c'])

table.pxi in pyarrow.lib.Table.from_arrays()

table.pxi in pyarrow.lib._schema_from_arrays()

ValueError: Length of names (3) does not match length of arrays (2)
```

The check is added in `_schema_from_arrays`, so it applies to both `RecordBatch.from_arrays` and `Table.from_arrays`.

Author: Tom Augspurger <to...@gmail.com>
Author: Wes McKinney <we...@twosigma.com>

Closes #1117 from TomAugspurger/validate-names and squashes the following commits:

4df6f593 [Tom Augspurger] REF: avoid redundant len calculation
965a5608 [Wes McKinney] Fix test failure exposed in test_parquet.py
ed74d522 [Tom Augspurger] ARROW-1557 [Python] Validate names length in Table.from_arrays


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/9997a1a3
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/9997a1a3
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/9997a1a3

Branch: refs/heads/master
Commit: 9997a1a3bacc397dc58645b2d8cfd0c3923efe01
Parents: 903d03b
Author: Tom Augspurger <to...@gmail.com>
Authored: Wed Sep 20 13:36:28 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Wed Sep 20 13:36:28 2017 -0400

----------------------------------------------------------------------
 python/pyarrow/table.pxi             |  5 ++++-
 python/pyarrow/tests/test_parquet.py |  2 +-
 python/pyarrow/tests/test_table.py   | 24 ++++++++++++++++++++++++
 3 files changed, 29 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/9997a1a3/python/pyarrow/table.pxi
----------------------------------------------------------------------
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index 68eb5cb..028797e 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -315,7 +315,7 @@ cdef int _schema_from_arrays(
 
     fields.resize(K)
 
-    if len(arrays) == 0:
+    if not K:
         raise ValueError('Must pass at least one array')
 
     if isinstance(arrays[0], Column):
@@ -328,6 +328,9 @@ cdef int _schema_from_arrays(
         if names is None:
             raise ValueError('Must pass names when constructing '
                              'from Array objects')
+        if len(names) != K:
+            raise ValueError("Length of names ({}) does not match "
+                             "length of arrays ({})".format(len(names), K))
         for i in range(K):
             val = arrays[i]
             if isinstance(val, (Array, ChunkedArray)):

http://git-wip-us.apache.org/repos/asf/arrow/blob/9997a1a3/python/pyarrow/tests/test_parquet.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
index 6266878..eb23894 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -57,7 +57,7 @@ def test_single_pylist_column_roundtrip(tmpdir):
         filename = tmpdir.join('single_{}_column.parquet'
                                .format(dtype.__name__))
         data = [pa.array(list(map(dtype, range(5))))]
-        table = pa.Table.from_arrays(data, names=('a', 'b'))
+        table = pa.Table.from_arrays(data, names=['a'])
         _write_table(table, filename.strpath)
         table_read = _read_table(filename.strpath)
         for col_written, col_read in zip(table.itercolumns(),

http://git-wip-us.apache.org/repos/asf/arrow/blob/9997a1a3/python/pyarrow/tests/test_table.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
index 311d64e..4a2868a 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -82,6 +82,18 @@ def test_recordbatch_basics():
         batch[2]
 
 
+def test_recordbatch_from_arrays_invalid_names():
+    data = [
+        pa.array(range(5)),
+        pa.array([-10, -5, 0, 5, 10])
+    ]
+    with pytest.raises(ValueError):
+        pa.RecordBatch.from_arrays(data, names=['a', 'b', 'c'])
+
+    with pytest.raises(ValueError):
+        pa.RecordBatch.from_arrays(data, names=['a'])
+
+
 def test_recordbatch_empty_metadata():
     data = [
         pa.array(range(5)),
@@ -200,6 +212,18 @@ def test_table_basics():
             assert chunk is not None
 
 
+def test_table_from_arrays_invalid_names():
+    data = [
+        pa.array(range(5)),
+        pa.array([-10, -5, 0, 5, 10])
+    ]
+    with pytest.raises(ValueError):
+        pa.Table.from_arrays(data, names=['a', 'b', 'c'])
+
+    with pytest.raises(ValueError):
+        pa.Table.from_arrays(data, names=['a'])
+
+
 def test_table_add_column():
     data = [
         pa.array(range(5)),