You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/09/20 17:36:34 UTC
arrow git commit: ARROW-1557 [Python] Validate names length in Table.from_arrays
Repository: arrow
Updated Branches:
refs/heads/master 903d03b0b -> 9997a1a3b
ARROW-1557 [Python] Validate names length in Table.from_arrays
We now raise a ValueError when the number of names passed does not match
the number of arrays; previously a mismatch was not checked.
```python
In [1]: import pyarrow as pa
In [2]: pa.Table.from_arrays([pa.array([1, 2]), pa.array([3, 4])], names=['a', 'b', 'c'])
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-2-cda803f3f774> in <module>()
----> 1 pa.Table.from_arrays([pa.array([1, 2]), pa.array([3, 4])], names=['a', 'b', 'c'])
table.pxi in pyarrow.lib.Table.from_arrays()
table.pxi in pyarrow.lib._schema_from_arrays()
ValueError: Length of names (3) does not match length of arrays (2)
```
The missing check affected both `RecordBatch.from_arrays` and `Table.from_arrays`; the fix is made once in the shared `_schema_from_arrays` helper.
Author: Tom Augspurger <to...@gmail.com>
Author: Wes McKinney <we...@twosigma.com>
Closes #1117 from TomAugspurger/validate-names and squashes the following commits:
4df6f593 [Tom Augspurger] REF: avoid redundant len calculation
965a5608 [Wes McKinney] Fix test failure exposed in test_parquet.py
ed74d522 [Tom Augspurger] ARROW-1557 [Python] Validate names length in Table.from_arrays
Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/9997a1a3
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/9997a1a3
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/9997a1a3
Branch: refs/heads/master
Commit: 9997a1a3bacc397dc58645b2d8cfd0c3923efe01
Parents: 903d03b
Author: Tom Augspurger <to...@gmail.com>
Authored: Wed Sep 20 13:36:28 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Wed Sep 20 13:36:28 2017 -0400
----------------------------------------------------------------------
python/pyarrow/table.pxi | 5 ++++-
python/pyarrow/tests/test_parquet.py | 2 +-
python/pyarrow/tests/test_table.py | 24 ++++++++++++++++++++++++
3 files changed, 29 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/arrow/blob/9997a1a3/python/pyarrow/table.pxi
----------------------------------------------------------------------
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index 68eb5cb..028797e 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -315,7 +315,7 @@ cdef int _schema_from_arrays(
fields.resize(K)
- if len(arrays) == 0:
+ if not K:
raise ValueError('Must pass at least one array')
if isinstance(arrays[0], Column):
@@ -328,6 +328,9 @@ cdef int _schema_from_arrays(
if names is None:
raise ValueError('Must pass names when constructing '
'from Array objects')
+ if len(names) != K:
+ raise ValueError("Length of names ({}) does not match "
+ "length of arrays ({})".format(len(names), K))
for i in range(K):
val = arrays[i]
if isinstance(val, (Array, ChunkedArray)):
http://git-wip-us.apache.org/repos/asf/arrow/blob/9997a1a3/python/pyarrow/tests/test_parquet.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
index 6266878..eb23894 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -57,7 +57,7 @@ def test_single_pylist_column_roundtrip(tmpdir):
filename = tmpdir.join('single_{}_column.parquet'
.format(dtype.__name__))
data = [pa.array(list(map(dtype, range(5))))]
- table = pa.Table.from_arrays(data, names=('a', 'b'))
+ table = pa.Table.from_arrays(data, names=['a'])
_write_table(table, filename.strpath)
table_read = _read_table(filename.strpath)
for col_written, col_read in zip(table.itercolumns(),
http://git-wip-us.apache.org/repos/asf/arrow/blob/9997a1a3/python/pyarrow/tests/test_table.py
----------------------------------------------------------------------
diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
index 311d64e..4a2868a 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -82,6 +82,18 @@ def test_recordbatch_basics():
batch[2]
+def test_recordbatch_from_arrays_invalid_names():
+ data = [
+ pa.array(range(5)),
+ pa.array([-10, -5, 0, 5, 10])
+ ]
+ with pytest.raises(ValueError):
+ pa.RecordBatch.from_arrays(data, names=['a', 'b', 'c'])
+
+ with pytest.raises(ValueError):
+ pa.RecordBatch.from_arrays(data, names=['a'])
+
+
def test_recordbatch_empty_metadata():
data = [
pa.array(range(5)),
@@ -200,6 +212,18 @@ def test_table_basics():
assert chunk is not None
+def test_table_from_arrays_invalid_names():
+ data = [
+ pa.array(range(5)),
+ pa.array([-10, -5, 0, 5, 10])
+ ]
+ with pytest.raises(ValueError):
+ pa.Table.from_arrays(data, names=['a', 'b', 'c'])
+
+ with pytest.raises(ValueError):
+ pa.Table.from_arrays(data, names=['a'])
+
+
def test_table_add_column():
data = [
pa.array(range(5)),