You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/07/30 00:19:33 UTC
[arrow] branch master updated: ARROW-2926: [Python] Do not attempt
to write tables with invalid schemas in ParquetWriter.write_table
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 161d1f0 ARROW-2926: [Python] Do not attempt to write tables with invalid schemas in ParquetWriter.write_table
161d1f0 is described below
commit 161d1f02cf6954f78abd240c16680d4bddcc7864
Author: Wes McKinney <we...@apache.org>
AuthorDate: Sun Jul 29 20:19:27 2018 -0400
ARROW-2926: [Python] Do not attempt to write tables with invalid schemas in ParquetWriter.write_table
To be honest, `parquet::arrow::FileWriter` should probably also validate. I will open a corresponding issue "upstream".
Author: Wes McKinney <we...@apache.org>
Closes #2339 from wesm/ARROW-2926 and squashes the following commits:
8d2ee259 <Wes McKinney> Validate schemas in ParquetWriter.write_table
---
python/pyarrow/parquet.py | 7 +++++++
python/pyarrow/tests/test_parquet.py | 24 ++++++++++++++++++++++++
2 files changed, 31 insertions(+)
diff --git a/python/pyarrow/parquet.py b/python/pyarrow/parquet.py
index 2c1aef0..343758a 100644
--- a/python/pyarrow/parquet.py
+++ b/python/pyarrow/parquet.py
@@ -327,6 +327,13 @@ schema : arrow Schema
if self.schema_changed:
table = _sanitize_table(table, self.schema, self.flavor)
assert self.is_open
+
+ if not table.schema.equals(self.schema):
+ msg = ('Table schema does not match schema used to create file: '
+ '\ntable:\n{0!s} vs. \nfile:\n{1!s}'.format(table.schema,
+ self.schema))
+ raise ValueError(msg)
+
self.writer.write_table(table, row_group_size=row_group_size)
def close(self):
diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
index cc86ef1..1d3a6c1 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -685,6 +685,30 @@ def test_compare_schemas():
assert fileh.schema[0] != 'arbitrary object'
+def test_validate_schema_write_table(tmpdir):
+ # ARROW-2926
+ import pyarrow.parquet as pq
+
+ simple_fields = [
+ pa.field('POS', pa.uint32()),
+ pa.field('desc', pa.string())
+ ]
+
+ simple_schema = pa.schema(simple_fields)
+
+ # simple_table schema does not match simple_schema
+ simple_from_array = [pa.array([1]), pa.array(['bla'])]
+ simple_table = pa.Table.from_arrays(simple_from_array, ['POS', 'desc'])
+
+ path = tmpdir.join('simple_validate_schema.parquet').strpath
+
+ with pq.ParquetWriter(path, simple_schema,
+ version='2.0',
+ compression='snappy', flavor='spark') as w:
+ with pytest.raises(ValueError):
+ w.write_table(simple_table)
+
+
def test_column_of_arrays(tmpdir):
df, schema = dataframe_with_arrays()