You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/09/27 14:58:20 UTC
[arrow] branch master updated: ARROW-3338: [Python] Crash when schema and columns do not match
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 830a2b1 ARROW-3338: [Python] Crash when schema and columns do not match
830a2b1 is described below
commit 830a2b1ccc23e8683864af3461ebf92ef159c604
Author: Krisztián Szűcs <sz...@gmail.com>
AuthorDate: Thu Sep 27 10:58:07 2018 -0400
ARROW-3338: [Python] Crash when schema and columns do not match
Author: Krisztián Szűcs <sz...@gmail.com>
Closes #2643 from kszucs/ARROW-3338 and squashes the following commits:
9389d608a <Krisztián Szűcs> make test case python27 compatible
733e18fdd <Krisztián Szűcs> fix schema validation in Table::FromRecordBatches
---
cpp/src/arrow/table.cc | 2 +-
python/pyarrow/tests/test_table.py | 21 +++++++++++++++++++++
2 files changed, 22 insertions(+), 1 deletion(-)
diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc
index 9919085..96c71c1 100644
--- a/cpp/src/arrow/table.cc
+++ b/cpp/src/arrow/table.cc
@@ -412,7 +412,7 @@ Status Table::FromRecordBatches(const std::shared_ptr<Schema>& schema,
const int nbatches = static_cast<int>(batches.size());
const int ncolumns = static_cast<int>(schema->num_fields());
- for (int i = 1; i < nbatches; ++i) {
+ for (int i = 0; i < nbatches; ++i) {
if (!batches[i]->schema()->Equals(*schema, false)) {
std::stringstream ss;
ss << "Schema at index " << static_cast<int>(i) << " was different: \n"
diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
index a6567d5..0b397f6 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -499,6 +499,27 @@ def test_recordbatchlist_schema_equals():
pa.Table.from_batches([batch1, batch2])
+def test_table_from_batches_and_schema():
+ schema = pa.schema([
+ pa.field('a', pa.int64()),
+ pa.field('b', pa.float64()),
+ ])
+ batch = pa.RecordBatch.from_arrays([pa.array([1]), pa.array([3.14])],
+ names=['a', 'b'])
+ table = pa.Table.from_batches([batch], schema)
+ assert table.schema.equals(schema)
+ assert table.column(0) == pa.column('a', pa.array([1]))
+ assert table.column(1) == pa.column('b', pa.array([3.14]))
+
+ incompatible_schema = pa.schema([pa.field('a', pa.int64())])
+ with pytest.raises(pa.ArrowInvalid):
+ pa.Table.from_batches([batch], incompatible_schema)
+
+ incompatible_batch = pa.RecordBatch.from_arrays([pa.array([1])], ['a'])
+ with pytest.raises(pa.ArrowInvalid):
+ pa.Table.from_batches([incompatible_batch], schema)
+
+
def test_table_to_batches():
df1 = pd.DataFrame({'a': list(range(10))})
df2 = pd.DataFrame({'a': list(range(10, 30))})