You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2018/09/27 14:58:20 UTC
[arrow] branch master updated: ARROW-3338: [Python] Crash when schema and columns do not match

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 830a2b1  ARROW-3338: [Python] Crash when schema and columns do not match
830a2b1 is described below

commit 830a2b1ccc23e8683864af3461ebf92ef159c604
Author: Krisztián Szűcs <sz...@gmail.com>
AuthorDate: Thu Sep 27 10:58:07 2018 -0400

    ARROW-3338: [Python] Crash when schema and columns do not match
    
    Author: Krisztián Szűcs <sz...@gmail.com>
    
    Closes #2643 from kszucs/ARROW-3338 and squashes the following commits:
    
    9389d608a <Krisztián Szűcs> make test case python27 compatible
    733e18fdd <Krisztián Szűcs> fix schema validation in Table::FromRecordBatches
---
 cpp/src/arrow/table.cc             |  2 +-
 python/pyarrow/tests/test_table.py | 21 +++++++++++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc
index 9919085..96c71c1 100644
--- a/cpp/src/arrow/table.cc
+++ b/cpp/src/arrow/table.cc
@@ -412,7 +412,7 @@ Status Table::FromRecordBatches(const std::shared_ptr<Schema>& schema,
   const int nbatches = static_cast<int>(batches.size());
   const int ncolumns = static_cast<int>(schema->num_fields());
 
-  for (int i = 1; i < nbatches; ++i) {
+  for (int i = 0; i < nbatches; ++i) {
     if (!batches[i]->schema()->Equals(*schema, false)) {
       std::stringstream ss;
       ss << "Schema at index " << static_cast<int>(i) << " was different: \n"
diff --git a/python/pyarrow/tests/test_table.py b/python/pyarrow/tests/test_table.py
index a6567d5..0b397f6 100644
--- a/python/pyarrow/tests/test_table.py
+++ b/python/pyarrow/tests/test_table.py
@@ -499,6 +499,27 @@ def test_recordbatchlist_schema_equals():
         pa.Table.from_batches([batch1, batch2])
 
 
+def test_table_from_batches_and_schema():
+    schema = pa.schema([
+        pa.field('a', pa.int64()),
+        pa.field('b', pa.float64()),
+    ])
+    batch = pa.RecordBatch.from_arrays([pa.array([1]), pa.array([3.14])],
+                                       names=['a', 'b'])
+    table = pa.Table.from_batches([batch], schema)
+    assert table.schema.equals(schema)
+    assert table.column(0) == pa.column('a', pa.array([1]))
+    assert table.column(1) == pa.column('b', pa.array([3.14]))
+
+    incompatible_schema = pa.schema([pa.field('a', pa.int64())])
+    with pytest.raises(pa.ArrowInvalid):
+        pa.Table.from_batches([batch], incompatible_schema)
+
+    incompatible_batch = pa.RecordBatch.from_arrays([pa.array([1])], ['a'])
+    with pytest.raises(pa.ArrowInvalid):
+        pa.Table.from_batches([incompatible_batch], schema)
+
+
 def test_table_to_batches():
     df1 = pd.DataFrame({'a': list(range(10))})
     df2 = pd.DataFrame({'a': list(range(10, 30))})