You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by uw...@apache.org on 2018/12/27 16:27:06 UTC

[arrow] branch master updated: ARROW-4088: [Python] Table.from_batches() fails when passed a schema with metadata

This is an automated email from the ASF dual-hosted git repository.

uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 6781c2d  ARROW-4088: [Python] Table.from_batches() fails when passed a schema with metadata
6781c2d is described below

commit 6781c2da8915f99eaa8438cce25329152a0defc3
Author: Krisztián Szűcs <sz...@gmail.com>
AuthorDate: Thu Dec 27 17:26:55 2018 +0100

    ARROW-4088: [Python] Table.from_batches() fails when passed a schema with metadata
    
    Author: Krisztián Szűcs <sz...@gmail.com>
    
    Closes #3256 from kszucs/ARROW-4088 and squashes the following commits:
    
    b2698995 <Krisztián Szűcs> turn off check_metadata
    cf5c0829 <Krisztián Szűcs> propagate check_metadata to Schema's fields
---
 cpp/src/arrow/type-test.cc          | 20 ++++++++++++--------
 cpp/src/arrow/type.cc               | 12 +++++++-----
 cpp/src/arrow/type.h                |  4 ++--
 python/pyarrow/tests/test_schema.py | 14 ++++++++++++++
 4 files changed, 35 insertions(+), 15 deletions(-)

diff --git a/cpp/src/arrow/type-test.cc b/cpp/src/arrow/type-test.cc
index 5b758d7..ec82e0a 100644
--- a/cpp/src/arrow/type-test.cc
+++ b/cpp/src/arrow/type-test.cc
@@ -58,6 +58,7 @@ TEST(TestField, Equals) {
   ASSERT_TRUE(f0.Equals(f0_other));
   ASSERT_FALSE(f0.Equals(f0_nn));
   ASSERT_FALSE(f0.Equals(f0_with_meta));
+  ASSERT_TRUE(f0.Equals(f0_with_meta, false));
 }
 
 TEST(TestField, TestMetadataConstruction) {
@@ -200,28 +201,31 @@ TEST_F(TestSchema, GetFieldIndex) {
 }
 
 TEST_F(TestSchema, TestMetadataConstruction) {
-  auto f0 = field("f0", int32());
-  auto f1 = field("f1", uint8(), false);
-  auto f2 = field("f2", utf8());
   auto metadata0 = key_value_metadata({{"foo", "bar"}, {"bizz", "buzz"}});
   auto metadata1 = key_value_metadata({{"foo", "baz"}});
 
-  auto schema0 = ::arrow::schema({f0, f1, f2}, metadata0);
-  ASSERT_TRUE(metadata0->Equals(*schema0->metadata()));
+  auto f0 = field("f0", int32());
+  auto f1 = field("f1", uint8(), false);
+  auto f2 = field("f2", utf8(), true);
+  auto f3 = field("f2", utf8(), true, metadata1->Copy());
 
+  auto schema0 = ::arrow::schema({f0, f1, f2}, metadata0);
   auto schema1 = ::arrow::schema({f0, f1, f2}, metadata1);
-  ASSERT_TRUE(metadata1->Equals(*schema1->metadata()));
-
   auto schema2 = ::arrow::schema({f0, f1, f2}, metadata0->Copy());
-  ASSERT_TRUE(metadata0->Equals(*schema2->metadata()));
+  auto schema3 = ::arrow::schema({f0, f1, f3}, metadata0->Copy());
 
+  ASSERT_TRUE(metadata0->Equals(*schema0->metadata()));
+  ASSERT_TRUE(metadata1->Equals(*schema1->metadata()));
+  ASSERT_TRUE(metadata0->Equals(*schema2->metadata()));
   ASSERT_TRUE(schema0->Equals(*schema2));
   ASSERT_FALSE(schema0->Equals(*schema1));
   ASSERT_FALSE(schema2->Equals(*schema1));
+  ASSERT_FALSE(schema2->Equals(*schema3));
 
   // don't check metadata
   ASSERT_TRUE(schema0->Equals(*schema1, false));
   ASSERT_TRUE(schema2->Equals(*schema1, false));
+  ASSERT_TRUE(schema2->Equals(*schema3, false));
 }
 
 TEST_F(TestSchema, TestAddMetadata) {
diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc
index ee7fda7..a8372b9 100644
--- a/cpp/src/arrow/type.cc
+++ b/cpp/src/arrow/type.cc
@@ -65,13 +65,15 @@ std::vector<std::shared_ptr<Field>> Field::Flatten() const {
   return flattened;
 }
 
-bool Field::Equals(const Field& other) const {
+bool Field::Equals(const Field& other, bool check_metadata) const {
   if (this == &other) {
     return true;
   }
   if (this->name_ == other.name_ && this->nullable_ == other.nullable_ &&
       this->type_->Equals(*other.type_.get())) {
-    if (this->HasMetadata() && other.HasMetadata()) {
+    if (!check_metadata) {
+      return true;
+    } else if (this->HasMetadata() && other.HasMetadata()) {
       return metadata_->Equals(*other.metadata_);
     } else if (!this->HasMetadata() && !other.HasMetadata()) {
       return true;
@@ -82,8 +84,8 @@ bool Field::Equals(const Field& other) const {
   return false;
 }
 
-bool Field::Equals(const std::shared_ptr<Field>& other) const {
-  return Equals(*other.get());
+bool Field::Equals(const std::shared_ptr<Field>& other, bool check_metadata) const {
+  return Equals(*other.get(), check_metadata);
 }
 
 std::string Field::ToString() const {
@@ -333,7 +335,7 @@ bool Schema::Equals(const Schema& other, bool check_metadata) const {
     return false;
   }
   for (int i = 0; i < num_fields(); ++i) {
-    if (!field(i)->Equals(*other.field(i).get())) {
+    if (!field(i)->Equals(*other.field(i).get(), check_metadata)) {
       return false;
     }
   }
diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h
index eb00f43..0758ced 100644
--- a/cpp/src/arrow/type.h
+++ b/cpp/src/arrow/type.h
@@ -265,8 +265,8 @@ class ARROW_EXPORT Field {
 
   std::vector<std::shared_ptr<Field>> Flatten() const;
 
-  bool Equals(const Field& other) const;
-  bool Equals(const std::shared_ptr<Field>& other) const;
+  bool Equals(const Field& other, bool check_metadata = true) const;
+  bool Equals(const std::shared_ptr<Field>& other, bool check_metadata = true) const;
 
   /// \brief Return a string representation ot the field
   std::string ToString() const;
diff --git a/python/pyarrow/tests/test_schema.py b/python/pyarrow/tests/test_schema.py
index 5385c3c..8549d61 100644
--- a/python/pyarrow/tests/test_schema.py
+++ b/python/pyarrow/tests/test_schema.py
@@ -334,6 +334,20 @@ def test_schema_equals():
     assert not sch1.equals(sch3)
 
 
+def test_schema_equals_propagates_check_metadata():
+    # ARROW-4088
+    schema1 = pa.schema([
+        pa.field('foo', pa.int32()),
+        pa.field('bar', pa.string())
+    ])
+    schema2 = pa.schema([
+        pa.field('foo', pa.int32()),
+        pa.field('bar', pa.string(), metadata={'a': 'alpha'}),
+    ])
+    assert not schema1.equals(schema2)
+    assert schema1.equals(schema2, check_metadata=False)
+
+
 def test_schema_equality_operators():
     fields = [
         pa.field('foo', pa.int32()),