You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ko...@apache.org on 2020/11/28 21:59:38 UTC

[arrow] branch master updated: ARROW-10754: [GLib] Add support for metadata to GArrowField

This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 3ebda83  ARROW-10754: [GLib] Add support for metadata to GArrowField
3ebda83 is described below

commit 3ebda8302b38e9a4ddf9fc36f91e43193654b233
Author: Sutou Kouhei <ko...@clear-code.com>
AuthorDate: Sun Nov 29 06:58:42 2020 +0900

    ARROW-10754: [GLib] Add support for metadata to GArrowField
    
    Closes #8789 from kou/glib-field-metadata
    
    Authored-by: Sutou Kouhei <ko...@clear-code.com>
    Signed-off-by: Sutou Kouhei <ko...@clear-code.com>
---
 c_glib/arrow-glib/Makefile.am             |   1 +
 c_glib/arrow-glib/field.cpp               | 123 ++++++++++++++++++++++++++++++
 c_glib/arrow-glib/field.h                 |  25 +++++-
 c_glib/arrow-glib/internal-hash-table.hpp |  41 ++++++++++
 c_glib/arrow-glib/meson.build             |   1 +
 c_glib/arrow-glib/schema.cpp              |  13 +---
 c_glib/test/test-field.rb                 |  88 ++++++++++++++++++---
 7 files changed, 271 insertions(+), 21 deletions(-)

diff --git a/c_glib/arrow-glib/Makefile.am b/c_glib/arrow-glib/Makefile.am
index 92d7add..608b477 100644
--- a/c_glib/arrow-glib/Makefile.am
+++ b/c_glib/arrow-glib/Makefile.am
@@ -203,6 +203,7 @@ libarrow_glib_la_cpp_headers +=			\
 endif
 
 libarrow_glib_la_cpp_internal_headers =		\
+	internal-hash-table.hpp			\
 	internal-index.hpp
 
 libarrow_glib_la_SOURCES =			\
diff --git a/c_glib/arrow-glib/field.cpp b/c_glib/arrow-glib/field.cpp
index b8c07f3..745f115 100644
--- a/c_glib/arrow-glib/field.cpp
+++ b/c_glib/arrow-glib/field.cpp
@@ -23,6 +23,7 @@
 
 #include <arrow-glib/data-type.hpp>
 #include <arrow-glib/field.hpp>
+#include <arrow-glib/internal-hash-table.hpp>
 
 G_BEGIN_DECLS
 
@@ -237,6 +238,128 @@ garrow_field_to_string(GArrowField *field)
   return g_strdup(arrow_field->ToString().c_str());
 }
 
+/**
+ * garrow_field_to_string_metadata:
+ * @field: A #GArrowField.
+ * @show_metadata: Whether to include the metadata in the output or not.
+ *
+ * Returns: The string representation of the field.
+ *
+ *   It should be freed with g_free() when no longer needed.
+ *
+ * Since: 3.0.0
+ */
+gchar *
+garrow_field_to_string_metadata(GArrowField *field, gboolean show_metadata)
+{
+  const auto description = garrow_field_get_raw(field)->ToString(show_metadata);
+  return g_strdup(description.c_str());
+}
+
+/**
+ * garrow_field_has_metadata:
+ * @field: A #GArrowField.
+ *
+ * Returns: %TRUE when metadata is attached to the field, %FALSE if not.
+ *
+ * Since: 3.0.0
+ */
+gboolean
+garrow_field_has_metadata(GArrowField *field)
+{
+  const bool has_metadata = garrow_field_get_raw(field)->HasMetadata();
+  return has_metadata ? TRUE : FALSE;
+}
+
+/**
+ * garrow_field_get_metadata:
+ * @field: A #GArrowField.
+ *
+ * Returns: (element-type utf8 utf8) (nullable) (transfer full): The
+ *   metadata in the field. Keys and values are copies owned by the
+ *   returned table, so it stays valid after @field is released.
+ *   It should be freed with g_hash_table_unref() when no longer needed.
+ *
+ * Since: 3.0.0
+ */
+GHashTable *
+garrow_field_get_metadata(GArrowField *field)
+{
+  const auto arrow_field = garrow_field_get_raw(field);
+  if (!arrow_field->HasMetadata()) {
+    return NULL;
+  }
+
+  auto arrow_metadata = arrow_field->metadata();
+  /* Own copies: borrowed c_str() pointers would dangle with the field. */
+  auto metadata = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, g_free);
+  for (int64_t i = 0, n = arrow_metadata->size(); i < n; ++i) {
+    g_hash_table_insert(metadata,
+                        g_strdup(arrow_metadata->key(i).c_str()),
+                        g_strdup(arrow_metadata->value(i).c_str()));
+  }
+  return metadata;
+}
+
+/**
+ * garrow_field_with_metadata:
+ * @field: A #GArrowField.
+ * @metadata: (element-type utf8 utf8): The metadata for the new field.
+ *
+ * Returns: (transfer full): The new field that has the given metadata.
+ *
+ * Since: 3.0.0
+ */
+GArrowField *
+garrow_field_with_metadata(GArrowField *field,
+                           GHashTable *metadata)
+{
+  auto arrow_replaced_field =
+    garrow_field_get_raw(field)->WithMetadata(
+      garrow_internal_hash_table_to_metadata(metadata));
+  auto data_type = garrow_field_get_data_type(field);
+  return garrow_field_new_raw(&arrow_replaced_field, data_type);
+}
+
+/**
+ * garrow_field_with_merged_metadata:
+ * @field: A #GArrowField.
+ * @metadata: (element-type utf8 utf8): The metadata to be merged in.
+ *
+ * Returns: (transfer full): The new field whose metadata is the existing
+ *   metadata merged with the given metadata. If a key exists in both,
+ *   the value in the given metadata is used.
+ *
+ * Since: 3.0.0
+ */
+GArrowField *
+garrow_field_with_merged_metadata(GArrowField *field,
+                                  GHashTable *metadata)
+{
+  auto arrow_extra_metadata = garrow_internal_hash_table_to_metadata(metadata);
+  auto arrow_merged_field =
+    garrow_field_get_raw(field)->WithMergedMetadata(arrow_extra_metadata);
+  auto data_type = garrow_field_get_data_type(field);
+  return garrow_field_new_raw(&arrow_merged_field, data_type);
+}
+
+/**
+ * garrow_field_remove_metadata:
+ * @field: A #GArrowField.
+ *
+ * Returns: (transfer full): The new field created without any metadata.
+ *
+ * Since: 3.0.0
+ */
+GArrowField *
+garrow_field_remove_metadata(GArrowField *field)
+{
+  auto arrow_raw_field = garrow_field_get_raw(field);
+  auto arrow_stripped_field = arrow_raw_field->RemoveMetadata();
+  auto data_type = garrow_field_get_data_type(field);
+  return garrow_field_new_raw(&arrow_stripped_field, data_type);
+}
+
 G_END_DECLS
 
 GArrowField *
diff --git a/c_glib/arrow-glib/field.h b/c_glib/arrow-glib/field.h
index d7b6840..bb2b66c 100644
--- a/c_glib/arrow-glib/field.h
+++ b/c_glib/arrow-glib/field.h
@@ -47,6 +47,29 @@ gboolean        garrow_field_is_nullable   (GArrowField *field);
 gboolean        garrow_field_equal         (GArrowField *field,
                                             GArrowField *other_field);
 
-gchar          *garrow_field_to_string     (GArrowField *field);
+gchar *
+garrow_field_to_string(GArrowField *field);
+GARROW_AVAILABLE_IN_3_0
+gchar *
+garrow_field_to_string_metadata(GArrowField *field,
+                                gboolean show_metadata);
+
+GARROW_AVAILABLE_IN_3_0
+gboolean
+garrow_field_has_metadata(GArrowField *field);
+GARROW_AVAILABLE_IN_3_0
+GHashTable *
+garrow_field_get_metadata(GArrowField *field);
+GARROW_AVAILABLE_IN_3_0
+GArrowField *
+garrow_field_with_metadata(GArrowField *field,
+                           GHashTable *metadata);
+GARROW_AVAILABLE_IN_3_0
+GArrowField *
+garrow_field_with_merged_metadata(GArrowField *field,
+                                  GHashTable *metadata);
+GARROW_AVAILABLE_IN_3_0
+GArrowField *
+garrow_field_remove_metadata(GArrowField *field);
 
 G_END_DECLS
diff --git a/c_glib/arrow-glib/internal-hash-table.hpp b/c_glib/arrow-glib/internal-hash-table.hpp
new file mode 100644
index 0000000..3def460
--- /dev/null
+++ b/c_glib/arrow-glib/internal-hash-table.hpp
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <glib.h>
+
+#include <arrow/api.h>
+
+static inline std::shared_ptr<arrow::KeyValueMetadata>
+garrow_internal_hash_table_to_metadata(GHashTable *metadata)
+{
+  /* Build Arrow metadata by appending every entry of the hash
+   * table. Keys and values are treated as C strings. */
+  auto arrow_metadata = std::make_shared<arrow::KeyValueMetadata>();
+  auto append_entry = [](gpointer key,
+                         gpointer value,
+                         gpointer user_data) {
+    auto collected = static_cast<arrow::KeyValueMetadata *>(user_data);
+    collected->Append(static_cast<gchar *>(key),
+                      static_cast<gchar *>(value));
+  };
+  g_hash_table_foreach(metadata, append_entry, arrow_metadata.get());
+  return arrow_metadata;
+}
diff --git a/c_glib/arrow-glib/meson.build b/c_glib/arrow-glib/meson.build
index 0f18c10..53d55b1 100644
--- a/c_glib/arrow-glib/meson.build
+++ b/c_glib/arrow-glib/meson.build
@@ -184,6 +184,7 @@ if have_arrow_orc
 endif
 
 cpp_internal_headers = files(
+  'internal-hash-table.hpp',
   'internal-index.hpp',
 )
 
diff --git a/c_glib/arrow-glib/schema.cpp b/c_glib/arrow-glib/schema.cpp
index bf66f56..aa4bb61 100644
--- a/c_glib/arrow-glib/schema.cpp
+++ b/c_glib/arrow-glib/schema.cpp
@@ -24,6 +24,7 @@
 #include <arrow-glib/basic-data-type.hpp>
 #include <arrow-glib/error.hpp>
 #include <arrow-glib/field.hpp>
+#include <arrow-glib/internal-hash-table.hpp>
 #include <arrow-glib/schema.hpp>
 
 G_BEGIN_DECLS
@@ -418,17 +419,7 @@ garrow_schema_with_metadata(GArrowSchema *schema,
                             GHashTable *metadata)
 {
   const auto arrow_schema = garrow_schema_get_raw(schema);
-  auto arrow_metadata = std::make_shared<arrow::KeyValueMetadata>();
-  g_hash_table_foreach(metadata,
-                       [](gpointer key,
-                          gpointer value,
-                          gpointer user_data) {
-                         auto arrow_metadata =
-                           static_cast<std::shared_ptr<arrow::KeyValueMetadata> *>(user_data);
-                         (*arrow_metadata)->Append(static_cast<gchar *>(key),
-                                                   static_cast<gchar *>(value));
-                       },
-                       &arrow_metadata);
+  auto arrow_metadata = garrow_internal_hash_table_to_metadata(metadata);
   auto arrow_new_schema = arrow_schema->WithMetadata(arrow_metadata);
   return garrow_schema_new_raw(&arrow_new_schema);
 }
diff --git a/c_glib/test/test-field.rb b/c_glib/test/test-field.rb
index 1b9c46e..3888746 100644
--- a/c_glib/test/test-field.rb
+++ b/c_glib/test/test-field.rb
@@ -16,31 +16,101 @@
 # under the License.
 
 class TestField < Test::Unit::TestCase
+  def setup
+    @data_type = Arrow::BooleanDataType.new
+    @field = Arrow::Field.new("enabled", @data_type)
+    @field_with_metadata = @field.with_metadata("key1" => "value1",
+                                                "key2" => "value2")
+  end
+
   def test_equal
     assert_equal(Arrow::Field.new("enabled", Arrow::BooleanDataType.new),
                  Arrow::Field.new("enabled", Arrow::BooleanDataType.new))
   end
 
   def test_name
-    field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new)
-    assert_equal("enabled", field.name)
+    assert_equal("enabled", @field.name)
   end
 
   def test_data_type
-    data_type = Arrow::BooleanDataType.new
-    field = Arrow::Field.new("enabled", data_type)
-    assert_equal(data_type.to_s, field.data_type.to_s)
+    assert_equal(@data_type.to_s,
+                 @field.data_type.to_s)
   end
 
   def test_nullable?
-    field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new)
     assert do
-      field.nullable?
+      @field.nullable?
     end
   end
 
   def test_to_s
-    field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new)
-    assert_equal("enabled: bool", field.to_s)
+    assert_equal("enabled: bool", @field_with_metadata.to_s)
+  end
+
+  sub_test_case("#to_string_metadata") do
+    def test_true
+      assert_equal(<<-FIELD.chomp, @field_with_metadata.to_string_metadata(true))
+enabled: bool
+-- metadata --
+key1: value1
+key2: value2
+      FIELD
+    end
+
+    def test_false
+      assert_equal(<<-FIELD.chomp, @field_with_metadata.to_string_metadata(false))
+enabled: bool
+      FIELD
+    end
+  end
+
+  sub_test_case("#has_metadata?") do
+    def test_existent
+      assert do
+        @field_with_metadata.has_metadata?
+      end
+    end
+
+    def test_nonexistent
+      assert do
+        not @field.has_metadata?
+      end
+    end
+  end
+
+  sub_test_case("#metadata") do
+    def test_existent
+      assert_equal({
+                     "key1" => "value1",
+                     "key2" => "value2",
+                   },
+                   @field_with_metadata.metadata)
+    end
+
+    def test_nonexistent
+      assert_nil(@field.metadata)
+    end
+  end
+
+  def test_with_metadata
+    field = @field_with_metadata.with_metadata("key3" => "value3")
+    assert_equal({"key3" => "value3"},
+                 field.metadata)
+  end
+
+  def test_with_merged_metadata
+    field = @field_with_metadata.with_merged_metadata("key1" => "new-value1",
+                                                      "key3" => "value3")
+    assert_equal({
+                   "key1" => "new-value1",
+                   "key2" => "value2",
+                   "key3" => "value3",
+                 },
+                 field.metadata)
+  end
+
+  def test_remove_metadata
+    field = @field_with_metadata.remove_metadata
+    assert_nil(field.metadata)
   end
 end