You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ko...@apache.org on 2020/11/28 21:59:38 UTC
[arrow] branch master updated: ARROW-10754: [GLib] Add support for
metadata to GArrowField
This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 3ebda83 ARROW-10754: [GLib] Add support for metadata to GArrowField
3ebda83 is described below
commit 3ebda8302b38e9a4ddf9fc36f91e43193654b233
Author: Sutou Kouhei <ko...@clear-code.com>
AuthorDate: Sun Nov 29 06:58:42 2020 +0900
ARROW-10754: [GLib] Add support for metadata to GArrowField
Closes #8789 from kou/glib-field-metadata
Authored-by: Sutou Kouhei <ko...@clear-code.com>
Signed-off-by: Sutou Kouhei <ko...@clear-code.com>
---
c_glib/arrow-glib/Makefile.am | 1 +
c_glib/arrow-glib/field.cpp | 123 ++++++++++++++++++++++++++++++
c_glib/arrow-glib/field.h | 25 +++++-
c_glib/arrow-glib/internal-hash-table.hpp | 41 ++++++++++
c_glib/arrow-glib/meson.build | 1 +
c_glib/arrow-glib/schema.cpp | 13 +---
c_glib/test/test-field.rb | 88 ++++++++++++++++++---
7 files changed, 271 insertions(+), 21 deletions(-)
diff --git a/c_glib/arrow-glib/Makefile.am b/c_glib/arrow-glib/Makefile.am
index 92d7add..608b477 100644
--- a/c_glib/arrow-glib/Makefile.am
+++ b/c_glib/arrow-glib/Makefile.am
@@ -203,6 +203,7 @@ libarrow_glib_la_cpp_headers += \
endif
libarrow_glib_la_cpp_internal_headers = \
+ internal-hash-table.hpp \
internal-index.hpp
libarrow_glib_la_SOURCES = \
diff --git a/c_glib/arrow-glib/field.cpp b/c_glib/arrow-glib/field.cpp
index b8c07f3..745f115 100644
--- a/c_glib/arrow-glib/field.cpp
+++ b/c_glib/arrow-glib/field.cpp
@@ -23,6 +23,7 @@
#include <arrow-glib/data-type.hpp>
#include <arrow-glib/field.hpp>
+#include <arrow-glib/internal-hash-table.hpp>
G_BEGIN_DECLS
@@ -237,6 +238,128 @@ garrow_field_to_string(GArrowField *field)
return g_strdup(arrow_field->ToString().c_str());
}
+/**
+ * garrow_field_to_string_metadata:
+ * @field: A #GArrowField.
+ * @show_metadata: Whether to include the metadata in the result or not.
+ *
+ * Returns: The string representation of the field. @show_metadata is
+ * passed as is to ToString() of the underlying field and the result
+ * is copied with g_strdup().
+ *
+ * It should be freed with g_free() when no longer needed.
+ *
+ * Since: 3.0.0
+ */
+gchar *
+garrow_field_to_string_metadata(GArrowField *field, gboolean show_metadata)
+{
+ const auto arrow_field = garrow_field_get_raw(field);
+ return g_strdup(arrow_field->ToString(show_metadata).c_str());
+}
+
+/**
+ * garrow_field_has_metadata:
+ * @field: A #GArrowField.
+ *
+ * Reports the result of HasMetadata() of the underlying field.
+ *
+ * Returns: %TRUE if the field has metadata, %FALSE otherwise.
+ *
+ * Since: 3.0.0
+ */
+gboolean
+garrow_field_has_metadata(GArrowField *field)
+{
+ const auto arrow_field = garrow_field_get_raw(field);
+ return arrow_field->HasMetadata();
+}
+
+/**
+ * garrow_field_get_metadata:
+ * @field: A #GArrowField.
+ *
+ * Returns: (element-type utf8 utf8) (nullable) (transfer full): The
+ * metadata in the field, or %NULL if the field has no metadata.
+ *
+ * The returned hash table owns copies of all keys and values. They
+ * are released together with the hash table, so the hash table can
+ * be used independently of the lifetime of @field.
+ *
+ * If the metadata has multiple entries with the same key, the value
+ * of the last one is used.
+ *
+ * It should be freed with g_hash_table_unref() when no longer needed.
+ *
+ * Since: 3.0.0
+ */
+GHashTable *
+garrow_field_get_metadata(GArrowField *field)
+{
+ const auto arrow_field = garrow_field_get_raw(field);
+ if (!arrow_field->HasMetadata()) {
+ return NULL;
+ }
+
+ auto arrow_metadata = arrow_field->metadata();
+ // The result is "transfer full": the hash table must own its keys
+ // and values instead of borrowing pointers into the Arrow metadata,
+ // which would dangle once the field is released.
+ auto metadata = g_hash_table_new_full(g_str_hash,
+ g_str_equal,
+ g_free,
+ g_free);
+ const auto n = arrow_metadata->size();
+ for (int64_t i = 0; i < n; ++i) {
+ g_hash_table_insert(metadata,
+ g_strdup(arrow_metadata->key(i).c_str()),
+ g_strdup(arrow_metadata->value(i).c_str()));
+ }
+ return metadata;
+}
+
+/**
+ * garrow_field_with_metadata:
+ * @field: A #GArrowField.
+ * @metadata: (element-type utf8 utf8): A new associated metadata.
+ *
+ * The entries of @metadata are converted to Arrow metadata and passed
+ * to WithMetadata() of the underlying field. The returned field is
+ * created with the data type object obtained from @field.
+ *
+ * Returns: (transfer full): The new field with the given metadata.
+ *
+ * Since: 3.0.0
+ */
+GArrowField *
+garrow_field_with_metadata(GArrowField *field,
+ GHashTable *metadata)
+{
+ const auto arrow_field = garrow_field_get_raw(field);
+ auto arrow_metadata = garrow_internal_hash_table_to_metadata(metadata);
+ auto arrow_new_field = arrow_field->WithMetadata(arrow_metadata);
+ return garrow_field_new_raw(&arrow_new_field,
+ garrow_field_get_data_type(field));
+}
+
+/**
+ * garrow_field_with_merged_metadata:
+ * @field: A #GArrowField.
+ * @metadata: (element-type utf8 utf8): An additional associated metadata.
+ *
+ * The entries of @metadata are converted to Arrow metadata and passed
+ * to WithMergedMetadata() of the underlying field. The returned field
+ * is created with the data type object obtained from @field.
+ *
+ * Returns: (transfer full): The new field that also has the given
+ * metadata. If both the existing metadata and the given metadata
+ * have the same keys, the values in the given metadata are used.
+ *
+ * Since: 3.0.0
+ */
+GArrowField *
+garrow_field_with_merged_metadata(GArrowField *field,
+ GHashTable *metadata)
+{
+ const auto arrow_field = garrow_field_get_raw(field);
+ auto arrow_metadata = garrow_internal_hash_table_to_metadata(metadata);
+ auto arrow_new_field = arrow_field->WithMergedMetadata(arrow_metadata);
+ return garrow_field_new_raw(&arrow_new_field,
+ garrow_field_get_data_type(field));
+}
+
+/**
+ * garrow_field_remove_metadata:
+ * @field: A #GArrowField.
+ *
+ * The returned field wraps the result of RemoveMetadata() of the
+ * underlying field and is created with the data type object obtained
+ * from @field.
+ *
+ * Returns: (transfer full): The new field that doesn't have metadata.
+ *
+ * Since: 3.0.0
+ */
+GArrowField *
+garrow_field_remove_metadata(GArrowField *field)
+{
+ const auto arrow_field = garrow_field_get_raw(field);
+ auto arrow_new_field = arrow_field->RemoveMetadata();
+ return garrow_field_new_raw(&arrow_new_field,
+ garrow_field_get_data_type(field));
+}
+
G_END_DECLS
GArrowField *
diff --git a/c_glib/arrow-glib/field.h b/c_glib/arrow-glib/field.h
index d7b6840..bb2b66c 100644
--- a/c_glib/arrow-glib/field.h
+++ b/c_glib/arrow-glib/field.h
@@ -47,6 +47,29 @@ gboolean garrow_field_is_nullable (GArrowField *field);
gboolean garrow_field_equal (GArrowField *field,
GArrowField *other_field);
-gchar *garrow_field_to_string (GArrowField *field);
+gchar *
+garrow_field_to_string(GArrowField *field);
+GARROW_AVAILABLE_IN_3_0
+gchar *
+garrow_field_to_string_metadata(GArrowField *field,
+ gboolean show_metadata);
+
+GARROW_AVAILABLE_IN_3_0
+gboolean
+garrow_field_has_metadata(GArrowField *field);
+GARROW_AVAILABLE_IN_3_0
+GHashTable *
+garrow_field_get_metadata(GArrowField *field);
+GARROW_AVAILABLE_IN_3_0
+GArrowField *
+garrow_field_with_metadata(GArrowField *field,
+ GHashTable *metadata);
+GARROW_AVAILABLE_IN_3_0
+GArrowField *
+garrow_field_with_merged_metadata(GArrowField *field,
+ GHashTable *metadata);
+GARROW_AVAILABLE_IN_3_0
+GArrowField *
+garrow_field_remove_metadata(GArrowField *field);
G_END_DECLS
diff --git a/c_glib/arrow-glib/internal-hash-table.hpp b/c_glib/arrow-glib/internal-hash-table.hpp
new file mode 100644
index 0000000..3def460
--- /dev/null
+++ b/c_glib/arrow-glib/internal-hash-table.hpp
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#pragma once
+
+#include <glib.h>
+
+#include <arrow/api.h>
+
+/*
+ * Builds a new arrow::KeyValueMetadata from a GHashTable.
+ *
+ * Every key/value pair visited by g_hash_table_foreach() is appended
+ * to the result; keys and values are read as gchar * strings. The
+ * entries are appended in the iteration order of the hash table.
+ */
+static inline std::shared_ptr<arrow::KeyValueMetadata>
+garrow_internal_hash_table_to_metadata(GHashTable *metadata)
+{
+ auto arrow_metadata = std::make_shared<arrow::KeyValueMetadata>();
+ // Captureless lambda so that it can be used as a plain C callback.
+ auto append_entry = [](gpointer key,
+ gpointer value,
+ gpointer user_data) {
+ auto key_values = static_cast<arrow::KeyValueMetadata *>(user_data);
+ key_values->Append(static_cast<gchar *>(key),
+ static_cast<gchar *>(value));
+ };
+ g_hash_table_foreach(metadata, append_entry, arrow_metadata.get());
+ return arrow_metadata;
+}
diff --git a/c_glib/arrow-glib/meson.build b/c_glib/arrow-glib/meson.build
index 0f18c10..53d55b1 100644
--- a/c_glib/arrow-glib/meson.build
+++ b/c_glib/arrow-glib/meson.build
@@ -184,6 +184,7 @@ if have_arrow_orc
endif
cpp_internal_headers = files(
+ 'internal-hash-table.hpp',
'internal-index.hpp',
)
diff --git a/c_glib/arrow-glib/schema.cpp b/c_glib/arrow-glib/schema.cpp
index bf66f56..aa4bb61 100644
--- a/c_glib/arrow-glib/schema.cpp
+++ b/c_glib/arrow-glib/schema.cpp
@@ -24,6 +24,7 @@
#include <arrow-glib/basic-data-type.hpp>
#include <arrow-glib/error.hpp>
#include <arrow-glib/field.hpp>
+#include <arrow-glib/internal-hash-table.hpp>
#include <arrow-glib/schema.hpp>
G_BEGIN_DECLS
@@ -418,17 +419,7 @@ garrow_schema_with_metadata(GArrowSchema *schema,
GHashTable *metadata)
{
const auto arrow_schema = garrow_schema_get_raw(schema);
- auto arrow_metadata = std::make_shared<arrow::KeyValueMetadata>();
- g_hash_table_foreach(metadata,
- [](gpointer key,
- gpointer value,
- gpointer user_data) {
- auto arrow_metadata =
- static_cast<std::shared_ptr<arrow::KeyValueMetadata> *>(user_data);
- (*arrow_metadata)->Append(static_cast<gchar *>(key),
- static_cast<gchar *>(value));
- },
- &arrow_metadata);
+ auto arrow_metadata = garrow_internal_hash_table_to_metadata(metadata);
auto arrow_new_schema = arrow_schema->WithMetadata(arrow_metadata);
return garrow_schema_new_raw(&arrow_new_schema);
}
diff --git a/c_glib/test/test-field.rb b/c_glib/test/test-field.rb
index 1b9c46e..3888746 100644
--- a/c_glib/test/test-field.rb
+++ b/c_glib/test/test-field.rb
@@ -16,31 +16,101 @@
# under the License.
class TestField < Test::Unit::TestCase
+ def setup
+ @data_type = Arrow::BooleanDataType.new
+ @field = Arrow::Field.new("enabled", @data_type)
+ @field_with_metadata = @field.with_metadata("key1" => "value1",
+ "key2" => "value2")
+ end
+
def test_equal
assert_equal(Arrow::Field.new("enabled", Arrow::BooleanDataType.new),
Arrow::Field.new("enabled", Arrow::BooleanDataType.new))
end
def test_name
- field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new)
- assert_equal("enabled", field.name)
+ assert_equal("enabled", @field.name)
end
def test_data_type
- data_type = Arrow::BooleanDataType.new
- field = Arrow::Field.new("enabled", data_type)
- assert_equal(data_type.to_s, field.data_type.to_s)
+ assert_equal(@data_type.to_s,
+ @field.data_type.to_s)
end
def test_nullable?
- field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new)
assert do
- field.nullable?
+ @field.nullable?
end
end
def test_to_s
- field = Arrow::Field.new("enabled", Arrow::BooleanDataType.new)
- assert_equal("enabled: bool", field.to_s)
+ assert_equal("enabled: bool", @field_with_metadata.to_s)
+ end
+
+ sub_test_case("#to_string_metadata") do
+ def test_true
+ assert_equal(<<-FIELD.chomp, @field_with_metadata.to_string_metadata(true))
+enabled: bool
+-- metadata --
+key1: value1
+key2: value2
+ FIELD
+ end
+
+ def test_false
+ assert_equal(<<-FIELD.chomp, @field_with_metadata.to_string_metadata(false))
+enabled: bool
+ FIELD
+ end
+ end
+
+ sub_test_case("#has_metadata?") do
+ def test_existent
+ assert do
+ @field_with_metadata.has_metadata?
+ end
+ end
+
+ def test_nonexistent
+ assert do
+ not @field.has_metadata?
+ end
+ end
+ end
+
+ sub_test_case("#metadata") do
+ def test_existent
+ assert_equal({
+ "key1" => "value1",
+ "key2" => "value2",
+ },
+ @field_with_metadata.metadata)
+ end
+
+ def test_nonexistent
+ assert_nil(@field.metadata)
+ end
+ end
+
+ def test_with_metadata
+ field = @field_with_metadata.with_metadata("key3" => "value3")
+ assert_equal({"key3" => "value3"},
+ field.metadata)
+ end
+
+ def test_with_merged_metadata
+ field = @field_with_metadata.with_merged_metadata("key1" => "new-value1",
+ "key3" => "value3")
+ assert_equal({
+ "key1" => "new-value1",
+ "key2" => "value2",
+ "key3" => "value3",
+ },
+ field.metadata)
+ end
+
+ def test_remove_metadata
+ field = @field_with_metadata.remove_metadata
+ assert_nil(field.metadata)
end
end