You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2017/11/29 17:48:43 UTC
[arrow] branch master updated: ARROW-1862: [GLib] Add
GArrowDictionaryArray
This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 5220283 ARROW-1862: [GLib] Add GArrowDictionaryArray
5220283 is described below
commit 52202833a4d779dc39c2648c77a19c1784c7578e
Author: Kouhei Sutou <ko...@clear-code.com>
AuthorDate: Wed Nov 29 12:48:39 2017 -0500
ARROW-1862: [GLib] Add GArrowDictionaryArray
Author: Kouhei Sutou <ko...@clear-code.com>
Closes #1365 from kou/glib-dictionary-array and squashes the following commits:
83bfa135 [Kouhei Sutou] [GLib] Add GArrowDictionaryArray
---
c_glib/arrow-glib/basic-array.cpp | 3 +
c_glib/arrow-glib/composite-array.cpp | 107 ++++++++++++++++++++++++++++++
c_glib/arrow-glib/composite-array.h | 21 ++++++
c_glib/arrow-glib/composite-data-type.cpp | 8 +++
c_glib/test/test-dictionary-array.rb | 63 ++++++++++++++++++
5 files changed, 202 insertions(+)
diff --git a/c_glib/arrow-glib/basic-array.cpp b/c_glib/arrow-glib/basic-array.cpp
index 0698a04..36cf460 100644
--- a/c_glib/arrow-glib/basic-array.cpp
+++ b/c_glib/arrow-glib/basic-array.cpp
@@ -2091,6 +2091,9 @@ garrow_array_new_raw(std::shared_ptr<arrow::Array> *arrow_array)
case arrow::Type::type::STRUCT:
type = GARROW_TYPE_STRUCT_ARRAY;
break;
+ case arrow::Type::type::DICTIONARY:
+ type = GARROW_TYPE_DICTIONARY_ARRAY;
+ break;
default:
type = GARROW_TYPE_ARRAY;
break;
diff --git a/c_glib/arrow-glib/composite-array.cpp b/c_glib/arrow-glib/composite-array.cpp
index 445103d..14cc46d 100644
--- a/c_glib/arrow-glib/composite-array.cpp
+++ b/c_glib/arrow-glib/composite-array.cpp
@@ -44,6 +44,11 @@ G_BEGIN_DECLS
* or more structs. One struct has zero or more fields. If you don't
* have Arrow format data, you need to use #GArrowStructArrayBuilder
* to create a new array.
+ *
+ * #GArrowDictionaryArray is a class for dictionary arrays. It stores
+ * data as a dictionary plus indices. It is more space efficient than a
+ * normal array when the array has many identical values. You can convert
+ * a normal array to a dictionary array by garrow_array_dictionary_encode().
*/
G_DEFINE_TYPE(GArrowListArray, \
@@ -234,4 +239,106 @@ garrow_struct_array_get_fields(GArrowStructArray *array)
return g_list_reverse(fields);
}
+
+G_DEFINE_TYPE(GArrowDictionaryArray, \
+ garrow_dictionary_array, \
+ GARROW_TYPE_ARRAY)
+
+static void
+garrow_dictionary_array_init(GArrowDictionaryArray *object)
+{ /* Instance initializer: the body is empty, so no per-instance
+   * setup is performed for GArrowDictionaryArray. */
+}
+
+static void
+garrow_dictionary_array_class_init(GArrowDictionaryArrayClass *klass)
+{ /* Class initializer: the body is empty, so nothing is overridden
+   * or registered on the class structure here. */
+}
+
+/**
+ * garrow_dictionary_array_new:
+ * @data_type: The data type of the dictionary.
+ * @indices: The indices of the values in the dictionary.
+ *
+ * Returns: A newly created #GArrowDictionaryArray.
+ *
+ * Since: 0.8.0
+ */
+GArrowDictionaryArray *
+garrow_dictionary_array_new(GArrowDataType *data_type,
+                            GArrowArray *indices)
+{
+  // Construct the C++ dictionary array straight into a base-class
+  // shared pointer; the derived-to-base conversion is implicit.
+  std::shared_ptr<arrow::Array> arrow_array =
+    std::make_shared<arrow::DictionaryArray>(garrow_data_type_get_raw(data_type),
+                                             garrow_array_get_raw(indices));
+  // garrow_array_new_raw() wraps the C++ array in a GObject, which is
+  // then cast to the dictionary array type.
+  auto array = garrow_array_new_raw(&arrow_array);
+  return GARROW_DICTIONARY_ARRAY(array);
+}
+
+/**
+ * garrow_dictionary_array_get_indices:
+ * @array: A #GArrowDictionaryArray.
+ *
+ * Returns: (transfer full): The indices of values in dictionary.
+ *
+ * Since: 0.8.0
+ */
+GArrowArray *
+garrow_dictionary_array_get_indices(GArrowDictionaryArray *array)
+{
+  // Keep the owning shared pointer alive in a local while the derived
+  // view of the same object is used.
+  const auto arrow_base_array = garrow_array_get_raw(GARROW_ARRAY(array));
+  auto arrow_dictionary_array =
+    static_cast<arrow::DictionaryArray *>(arrow_base_array.get());
+  std::shared_ptr<arrow::Array> arrow_indices =
+    arrow_dictionary_array->indices();
+  return garrow_array_new_raw(&arrow_indices);
+}
+
+/**
+ * garrow_dictionary_array_get_dictionary:
+ * @array: A #GArrowDictionaryArray.
+ *
+ * Returns: (transfer full): The dictionary of this array.
+ *
+ * Since: 0.8.0
+ */
+GArrowArray *
+garrow_dictionary_array_get_dictionary(GArrowDictionaryArray *array)
+{
+  // Keep the owning shared pointer alive in a local while the derived
+  // view of the same object is used.
+  const auto arrow_base_array = garrow_array_get_raw(GARROW_ARRAY(array));
+  auto arrow_dictionary_array =
+    static_cast<arrow::DictionaryArray *>(arrow_base_array.get());
+  std::shared_ptr<arrow::Array> arrow_dictionary =
+    arrow_dictionary_array->dictionary();
+  return garrow_array_new_raw(&arrow_dictionary);
+}
+
+/**
+ * garrow_dictionary_array_get_dictionary_data_type:
+ * @array: A #GArrowDictionaryArray.
+ *
+ * Returns: (transfer full): The dictionary data type of this array.
+ *
+ * Since: 0.8.0
+ */
+GArrowDictionaryDataType *
+garrow_dictionary_array_get_dictionary_data_type(GArrowDictionaryArray *array)
+{
+  auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array));
+  // Hand out the array's own shared data type so that the returned
+  // object co-owns it. Wrapping the raw dict_type() pointer in a
+  // shared_ptr with a no-op deleter (plus a const_cast) gives a
+  // non-owning handle that could dangle once the underlying Arrow
+  // array is destroyed.
+  auto arrow_data_type = arrow_array->type();
+  auto data_type = garrow_data_type_new_raw(&arrow_data_type);
+  return GARROW_DICTIONARY_DATA_TYPE(data_type);
+}
+
G_END_DECLS
diff --git a/c_glib/arrow-glib/composite-array.h b/c_glib/arrow-glib/composite-array.h
index ebf9554..c59a616 100644
--- a/c_glib/arrow-glib/composite-array.h
+++ b/c_glib/arrow-glib/composite-array.h
@@ -129,4 +129,25 @@ GArrowArray *garrow_struct_array_get_field(GArrowStructArray *array,
gint i);
GList *garrow_struct_array_get_fields(GArrowStructArray *array);
+
+#define GARROW_TYPE_DICTIONARY_ARRAY (garrow_dictionary_array_get_type()) /* expands to a call to garrow_dictionary_array_get_type() */
+G_DECLARE_DERIVABLE_TYPE(GArrowDictionaryArray,
+ garrow_dictionary_array,
+ GARROW,
+ DICTIONARY_ARRAY,
+ GArrowArray) /* declares GArrowDictionaryArray with GArrowArray as its parent type */
+struct _GArrowDictionaryArrayClass /* class structure: its only member is the parent class */
+{
+ GArrowArrayClass parent_class;
+};
+
+GArrowDictionaryArray *
+garrow_dictionary_array_new(GArrowDataType *data_type, GArrowArray *indices);
+GArrowArray *
+garrow_dictionary_array_get_indices(GArrowDictionaryArray *array);
+GArrowArray *
+garrow_dictionary_array_get_dictionary(GArrowDictionaryArray *array);
+GArrowDictionaryDataType *
+garrow_dictionary_array_get_dictionary_data_type(GArrowDictionaryArray *array);
+
G_END_DECLS
diff --git a/c_glib/arrow-glib/composite-data-type.cpp b/c_glib/arrow-glib/composite-data-type.cpp
index 7ce8a97..5f742e5 100644
--- a/c_glib/arrow-glib/composite-data-type.cpp
+++ b/c_glib/arrow-glib/composite-data-type.cpp
@@ -158,6 +158,8 @@ garrow_dictionary_data_type_class_init(GArrowDictionaryDataTypeClass *klass)
* @ordered: Whether dictionary contents are ordered or not.
*
* Returns: The newly created dictionary data type.
+ *
+ * Since: 0.8.0
*/
GArrowDictionaryDataType *
garrow_dictionary_data_type_new(GArrowDataType *index_data_type,
@@ -177,6 +179,8 @@ garrow_dictionary_data_type_new(GArrowDataType *index_data_type,
* @data_type: The #GArrowDictionaryDataType.
*
* Returns: (transfer full): The #GArrowDataType of index.
+ *
+ * Since: 0.8.0
*/
GArrowDataType *
garrow_dictionary_data_type_get_index_data_type(GArrowDictionaryDataType *data_type)
@@ -193,6 +197,8 @@ garrow_dictionary_data_type_get_index_data_type(GArrowDictionaryDataType *data_t
* @data_type: The #GArrowDictionaryDataType.
*
* Returns: (transfer full): The dictionary as #GArrowArray.
+ *
+ * Since: 0.8.0
*/
GArrowArray *
garrow_dictionary_data_type_get_dictionary(GArrowDictionaryDataType *data_type)
@@ -209,6 +215,8 @@ garrow_dictionary_data_type_get_dictionary(GArrowDictionaryDataType *data_type)
* @data_type: The #GArrowDictionaryDataType.
*
* Returns: Whether dictionary contents are ordered or not.
+ *
+ * Since: 0.8.0
*/
gboolean
garrow_dictionary_data_type_is_ordered(GArrowDictionaryDataType *data_type)
diff --git a/c_glib/test/test-dictionary-array.rb b/c_glib/test/test-dictionary-array.rb
new file mode 100644
index 0000000..d4f4b34
--- /dev/null
+++ b/c_glib/test/test-dictionary-array.rb
@@ -0,0 +1,63 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestDictionaryArray < Test::Unit::TestCase # Tests for Arrow::DictionaryArray: construction and its accessors.
+ include Helper::Buildable
+
+ def setup # Builds the dictionary data type shared by all tests below.
+ @index_data_type = Arrow::Int32DataType.new
+ @dictionary = build_string_array(["C", "C++", "Ruby"])
+ @ordered = false
+ @data_type = Arrow::DictionaryDataType.new(@index_data_type,
+ @dictionary,
+ @ordered)
+ end
+
+ sub_test_case(".new") do
+ def test_new # Checks the string form of a newly built dictionary array.
+ indices = build_int32_array([0, 2, 2, 1, 0])
+ dictionary_array = Arrow::DictionaryArray.new(@data_type, indices)
+ assert_equal(<<-STRING.chomp, dictionary_array.to_s)
+
+-- is_valid: all not null
+-- dictionary: ["C", "C++", "Ruby"]
+-- indices: [0, 2, 2, 1, 0]
+ STRING
+ end
+ end
+
+ sub_test_case("instance methods") do
+ def setup # Runs the outer setup, then builds the array under test.
+ super
+ @indices = build_int32_array([0, 2, 2, 1, 0])
+ @dictionary_array = Arrow::DictionaryArray.new(@data_type, @indices)
+ end
+
+ def test_indices # indices should equal the indices passed to .new
+ assert_equal(@indices, @dictionary_array.indices)
+ end
+
+ def test_dictionary # dictionary should equal the dictionary given to the data type
+ assert_equal(@dictionary, @dictionary_array.dictionary)
+ end
+
+ def test_dictionary_data_type # dictionary_data_type should equal the data type passed to .new
+ assert_equal(@data_type,
+ @dictionary_array.dictionary_data_type)
+ end
+ end
+end
--
To stop receiving notification emails like this one, please contact
['"commits@arrow.apache.org" <co...@arrow.apache.org>'].