You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ko...@apache.org on 2023/01/16 00:49:26 UTC
[arrow] branch master updated: GH-33670: [GLib] Add `GArrowProjectNodeOptions` (#33677)
This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new bec55d711f GH-33670: [GLib] Add `GArrowProjectNodeOptions` (#33677)
bec55d711f is described below
commit bec55d711ffe1689186b67843a627da7a60e1db2
Author: Sutou Kouhei <ko...@clear-code.com>
AuthorDate: Mon Jan 16 09:49:19 2023 +0900
GH-33670: [GLib] Add `GArrowProjectNodeOptions` (#33677)
# Which issue does this PR close?
Closes #33670
# Rationale for this change
It's needed to create a project node in GLib.
# What changes are included in this PR?
Add a binding for `arrow::compute::ProjectNodeOptions`.
# Are these changes tested?
Yes.
# Are there any user-facing changes?
Yes.
* Closes: #33670
Authored-by: Sutou Kouhei <ko...@clear-code.com>
Signed-off-by: Sutou Kouhei <ko...@clear-code.com>
---
c_glib/arrow-glib/compute.cpp | 92 +++++++++++++++++++++++++++++++++++++++-
c_glib/arrow-glib/compute.h | 24 +++++++++++
c_glib/test/test-project-node.rb | 83 ++++++++++++++++++++++++++++++++++++
3 files changed, 198 insertions(+), 1 deletion(-)
diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp
index 41deee9b6e..27e49b0027 100644
--- a/c_glib/arrow-glib/compute.cpp
+++ b/c_glib/arrow-glib/compute.cpp
@@ -26,6 +26,7 @@
#include <arrow-glib/datum.hpp>
#include <arrow-glib/enums.h>
#include <arrow-glib/error.hpp>
+#include <arrow-glib/expression.hpp>
#include <arrow-glib/reader.hpp>
#include <arrow-glib/record-batch.hpp>
#include <arrow-glib/scalar.hpp>
@@ -109,7 +110,6 @@ namespace {
return
(sort_key.target == other_sort_key.target) &&
(sort_key.order == other_sort_key.order);
-
}
}
@@ -136,6 +136,8 @@ G_BEGIN_DECLS
*
* #GArrowSourceNodeOptions is a class to customize a source node.
*
+ * #GArrowProjectNodeOptions is a class to customize a project node.
+ *
* #GArrowAggregation is a class to specify how to aggregate.
*
* #GArrowAggregateNodeOptions is a class to customize an aggregate node.
@@ -1014,6 +1016,61 @@ garrow_source_node_options_new_table(GArrowTable *table)
}
+G_DEFINE_TYPE(GArrowProjectNodeOptions,
+ garrow_project_node_options,
+ GARROW_TYPE_EXECUTE_NODE_OPTIONS) /* Defines the GArrowProjectNodeOptions GType with GARROW_TYPE_EXECUTE_NODE_OPTIONS as its parent type. */
+
+static void
+garrow_project_node_options_init(GArrowProjectNodeOptions *object)
+{ /* Instance initializer: empty, nothing is set up on @object here. */
+}
+
+static void
+garrow_project_node_options_class_init(GArrowProjectNodeOptionsClass *klass)
+{ /* Class initializer: empty, no properties or virtual functions are installed on @klass here. */
+}
+
+/**
+ * garrow_project_node_options_new:
+ * @expressions: (element-type GArrowExpression):
+ * A list of #GArrowExpression to be executed.
+ * @names: (nullable) (array length=n_names):
+ * A list of output column names of @expressions. If @names is %NULL,
+ * the string representations of @expressions will be used.
+ * @n_names: The number of @names.
+ *
+ * If at least one name is given but there are fewer names than
+ * @expressions, the missing trailing names are filled with the
+ * string representations of the corresponding expressions.
+ *
+ * Returns: A newly created #GArrowProjectNodeOptions.
+ *
+ * Since: 11.0.0
+ */
+GArrowProjectNodeOptions *
+garrow_project_node_options_new(GList *expressions,
+ gchar **names,
+ gsize n_names)
+{
+ std::vector<arrow::compute::Expression> arrow_expressions;
+ std::vector<std::string> arrow_names;
+ for (auto node = expressions; node; node = g_list_next(node)) { /* Copy the raw Arrow expression out of every GArrowExpression in the list. */
+ auto expression = GARROW_EXPRESSION(node->data);
+ arrow_expressions.push_back(*garrow_expression_get_raw(expression));
+ }
+ for (gsize i = 0; i < n_names; ++i) { /* @names is only dereferenced when n_names > 0, so NULL with n_names == 0 is fine. */
+ arrow_names.emplace_back(names[i]);
+ }
+ if (!arrow_names.empty()) { /* With no names at all the list stays empty; presumably ProjectNodeOptions then derives default names itself (TODO confirm against Arrow C++). */
+ for (size_t i = arrow_names.size(); i < arrow_expressions.size(); ++i) { /* Pad so that every expression without an explicit name gets its string form. */
+ arrow_names.push_back(arrow_expressions[i].ToString());
+ }
+ }
+ auto arrow_options =
+ new arrow::compute::ProjectNodeOptions(arrow_expressions, arrow_names); /* NOTE(review): raw pointer handed to the "options" property below; presumably the parent class takes ownership and deletes it (confirm). */
+ auto options = g_object_new(GARROW_TYPE_PROJECT_NODE_OPTIONS,
+ "options", arrow_options,
+ NULL);
+ return GARROW_PROJECT_NODE_OPTIONS(options);
+}
+
+
typedef struct GArrowAggregationPrivate_ {
gchar *function;
GArrowFunctionOptions *options;
@@ -1771,6 +1828,39 @@ garrow_execute_plan_build_source_node(GArrowExecutePlan *plan,
error);
}
+/**
+ * garrow_execute_plan_build_project_node:
+ * @plan: A #GArrowExecutePlan.
+ * @input: A #GArrowExecuteNode.
+ * @options: A #GArrowProjectNodeOptions.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * This is a shortcut of garrow_execute_plan_build_node() for a project
+ * node. It passes "project" as the node name and @input as the only
+ * input.
+ *
+ * Returns: (transfer full): A newly built and added #GArrowExecuteNode
+ * for project on success, %NULL on error.
+ *
+ * Since: 11.0.0
+ */
+GArrowExecuteNode *
+garrow_execute_plan_build_project_node(GArrowExecutePlan *plan,
+ GArrowExecuteNode *input,
+ GArrowProjectNodeOptions *options,
+ GError **error)
+{
+ GList *inputs = nullptr;
+ inputs = g_list_prepend(inputs, input); /* garrow_execute_plan_build_node() is given a GList of inputs, so wrap the single @input in a one-element list. */
+ auto node =
+ garrow_execute_plan_build_node(plan,
+ "project",
+ inputs,
+ GARROW_EXECUTE_NODE_OPTIONS(options),
+ error);
+ g_list_free(inputs); /* Frees only the temporary list cell; @input itself is not unreffed here. */
+ return node;
+}
+
/**
* garrow_execute_plan_build_aggregate_node:
* @plan: A #GArrowExecutePlan.
diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h
index 360ae3857e..1ac1d05258 100644
--- a/c_glib/arrow-glib/compute.h
+++ b/c_glib/arrow-glib/compute.h
@@ -156,6 +156,24 @@ GArrowSourceNodeOptions *
garrow_source_node_options_new_table(GArrowTable *table);
+#define GARROW_TYPE_PROJECT_NODE_OPTIONS (garrow_project_node_options_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowProjectNodeOptions,
+ garrow_project_node_options,
+ GARROW,
+ PROJECT_NODE_OPTIONS,
+ GArrowExecuteNodeOptions) /* Declares GArrowProjectNodeOptions as a derivable type whose parent is GArrowExecuteNodeOptions. */
+struct _GArrowProjectNodeOptionsClass
+{
+ GArrowExecuteNodeOptionsClass parent_class; /* The class struct adds no members beyond its parent class. */
+};
+
+GARROW_AVAILABLE_IN_11_0
+GArrowProjectNodeOptions *
+garrow_project_node_options_new(GList *expressions,
+ gchar **names,
+ gsize n_names); /* Parameter documentation lives with the definition in compute.cpp. */
+
+
#define GARROW_TYPE_AGGREGATION (garrow_aggregation_get_type())
G_DECLARE_DERIVABLE_TYPE(GArrowAggregation,
garrow_aggregation,
@@ -321,6 +339,12 @@ GArrowExecuteNode *
garrow_execute_plan_build_source_node(GArrowExecutePlan *plan,
GArrowSourceNodeOptions *options,
GError **error);
+GARROW_AVAILABLE_IN_11_0
+GArrowExecuteNode *
+garrow_execute_plan_build_project_node(GArrowExecutePlan *plan,
+ GArrowExecuteNode *input,
+ GArrowProjectNodeOptions *options,
+ GError **error);
GARROW_AVAILABLE_IN_6_0
GArrowExecuteNode *
garrow_execute_plan_build_aggregate_node(GArrowExecutePlan *plan,
diff --git a/c_glib/test/test-project-node.rb b/c_glib/test/test-project-node.rb
new file mode 100644
index 0000000000..758f225453
--- /dev/null
+++ b/c_glib/test/test-project-node.rb
@@ -0,0 +1,83 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestProjectNode < Test::Unit::TestCase # Tests for Arrow::ProjectNodeOptions and Arrow::ExecutePlan#build_project_node.
+ include Helper::Buildable
+
+ def execute_plan(options) # Runs source -> project(options) -> sink over a fixed 5-row table and returns the resulting table.
+ plan = Arrow::ExecutePlan.new
+ numbers = build_int8_array([1, 2, 3, 4, 5])
+ strings = build_string_array(["a", "b", "a", "b", "a"])
+ table = build_table(number: numbers,
+ string: strings)
+ source_node_options = Arrow::SourceNodeOptions.new(table)
+ source_node = plan.build_source_node(source_node_options)
+ project_node = plan.build_project_node(source_node, options)
+ sink_node_options = Arrow::SinkNodeOptions.new
+ sink_node = plan.build_sink_node(project_node,
+ sink_node_options)
+ plan.validate
+ plan.start
+ plan.wait
+ reader = sink_node_options.get_reader(project_node.output_schema) # The reader uses the project node's output schema, i.e. the projected columns.
+ table = reader.read_all
+ plan.stop
+ table
+ end
+
+ def test_expressions # No names given: each output column is expected to be named after its expression's string form.
+ three_scalar = Arrow::Int8Scalar.new(3)
+ three_datum = Arrow::ScalarDatum.new(three_scalar)
+ expressions = [
+ Arrow::FieldExpression.new("number"),
+ Arrow::CallExpression.new("multiply",
+ [
+ Arrow::FieldExpression.new("number"),
+ Arrow::LiteralExpression.new(three_datum),
+ ]),
+ ]
+ options = Arrow::ProjectNodeOptions.new(expressions)
+ assert_equal(build_table("number" => [
+ build_int8_array([1, 2, 3, 4, 5]),
+ ],
+ "multiply(number, 3)" => [
+ build_int8_array([3, 6, 9, 12, 15]),
+ ]),
+ execute_plan(options))
+ end
+
+ def test_names # One name for two expressions: the second column is expected to fall back to its expression's string form ("number").
+ three_scalar = Arrow::Int8Scalar.new(3)
+ three_datum = Arrow::ScalarDatum.new(three_scalar)
+ expressions = [
+ Arrow::CallExpression.new("multiply",
+ [
+ Arrow::FieldExpression.new("number"),
+ Arrow::LiteralExpression.new(three_datum),
+ ]),
+ Arrow::FieldExpression.new("number"),
+ ]
+ options = Arrow::ProjectNodeOptions.new(expressions, ["number * 3"])
+ assert_equal(build_table("number * 3" => [
+ build_int8_array([3, 6, 9, 12, 15]),
+ ],
+ "number" => [
+ build_int8_array([1, 2, 3, 4, 5]),
+ ]),
+ execute_plan(options))
+ end
+end