You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ko...@apache.org on 2023/01/16 00:49:26 UTC

[arrow] branch master updated: GH-33670: [GLib] Add `GArrowProjectNodeOptions` (#33677)

This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new bec55d711f GH-33670: [GLib] Add `GArrowProjectNodeOptions` (#33677)
bec55d711f is described below

commit bec55d711ffe1689186b67843a627da7a60e1db2
Author: Sutou Kouhei <ko...@clear-code.com>
AuthorDate: Mon Jan 16 09:49:19 2023 +0900

    GH-33670: [GLib] Add `GArrowProjectNodeOptions` (#33677)
    
    # Which issue does this PR close?
    
    Closes #33670
    
    # Rationale for this change
    
    It's needed to create a project node in GLib.
    
    # What changes are included in this PR?
    
    Add a binding for `arrow::compute::ProjectNodeOptions`.
    
    # Are these changes tested?
    
    Yes.
    
    # Are there any user-facing changes?
    
    Yes.
    * Closes: #33670
    
    Authored-by: Sutou Kouhei <ko...@clear-code.com>
    Signed-off-by: Sutou Kouhei <ko...@clear-code.com>
---
 c_glib/arrow-glib/compute.cpp    | 92 +++++++++++++++++++++++++++++++++++++++-
 c_glib/arrow-glib/compute.h      | 24 +++++++++++
 c_glib/test/test-project-node.rb | 83 ++++++++++++++++++++++++++++++++++++
 3 files changed, 198 insertions(+), 1 deletion(-)

diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp
index 41deee9b6e..27e49b0027 100644
--- a/c_glib/arrow-glib/compute.cpp
+++ b/c_glib/arrow-glib/compute.cpp
@@ -26,6 +26,7 @@
 #include <arrow-glib/datum.hpp>
 #include <arrow-glib/enums.h>
 #include <arrow-glib/error.hpp>
+#include <arrow-glib/expression.hpp>
 #include <arrow-glib/reader.hpp>
 #include <arrow-glib/record-batch.hpp>
 #include <arrow-glib/scalar.hpp>
@@ -109,7 +110,6 @@ namespace {
     return
       (sort_key.target == other_sort_key.target) &&
       (sort_key.order == other_sort_key.order);
-
   }
 }
 
@@ -136,6 +136,8 @@ G_BEGIN_DECLS
  *
  * #GArrowSourceNodeOptions is a class to customize a source node.
  *
+ * #GArrowProjectNodeOptions is a class to customize a project node.
+ *
  * #GArrowAggregation is a class to specify how to aggregate.
  *
  * #GArrowAggregateNodeOptions is a class to customize an aggregate node.
@@ -1014,6 +1016,61 @@ garrow_source_node_options_new_table(GArrowTable *table)
 }
 
 
+G_DEFINE_TYPE(GArrowProjectNodeOptions,
+              garrow_project_node_options,
+              GARROW_TYPE_EXECUTE_NODE_OPTIONS)
+/* Instance initializer expected by G_DEFINE_TYPE(); its body is empty. */
+static void
+garrow_project_node_options_init(GArrowProjectNodeOptions *object)
+{
+}
+/* Class initializer expected by G_DEFINE_TYPE(); its body is empty. */
+static void
+garrow_project_node_options_class_init(GArrowProjectNodeOptionsClass *klass)
+{
+}
+
+/**
+ * garrow_project_node_options_new:
+ * @expressions: (element-type GArrowExpression):
+ *   A list of #GArrowExpression to be executed.
+ * @names: (nullable) (array length=n_names):
+ *   Output column names of @expressions. Each expression without a
+ *   name (all of them if @names is %NULL) uses its string representation.
+ * @n_names: The number of @names.
+ *
+ * Returns: A newly created #GArrowProjectNodeOptions.
+ *
+ * Since: 11.0.0
+ */
+GArrowProjectNodeOptions *
+garrow_project_node_options_new(GList *expressions,
+                                gchar **names,
+                                gsize n_names)
+{
+  std::vector<arrow::compute::Expression> arrow_expressions;
+  std::vector<std::string> arrow_names;
+  for (auto node = expressions; node; node = g_list_next(node)) {
+    auto expression = GARROW_EXPRESSION(node->data);
+    arrow_expressions.push_back(*garrow_expression_get_raw(expression));
+  }
+  for (gsize i = 0; i < n_names; ++i) {
+    arrow_names.emplace_back(names[i]);
+  }
+  if (!arrow_names.empty()) {  // Pad only a partial name list.
+    for (size_t i = arrow_names.size(); i < arrow_expressions.size(); ++i) {
+      arrow_names.push_back(arrow_expressions[i].ToString());
+    }
+  }
+  auto arrow_options =
+    new arrow::compute::ProjectNodeOptions(arrow_expressions, arrow_names);
+  auto options = g_object_new(GARROW_TYPE_PROJECT_NODE_OPTIONS,
+                              "options", arrow_options,
+                              NULL);
+  return GARROW_PROJECT_NODE_OPTIONS(options);
+}
+
+
 typedef struct GArrowAggregationPrivate_ {
   gchar *function;
   GArrowFunctionOptions *options;
@@ -1771,6 +1828,39 @@ garrow_execute_plan_build_source_node(GArrowExecutePlan *plan,
                                         error);
 }
 
+/**
+ * garrow_execute_plan_build_project_node:
+ * @plan: A #GArrowExecutePlan.
+ * @input: A #GArrowExecuteNode.
+ * @options: A #GArrowProjectNodeOptions.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * This is a shortcut of garrow_execute_plan_build_node() called with
+ * "project" and a list that contains only @input.
+ *
+ * Returns: (transfer full): A newly built and added #GArrowExecuteNode
+ *   for project on success, %NULL on error.
+ *
+ * Since: 11.0.0
+ */
+GArrowExecuteNode *
+garrow_execute_plan_build_project_node(GArrowExecutePlan *plan,
+                                       GArrowExecuteNode *input,
+                                       GArrowProjectNodeOptions *options,
+                                       GError **error)
+{
+  GList *inputs = nullptr;
+  inputs = g_list_prepend(inputs, input);  // Wrap @input in a one-element list.
+  auto node =
+    garrow_execute_plan_build_node(plan,
+                                   "project",
+                                   inputs,
+                                   GARROW_EXECUTE_NODE_OPTIONS(options),
+                                   error);
+  g_list_free(inputs);  // Releases the list cell only, not @input.
+  return node;
+}
+
 /**
  * garrow_execute_plan_build_aggregate_node:
  * @plan: A #GArrowExecutePlan.
diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h
index 360ae3857e..1ac1d05258 100644
--- a/c_glib/arrow-glib/compute.h
+++ b/c_glib/arrow-glib/compute.h
@@ -156,6 +156,24 @@ GArrowSourceNodeOptions *
 garrow_source_node_options_new_table(GArrowTable *table);
 
 
+#define GARROW_TYPE_PROJECT_NODE_OPTIONS (garrow_project_node_options_get_type())
+G_DECLARE_DERIVABLE_TYPE(GArrowProjectNodeOptions,
+                         garrow_project_node_options,
+                         GARROW,
+                         PROJECT_NODE_OPTIONS,
+                         GArrowExecuteNodeOptions)
+struct _GArrowProjectNodeOptionsClass
+{
+  GArrowExecuteNodeOptionsClass parent_class; /* Parent class; must be first. */
+};
+
+GARROW_AVAILABLE_IN_11_0
+GArrowProjectNodeOptions *
+garrow_project_node_options_new(GList *expressions,
+                                gchar **names,
+                                gsize n_names);
+
+
 #define GARROW_TYPE_AGGREGATION (garrow_aggregation_get_type())
 G_DECLARE_DERIVABLE_TYPE(GArrowAggregation,
                          garrow_aggregation,
@@ -321,6 +339,12 @@ GArrowExecuteNode *
 garrow_execute_plan_build_source_node(GArrowExecutePlan *plan,
                                       GArrowSourceNodeOptions *options,
                                       GError **error);
+GARROW_AVAILABLE_IN_11_0
+GArrowExecuteNode *
+garrow_execute_plan_build_project_node(GArrowExecutePlan *plan,
+                                       GArrowExecuteNode *input,
+                                       GArrowProjectNodeOptions *options,
+                                       GError **error);
 GARROW_AVAILABLE_IN_6_0
 GArrowExecuteNode *
 garrow_execute_plan_build_aggregate_node(GArrowExecutePlan *plan,
diff --git a/c_glib/test/test-project-node.rb b/c_glib/test/test-project-node.rb
new file mode 100644
index 0000000000..758f225453
--- /dev/null
+++ b/c_glib/test/test-project-node.rb
@@ -0,0 +1,83 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestProjectNode < Test::Unit::TestCase
+  include Helper::Buildable
+  # Runs source(table) -> project(options) -> sink and returns the sink's table.
+  def execute_plan(options)
+    plan = Arrow::ExecutePlan.new
+    numbers = build_int8_array([1, 2, 3, 4, 5])
+    strings = build_string_array(["a", "b", "a", "b", "a"])
+    table = build_table(number: numbers,
+                        string: strings)
+    source_node_options = Arrow::SourceNodeOptions.new(table)
+    source_node = plan.build_source_node(source_node_options)
+    project_node = plan.build_project_node(source_node, options)
+    sink_node_options = Arrow::SinkNodeOptions.new
+    sink_node = plan.build_sink_node(project_node,
+                                     sink_node_options)
+    plan.validate
+    plan.start
+    plan.wait
+    reader = sink_node_options.get_reader(project_node.output_schema)
+    table = reader.read_all
+    plan.stop
+    table
+  end
+  # With no names given, output columns are named after the expressions.
+  def test_expressions
+    three_scalar = Arrow::Int8Scalar.new(3)
+    three_datum = Arrow::ScalarDatum.new(three_scalar)
+    expressions = [
+      Arrow::FieldExpression.new("number"),
+      Arrow::CallExpression.new("multiply",
+                                [
+                                  Arrow::FieldExpression.new("number"),
+                                  Arrow::LiteralExpression.new(three_datum),
+                                ]),
+    ]
+    options = Arrow::ProjectNodeOptions.new(expressions)
+    assert_equal(build_table("number" => [
+                               build_int8_array([1, 2, 3, 4, 5]),
+                             ],
+                             "multiply(number, 3)" => [
+                               build_int8_array([3, 6, 9, 12, 15]),
+                             ]),
+                 execute_plan(options))
+  end
+  # With fewer names than expressions, unnamed columns keep expression names.
+  def test_names
+    three_scalar = Arrow::Int8Scalar.new(3)
+    three_datum = Arrow::ScalarDatum.new(three_scalar)
+    expressions = [
+      Arrow::CallExpression.new("multiply",
+                                [
+                                  Arrow::FieldExpression.new("number"),
+                                  Arrow::LiteralExpression.new(three_datum),
+                                ]),
+      Arrow::FieldExpression.new("number"),
+    ]
+    options = Arrow::ProjectNodeOptions.new(expressions, ["number * 3"])
+    assert_equal(build_table("number * 3" => [
+                               build_int8_array([3, 6, 9, 12, 15]),
+                             ],
+                             "number" => [
+                               build_int8_array([1, 2, 3, 4, 5]),
+                             ]),
+                 execute_plan(options))
+  end
+end