You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "kou (via GitHub)" <gi...@apache.org> on 2023/03/23 21:15:51 UTC

[GitHub] [arrow] kou commented on a diff in pull request #34711: GH-15280: [C++][Python][GLib] add libarrow_acero containing everything previously in compute/exec

kou commented on code in PR #34711:
URL: https://github.com/apache/arrow/pull/34711#discussion_r1146823936


##########
cpp/cmake_modules/DefineOptions.cmake:
##########
@@ -311,6 +311,7 @@ takes precedence over ccache if a storage backend is configured" ON)
                 OFF
                 DEPENDS
                 ARROW_COMPUTE

Review Comment:
   Could you remove `ARROW_COMPUTE` because `ARROW_ACERO` depends on it?
   
   ```suggestion
   ```



##########
cpp/cmake_modules/DefineOptions.cmake:
##########
@@ -380,6 +381,7 @@ takes precedence over ccache if a storage backend is configured" ON)
                 OFF
                 DEPENDS
                 ARROW_COMPUTE

Review Comment:
   ```suggestion
   ```



##########
cpp/examples/arrow/CMakeLists.txt:
##########
@@ -20,7 +20,13 @@ add_arrow_example(row_wise_conversion_example)
 add_arrow_example(rapidjson_row_converter)
 
 if(ARROW_COMPUTE)

Review Comment:
   Should this be `ARROW_ACERO`?
   
   ```suggestion
   if(ARROW_ACERO)
   ```



##########
cpp/examples/arrow/execution_plan_documentation_examples.cc:
##########
@@ -23,7 +23,7 @@
 #include <arrow/compute/api.h>
 #include <arrow/compute/api_vector.h>
 #include <arrow/compute/cast.h>
-#include <arrow/compute/exec/exec_plan.h>
+#include "arrow/acero/exec_plan.h"

Review Comment:
   Why?



##########
cpp/src/arrow/acero/ArrowAceroConfig.cmake.in:
##########
@@ -0,0 +1,39 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# This config sets the following variables in your project::
+#
+#   ArrowSubstrait_FOUND - true if Arrow Substrait found on the system

Review Comment:
   ```suggestion
   #   ArrowAcero_FOUND - true if Arrow Acero found on the system
   ```



##########
cpp/src/arrow/acero/CMakeLists.txt:
##########
@@ -0,0 +1,273 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+add_custom_target(arrow_acero)
+
+arrow_install_all_headers("arrow/acero")
+
+# pkg-config support
+#arrow_add_pkg_config("arrow-acero")
+

Review Comment:
   ```suggestion
   ```



##########
cpp/cmake_modules/DefineOptions.cmake:
##########
@@ -380,6 +381,7 @@ takes precedence over ccache if a storage backend is configured" ON)
                 OFF
                 DEPENDS
                 ARROW_COMPUTE
+                ARROW_ACERO

Review Comment:
   Do we need this?
   It seems that `ARROW_DATASET` depends on `ARROW_ACERO`.



##########
cpp/src/arrow/acero/CMakeLists.txt:
##########
@@ -0,0 +1,273 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+add_custom_target(arrow_acero)
+
+arrow_install_all_headers("arrow/acero")
+
+# pkg-config support
+#arrow_add_pkg_config("arrow-acero")
+
+set(ARROW_ACERO_SRCS
+    groupby.cc
+    accumulation_queue.cc
+    aggregate_node.cc
+    asof_join_node.cc
+    bloom_filter.cc
+    exec_plan.cc
+    fetch_node.cc
+    filter_node.cc
+    hash_join.cc
+    hash_join_dict.cc
+    hash_join_node.cc
+    map_node.cc
+    options.cc
+    order_by_node.cc
+    order_by_impl.cc
+    partition_util.cc
+    pivot_longer_node.cc
+    project_node.cc
+    query_context.cc
+    sink_node.cc
+    source_node.cc
+    swiss_join.cc
+    task_util.cc
+    tpch_node.cc
+    union_node.cc
+    util.cc)
+
+append_avx2_src(bloom_filter_avx2.cc)
+append_avx2_src(swiss_join_avx2.cc)
+append_avx2_src(util_avx2.cc)
+
+set(ARROW_ACERO_SHARED_LINK_LIBS)
+set(ARROW_ACERO_STATIC_LINK_LIBS)
+
+if(ARROW_WITH_OPENTELEMETRY)
+  list(APPEND
+       ARROW_ACERO_SHARED_LINK_LIBS
+       opentelemetry-cpp::trace
+       opentelemetry-cpp::ostream_span_exporter
+       opentelemetry-cpp::otlp_http_exporter)
+  list(APPEND
+       ARROW_ACERO_STATIC_LINK_LIBS
+       opentelemetry-cpp::trace
+       opentelemetry-cpp::ostream_span_exporter
+       opentelemetry-cpp::otlp_http_exporter)
+endif()
+
+add_arrow_lib(arrow_acero
+              CMAKE_PACKAGE_NAME
+              ArrowAcero
+              PKG_CONFIG_NAME
+              arrow-acero
+              OUTPUTS
+              ARROW_ACERO_LIBRARIES
+              SOURCES
+              ${ARROW_ACERO_SRCS}
+              PRECOMPILED_HEADERS
+              "$<$<COMPILE_LANGUAGE:CXX>:arrow/acero/pch.h>"
+              SHARED_LINK_FLAGS
+              ${ARROW_VERSION_SCRIPT_FLAGS} # Defined in cpp/arrow/CMakeLists.txt
+              SHARED_LINK_LIBS
+              ${ARROW_ACERO_SHARED_LINK_LIBS}
+              SHARED_INSTALL_INTERFACE_LIBS
+              Arrow::arrow_shared
+              STATIC_LINK_LIBS
+              ${ARROW_ACERO_STATIC_LINK_LIBS}
+              STATIC_INSTALL_INTERFACE_LIBS
+              Arrow::arrow_static)
+
+if(ARROW_BUILD_STATIC AND WIN32)
+  target_compile_definitions(arrow_acero_static PUBLIC ARROW_ACERO_STATIC)
+endif()
+
+set(ARROW_ACERO_TEST_LINK_LIBS ${ARROW_LINK_lIBS} ${ARROW_TEST_LINK_LIBS})
+if(ARROW_TEST_LINKAGE STREQUAL "static")
+  list(APPEND ARROW_ACERO_TEST_LINK_LIBS arrow_acero_static)
+else()
+  list(APPEND ARROW_ACERO_TEST_LINK_LIBS arrow_acero_shared)
+endif()
+
+function(ADD_ARROW_ACERO_TEST REL_TEST_NAME)

Review Comment:
   ```suggestion
   function(add_arrow_acero_test REL_TEST_NAME)
   ```



##########
cpp/src/arrow/acero/CMakeLists.txt:
##########
@@ -0,0 +1,273 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+add_custom_target(arrow_acero)
+
+arrow_install_all_headers("arrow/acero")
+
+# pkg-config support
+#arrow_add_pkg_config("arrow-acero")
+
+set(ARROW_ACERO_SRCS
+    groupby.cc
+    accumulation_queue.cc
+    aggregate_node.cc
+    asof_join_node.cc
+    bloom_filter.cc
+    exec_plan.cc
+    fetch_node.cc
+    filter_node.cc
+    hash_join.cc
+    hash_join_dict.cc
+    hash_join_node.cc
+    map_node.cc
+    options.cc
+    order_by_node.cc
+    order_by_impl.cc
+    partition_util.cc
+    pivot_longer_node.cc
+    project_node.cc
+    query_context.cc
+    sink_node.cc
+    source_node.cc
+    swiss_join.cc
+    task_util.cc
+    tpch_node.cc
+    union_node.cc
+    util.cc)
+
+append_avx2_src(bloom_filter_avx2.cc)
+append_avx2_src(swiss_join_avx2.cc)
+append_avx2_src(util_avx2.cc)
+
+set(ARROW_ACERO_SHARED_LINK_LIBS)
+set(ARROW_ACERO_STATIC_LINK_LIBS)
+
+if(ARROW_WITH_OPENTELEMETRY)
+  list(APPEND
+       ARROW_ACERO_SHARED_LINK_LIBS
+       opentelemetry-cpp::trace
+       opentelemetry-cpp::ostream_span_exporter
+       opentelemetry-cpp::otlp_http_exporter)
+  list(APPEND
+       ARROW_ACERO_STATIC_LINK_LIBS
+       opentelemetry-cpp::trace
+       opentelemetry-cpp::ostream_span_exporter
+       opentelemetry-cpp::otlp_http_exporter)
+endif()
+
+add_arrow_lib(arrow_acero
+              CMAKE_PACKAGE_NAME
+              ArrowAcero
+              PKG_CONFIG_NAME
+              arrow-acero
+              OUTPUTS
+              ARROW_ACERO_LIBRARIES
+              SOURCES
+              ${ARROW_ACERO_SRCS}
+              PRECOMPILED_HEADERS
+              "$<$<COMPILE_LANGUAGE:CXX>:arrow/acero/pch.h>"
+              SHARED_LINK_FLAGS
+              ${ARROW_VERSION_SCRIPT_FLAGS} # Defined in cpp/arrow/CMakeLists.txt
+              SHARED_LINK_LIBS
+              ${ARROW_ACERO_SHARED_LINK_LIBS}
+              SHARED_INSTALL_INTERFACE_LIBS
+              Arrow::arrow_shared
+              STATIC_LINK_LIBS
+              ${ARROW_ACERO_STATIC_LINK_LIBS}
+              STATIC_INSTALL_INTERFACE_LIBS
+              Arrow::arrow_static)
+
+if(ARROW_BUILD_STATIC AND WIN32)
+  target_compile_definitions(arrow_acero_static PUBLIC ARROW_ACERO_STATIC)
+endif()
+
+set(ARROW_ACERO_TEST_LINK_LIBS ${ARROW_LINK_lIBS} ${ARROW_TEST_LINK_LIBS})
+if(ARROW_TEST_LINKAGE STREQUAL "static")
+  list(APPEND ARROW_ACERO_TEST_LINK_LIBS arrow_acero_static)
+else()
+  list(APPEND ARROW_ACERO_TEST_LINK_LIBS arrow_acero_shared)
+endif()
+
+function(ADD_ARROW_ACERO_TEST REL_TEST_NAME)
+  set(options REQUIRE_ALL_KERNELS)
+  set(one_value_args PREFIX)
+  set(multi_value_args LABELS)
+  cmake_parse_arguments(ARG
+                        "${options}"
+                        "${one_value_args}"
+                        "${multi_value_args}"
+                        ${ARGN})
+
+  if(ARG_REQUIRE_ALL_KERNELS AND (NOT ARROW_COMPUTE))
+    return()
+  endif()
+
+  if(ARG_PREFIX)
+    set(PREFIX ${ARG_PREFIX})
+  else()
+    set(PREFIX "arrow-acero")
+  endif()
+
+  if(ARG_LABELS)
+    set(LABELS ${ARG_LABELS})
+  else()
+    set(LABELS "arrow_acero")
+  endif()
+
+  add_arrow_test(${REL_TEST_NAME}
+                 EXTRA_LINK_LIBS
+                 ${ARROW_ACERO_TEST_LINK_LIBS}
+                 PREFIX
+                 ${PREFIX}
+                 SOURCES
+                 test_util_internal.cc
+                 LABELS
+                 ${LABELS}
+                 ${ARG_UNPARSED_ARGUMENTS})
+endfunction()
+
+add_arrow_acero_test(subtree_test
+                     REQUIRE_ALL_KERNELS
+                     PREFIX
+                     "arrow-acero"

Review Comment:
   Why do we need to specify `PREFIX "arrow-acero"` explicitly in all (?) `add_arrow_acero_test`?
   The default `PREFIX` is defined as `"arrow-acero"` in `add_arrow_acero_test`.



##########
cpp/src/arrow/engine/CMakeLists.txt:
##########
@@ -46,14 +46,18 @@ add_arrow_lib(arrow_substrait
               SHARED_LINK_FLAGS
               ${ARROW_VERSION_SCRIPT_FLAGS} # Defined in cpp/arrow/CMakeLists.txt
               SHARED_LINK_LIBS
+              arrow_acero_shared
               arrow_dataset_shared
               substrait
               SHARED_INSTALL_INTERFACE_LIBS
+              ArrowAcero::arrow_acero_shared
               ArrowDataset::arrow_dataset_shared
               STATIC_LINK_LIBS
+              arrow_acero_static

Review Comment:
   Ditto.



##########
cpp/src/arrow/acero/visibility.h:
##########
@@ -0,0 +1,50 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// This API is EXPERIMENTAL.

Review Comment:
   Do we need this?



##########
cpp/src/arrow/compute/kernels/CMakeLists.txt:
##########
@@ -93,9 +90,12 @@ add_arrow_benchmark(vector_selection_benchmark PREFIX "arrow-compute")
 
 # Aggregates
 
-add_arrow_compute_test(aggregate_test
-                       SOURCES
-                       aggregate_test.cc
-                       hash_aggregate_test.cc
+add_arrow_compute_test(aggregate_test SOURCES aggregate_test.cc
+                       #                       hash_aggregate_test.cc

Review Comment:
   Is it WIP?



##########
cpp/src/arrow/engine/arrow-substrait.pc.in:
##########
@@ -23,5 +23,5 @@ Name: Apache Arrow Substrait Consumer
 Description: Apache Arrow's Substrait Consumer.
 Version: @ARROW_VERSION@
 Requires: arrow

Review Comment:
   ```suggestion
   Requires: arrow-acero
   ```



##########
cpp/src/arrow/engine/CMakeLists.txt:
##########
@@ -46,14 +46,18 @@ add_arrow_lib(arrow_substrait
               SHARED_LINK_FLAGS
               ${ARROW_VERSION_SCRIPT_FLAGS} # Defined in cpp/arrow/CMakeLists.txt
               SHARED_LINK_LIBS
+              arrow_acero_shared
               arrow_dataset_shared
               substrait
               SHARED_INSTALL_INTERFACE_LIBS
+              ArrowAcero::arrow_acero_shared
               ArrowDataset::arrow_dataset_shared
               STATIC_LINK_LIBS
+              arrow_acero_static
               arrow_dataset_static
               substrait
               STATIC_INSTALL_INTERFACE_LIBS
+              ArrowAcero::arrow_acero_static

Review Comment:
   Ditto.



##########
cpp/src/arrow/engine/arrow-substrait.pc.in:
##########
@@ -23,5 +23,5 @@ Name: Apache Arrow Substrait Consumer
 Description: Apache Arrow's Substrait Consumer.
 Version: @ARROW_VERSION@
 Requires: arrow
-Libs: -L${libdir} -larrow_substrait
+Libs: -L${libdir} -larrow_acero -larrow_substrait

Review Comment:
   ```suggestion
   Libs: -L${libdir} -larrow_substrait
   ```



##########
cpp/examples/arrow/CMakeLists.txt:
##########
@@ -20,7 +20,13 @@ add_arrow_example(row_wise_conversion_example)
 add_arrow_example(rapidjson_row_converter)
 
 if(ARROW_COMPUTE)
-  add_arrow_example(compute_register_example)
+  if(ARROW_BUILD_SHARED)
+    set(ENGINE_ACERO_EXAMPLE_LINK_LIBS arrow_acero_shared)
+  else()
+    set(ENGINE_ACERO_EXAMPLE_LINK_LIBS arrow_acero_static)
+  endif()
+  add_arrow_example(compute_register_example EXTRA_LINK_LIBS

Review Comment:
   Should we rename to `acero_register_example`?
   
   ```suggestion
     add_arrow_example(acero_register_example EXTRA_LINK_LIBS
   ```



##########
cpp/examples/arrow/compute_register_example.cc:
##########
@@ -17,10 +17,10 @@
 
 #include <arrow/api.h>
 #include <arrow/compute/api.h>
-#include <arrow/compute/exec/exec_plan.h>
-#include <arrow/compute/exec/options.h>
 #include <arrow/util/async_generator.h>
 #include <arrow/util/future.h>
+#include "arrow/acero/exec_plan.h"
+#include "arrow/acero/options.h"
 #include "arrow/compute/expression.h"

Review Comment:
   Could you use `#include <...>` instead of `#include "..."` because they don't exist at relative path from this file?



##########
cpp/src/arrow/acero/CMakeLists.txt:
##########
@@ -0,0 +1,273 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+add_custom_target(arrow_acero)
+
+arrow_install_all_headers("arrow/acero")
+
+# pkg-config support
+#arrow_add_pkg_config("arrow-acero")
+
+set(ARROW_ACERO_SRCS
+    groupby.cc
+    accumulation_queue.cc
+    aggregate_node.cc
+    asof_join_node.cc
+    bloom_filter.cc
+    exec_plan.cc
+    fetch_node.cc
+    filter_node.cc
+    hash_join.cc
+    hash_join_dict.cc
+    hash_join_node.cc
+    map_node.cc
+    options.cc
+    order_by_node.cc
+    order_by_impl.cc
+    partition_util.cc
+    pivot_longer_node.cc
+    project_node.cc
+    query_context.cc
+    sink_node.cc
+    source_node.cc
+    swiss_join.cc
+    task_util.cc
+    tpch_node.cc
+    union_node.cc
+    util.cc)
+
+append_avx2_src(bloom_filter_avx2.cc)
+append_avx2_src(swiss_join_avx2.cc)
+append_avx2_src(util_avx2.cc)
+
+set(ARROW_ACERO_SHARED_LINK_LIBS)
+set(ARROW_ACERO_STATIC_LINK_LIBS)
+
+if(ARROW_WITH_OPENTELEMETRY)
+  list(APPEND
+       ARROW_ACERO_SHARED_LINK_LIBS
+       opentelemetry-cpp::trace
+       opentelemetry-cpp::ostream_span_exporter
+       opentelemetry-cpp::otlp_http_exporter)
+  list(APPEND
+       ARROW_ACERO_STATIC_LINK_LIBS
+       opentelemetry-cpp::trace
+       opentelemetry-cpp::ostream_span_exporter
+       opentelemetry-cpp::otlp_http_exporter)
+endif()
+
+add_arrow_lib(arrow_acero
+              CMAKE_PACKAGE_NAME
+              ArrowAcero
+              PKG_CONFIG_NAME
+              arrow-acero
+              OUTPUTS
+              ARROW_ACERO_LIBRARIES
+              SOURCES
+              ${ARROW_ACERO_SRCS}
+              PRECOMPILED_HEADERS
+              "$<$<COMPILE_LANGUAGE:CXX>:arrow/acero/pch.h>"
+              SHARED_LINK_FLAGS
+              ${ARROW_VERSION_SCRIPT_FLAGS} # Defined in cpp/arrow/CMakeLists.txt
+              SHARED_LINK_LIBS
+              ${ARROW_ACERO_SHARED_LINK_LIBS}
+              SHARED_INSTALL_INTERFACE_LIBS
+              Arrow::arrow_shared
+              STATIC_LINK_LIBS
+              ${ARROW_ACERO_STATIC_LINK_LIBS}
+              STATIC_INSTALL_INTERFACE_LIBS
+              Arrow::arrow_static)
+
+if(ARROW_BUILD_STATIC AND WIN32)
+  target_compile_definitions(arrow_acero_static PUBLIC ARROW_ACERO_STATIC)
+endif()
+
+set(ARROW_ACERO_TEST_LINK_LIBS ${ARROW_LINK_lIBS} ${ARROW_TEST_LINK_LIBS})
+if(ARROW_TEST_LINKAGE STREQUAL "static")
+  list(APPEND ARROW_ACERO_TEST_LINK_LIBS arrow_acero_static)
+else()
+  list(APPEND ARROW_ACERO_TEST_LINK_LIBS arrow_acero_shared)
+endif()
+
+function(ADD_ARROW_ACERO_TEST REL_TEST_NAME)
+  set(options REQUIRE_ALL_KERNELS)
+  set(one_value_args PREFIX)
+  set(multi_value_args LABELS)
+  cmake_parse_arguments(ARG
+                        "${options}"
+                        "${one_value_args}"
+                        "${multi_value_args}"
+                        ${ARGN})
+
+  if(ARG_REQUIRE_ALL_KERNELS AND (NOT ARROW_COMPUTE))
+    return()
+  endif()

Review Comment:
   Do we need this?
   It seems that `ARROW_ACERO` depends on `ARROW_COMPUTE`.



##########
cpp/src/arrow/compute/kernels/CMakeLists.txt:
##########
@@ -93,9 +90,12 @@ add_arrow_benchmark(vector_selection_benchmark PREFIX "arrow-compute")
 
 # Aggregates
 
-add_arrow_compute_test(aggregate_test
-                       SOURCES
-                       aggregate_test.cc
-                       hash_aggregate_test.cc
+add_arrow_compute_test(aggregate_test SOURCES aggregate_test.cc
+                       #                       hash_aggregate_test.cc
                        test_util.cc)
-add_arrow_benchmark(aggregate_benchmark PREFIX "arrow-compute")
+# add_arrow_benchmark(aggregate_benchmark PREFIX "arrow-compute")

Review Comment:
   Is it WIP?



##########
cpp/src/arrow/acero/visibility.h:
##########
@@ -0,0 +1,50 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// This API is EXPERIMENTAL.
+
+#pragma once
+
+#if defined(_WIN32) || defined(__CYGWIN__)
+#if defined(_MSC_VER)
+#pragma warning(push)
+#pragma warning(disable : 4251)
+#else
+#pragma GCC diagnostic ignored "-Wattributes"
+#endif
+
+#ifdef ARROW_ACERO_STATIC
+#define ARROW_ACERO_EXPORT
+#elif defined(ARROW_ACERO_EXPORTING)
+#define ARROW_ACERO_EXPORT __declspec(dllexport)
+#else
+#define ARROW_ACERO_EXPORT __declspec(dllimport)
+#endif
+
+#define ARROW_ACERO_NO_EXPORT
+#else  // Not Windows
+#ifndef ARROW_ACERO_EXPORT
+#define ARROW_ACERO_EXPORT __attribute__((visibility("default")))
+#endif
+#ifndef ARROW_ACERO_NO_EXPORT
+#define ARROW_ACERO_NO_EXPORT __attribute__((visibility("hidden")))
+#endif
+#endif  // Non-Windows

Review Comment:
   "Not Windows" like "#else  // Not Windows" above?



##########
cpp/src/arrow/engine/CMakeLists.txt:
##########
@@ -46,14 +46,18 @@ add_arrow_lib(arrow_substrait
               SHARED_LINK_FLAGS
               ${ARROW_VERSION_SCRIPT_FLAGS} # Defined in cpp/arrow/CMakeLists.txt
               SHARED_LINK_LIBS
+              arrow_acero_shared
               arrow_dataset_shared
               substrait
               SHARED_INSTALL_INTERFACE_LIBS
+              ArrowAcero::arrow_acero_shared

Review Comment:
   I think that we don't need this because `ArrowDataset::arrow_dataset_shared` has this dependency.



##########
cpp/src/arrow/compute/kernels/aggregate_benchmark.cc:
##########
@@ -21,7 +21,7 @@
 
 #include "arrow/array/array_primitive.h"
 #include "arrow/compute/api.h"
-#include "arrow/compute/exec/groupby.h"
+//#include "arrow/acero/groupby.h"

Review Comment:
   If this is needless, could you remove this?



##########
cpp/src/arrow/engine/CMakeLists.txt:
##########
@@ -46,14 +46,18 @@ add_arrow_lib(arrow_substrait
               SHARED_LINK_FLAGS
               ${ARROW_VERSION_SCRIPT_FLAGS} # Defined in cpp/arrow/CMakeLists.txt
               SHARED_LINK_LIBS
+              arrow_acero_shared

Review Comment:
   I think that we don't need this because `arrow_dataset_shared` has this dependency.



##########
cpp/src/arrow/engine/substrait/test_util.h:
##########
@@ -0,0 +1,237 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "arrow/testing/gtest_util.h"
+#include "arrow/util/vector.h"
+
+#include <functional>
+#include <random>
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "arrow/acero/exec_plan.h"
+#include "arrow/compute/exec.h"
+#include "arrow/compute/kernel.h"
+#include "arrow/testing/visibility.h"
+#include "arrow/util/async_generator.h"
+#include "arrow/util/pcg_random.h"
+
+namespace arrow {
+namespace compute {
+//

Review Comment:
   Do these comments mean WIP?



##########
python/CMakeLists.txt:
##########
@@ -113,6 +113,7 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
   option(PYARROW_BUILD_CUDA "Build the PyArrow CUDA support" OFF)
   option(PYARROW_BUILD_FLIGHT "Build the PyArrow Flight integration" OFF)
   option(PYARROW_BUILD_SUBSTRAIT "Build the PyArrow Substrait integration" OFF)
+  option(PYARROW_BUILD_ACERO "Build the PyArrow Acero integration" OFF)

Review Comment:
   Could you sort these `PYARROW_BUILD_*` lines?



##########
python/CMakeLists.txt:
##########
@@ -561,6 +572,19 @@ if(PYARROW_BUILD_CUDA)
   set_source_files_properties(pyarrow/_cuda.pyx PROPERTIES CYTHON_API TRUE)
 endif()
 
+# Acero
+if(PYARROW_BUILD_ACERO)
+  if(PYARROW_BUNDLE_ARROW_CPP)
+    bundle_arrow_lib(${ARROW_ACERO_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION})
+    if(MSVC)
+      bundle_arrow_import_lib(${ARROW_ACERO_IMPORT_LIB})
+    endif()
+  endif()
+
+  set(DATASET_LINK_LIBS ArrowAcero::arrow_acero_shared)

Review Comment:
   ```suggestion
     set(ACERO_LINK_LIBS ArrowAcero::arrow_acero_shared)
   ```
   
   And we need the following change:
   
   ```diff
   diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
   index 580857d50c..e9b22d7531 100644
   --- a/python/CMakeLists.txt
   +++ b/python/CMakeLists.txt
   @@ -779,9 +779,13 @@ if(PYARROW_BUILD_SUBSTRAIT)
      target_link_libraries(_substrait PRIVATE ${SUBSTRAIT_LINK_LIBS})
    endif()
    
   +if(PYARROW_BUILD_ACERO)
   +  target_link_libraries(_acero PRIVATE ${ACERO_LINK_LIBS})
   +  target_link_libraries(_exec_plan PRIVATE ${ACERO_LINK_LIBS})
   +endif()
   +
    if(PYARROW_BUILD_DATASET)
      target_link_libraries(_dataset PRIVATE ${DATASET_LINK_LIBS})
   -  target_link_libraries(_exec_plan PRIVATE ${DATASET_LINK_LIBS})
      if(PYARROW_BUILD_ORC)
        target_link_libraries(_dataset_orc PRIVATE ${DATASET_LINK_LIBS})
      endif()
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org