You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by "kou (via GitHub)" <gi...@apache.org> on 2023/03/23 21:15:51 UTC
[GitHub] [arrow] kou commented on a diff in pull request #34711: GH-15280: [C++][Python][GLib] add libarrow_acero containing everything previously in compute/exec
kou commented on code in PR #34711:
URL: https://github.com/apache/arrow/pull/34711#discussion_r1146823936
##########
cpp/cmake_modules/DefineOptions.cmake:
##########
@@ -311,6 +311,7 @@ takes precedence over ccache if a storage backend is configured" ON)
OFF
DEPENDS
ARROW_COMPUTE
Review Comment:
Could you remove `ARROW_COMPUTE` because `ARROW_ACERO` depends on it?
```suggestion
```
##########
cpp/cmake_modules/DefineOptions.cmake:
##########
@@ -380,6 +381,7 @@ takes precedence over ccache if a storage backend is configured" ON)
OFF
DEPENDS
ARROW_COMPUTE
Review Comment:
```suggestion
```
##########
cpp/examples/arrow/CMakeLists.txt:
##########
@@ -20,7 +20,13 @@ add_arrow_example(row_wise_conversion_example)
add_arrow_example(rapidjson_row_converter)
if(ARROW_COMPUTE)
Review Comment:
Should this be `ARROW_ACERO`?
```suggestion
if(ARROW_ACERO)
```
##########
cpp/examples/arrow/execution_plan_documentation_examples.cc:
##########
@@ -23,7 +23,7 @@
#include <arrow/compute/api.h>
#include <arrow/compute/api_vector.h>
#include <arrow/compute/cast.h>
-#include <arrow/compute/exec/exec_plan.h>
+#include "arrow/acero/exec_plan.h"
Review Comment:
Why?
##########
cpp/src/arrow/acero/ArrowAceroConfig.cmake.in:
##########
@@ -0,0 +1,39 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# This config sets the following variables in your project::
+#
+# ArrowSubstrait_FOUND - true if Arrow Substrait found on the system
Review Comment:
```suggestion
# ArrowAcero_FOUND - true if Arrow Acero found on the system
```
##########
cpp/src/arrow/acero/CMakeLists.txt:
##########
@@ -0,0 +1,273 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+add_custom_target(arrow_acero)
+
+arrow_install_all_headers("arrow/acero")
+
+# pkg-config support
+#arrow_add_pkg_config("arrow-acero")
+
Review Comment:
```suggestion
```
##########
cpp/cmake_modules/DefineOptions.cmake:
##########
@@ -380,6 +381,7 @@ takes precedence over ccache if a storage backend is configured" ON)
OFF
DEPENDS
ARROW_COMPUTE
+ ARROW_ACERO
Review Comment:
Do we need this?
It seems that `ARROW_DATASET` depends on `ARROW_ACERO`.
##########
cpp/src/arrow/acero/CMakeLists.txt:
##########
@@ -0,0 +1,273 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+add_custom_target(arrow_acero)
+
+arrow_install_all_headers("arrow/acero")
+
+# pkg-config support
+#arrow_add_pkg_config("arrow-acero")
+
+set(ARROW_ACERO_SRCS
+ groupby.cc
+ accumulation_queue.cc
+ aggregate_node.cc
+ asof_join_node.cc
+ bloom_filter.cc
+ exec_plan.cc
+ fetch_node.cc
+ filter_node.cc
+ hash_join.cc
+ hash_join_dict.cc
+ hash_join_node.cc
+ map_node.cc
+ options.cc
+ order_by_node.cc
+ order_by_impl.cc
+ partition_util.cc
+ pivot_longer_node.cc
+ project_node.cc
+ query_context.cc
+ sink_node.cc
+ source_node.cc
+ swiss_join.cc
+ task_util.cc
+ tpch_node.cc
+ union_node.cc
+ util.cc)
+
+append_avx2_src(bloom_filter_avx2.cc)
+append_avx2_src(swiss_join_avx2.cc)
+append_avx2_src(util_avx2.cc)
+
+set(ARROW_ACERO_SHARED_LINK_LIBS)
+set(ARROW_ACERO_STATIC_LINK_LIBS)
+
+if(ARROW_WITH_OPENTELEMETRY)
+ list(APPEND
+ ARROW_ACERO_SHARED_LINK_LIBS
+ opentelemetry-cpp::trace
+ opentelemetry-cpp::ostream_span_exporter
+ opentelemetry-cpp::otlp_http_exporter)
+ list(APPEND
+ ARROW_ACERO_STATIC_LINK_LIBS
+ opentelemetry-cpp::trace
+ opentelemetry-cpp::ostream_span_exporter
+ opentelemetry-cpp::otlp_http_exporter)
+endif()
+
+add_arrow_lib(arrow_acero
+ CMAKE_PACKAGE_NAME
+ ArrowAcero
+ PKG_CONFIG_NAME
+ arrow-acero
+ OUTPUTS
+ ARROW_ACERO_LIBRARIES
+ SOURCES
+ ${ARROW_ACERO_SRCS}
+ PRECOMPILED_HEADERS
+ "$<$<COMPILE_LANGUAGE:CXX>:arrow/acero/pch.h>"
+ SHARED_LINK_FLAGS
+ ${ARROW_VERSION_SCRIPT_FLAGS} # Defined in cpp/arrow/CMakeLists.txt
+ SHARED_LINK_LIBS
+ ${ARROW_ACERO_SHARED_LINK_LIBS}
+ SHARED_INSTALL_INTERFACE_LIBS
+ Arrow::arrow_shared
+ STATIC_LINK_LIBS
+ ${ARROW_ACERO_STATIC_LINK_LIBS}
+ STATIC_INSTALL_INTERFACE_LIBS
+ Arrow::arrow_static)
+
+if(ARROW_BUILD_STATIC AND WIN32)
+ target_compile_definitions(arrow_acero_static PUBLIC ARROW_ACERO_STATIC)
+endif()
+
+set(ARROW_ACERO_TEST_LINK_LIBS ${ARROW_LINK_lIBS} ${ARROW_TEST_LINK_LIBS})
+if(ARROW_TEST_LINKAGE STREQUAL "static")
+ list(APPEND ARROW_ACERO_TEST_LINK_LIBS arrow_acero_static)
+else()
+ list(APPEND ARROW_ACERO_TEST_LINK_LIBS arrow_acero_shared)
+endif()
+
+function(ADD_ARROW_ACERO_TEST REL_TEST_NAME)
Review Comment:
```suggestion
function(add_arrow_acero_test REL_TEST_NAME)
```
##########
cpp/src/arrow/acero/CMakeLists.txt:
##########
@@ -0,0 +1,273 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+add_custom_target(arrow_acero)
+
+arrow_install_all_headers("arrow/acero")
+
+# pkg-config support
+#arrow_add_pkg_config("arrow-acero")
+
+set(ARROW_ACERO_SRCS
+ groupby.cc
+ accumulation_queue.cc
+ aggregate_node.cc
+ asof_join_node.cc
+ bloom_filter.cc
+ exec_plan.cc
+ fetch_node.cc
+ filter_node.cc
+ hash_join.cc
+ hash_join_dict.cc
+ hash_join_node.cc
+ map_node.cc
+ options.cc
+ order_by_node.cc
+ order_by_impl.cc
+ partition_util.cc
+ pivot_longer_node.cc
+ project_node.cc
+ query_context.cc
+ sink_node.cc
+ source_node.cc
+ swiss_join.cc
+ task_util.cc
+ tpch_node.cc
+ union_node.cc
+ util.cc)
+
+append_avx2_src(bloom_filter_avx2.cc)
+append_avx2_src(swiss_join_avx2.cc)
+append_avx2_src(util_avx2.cc)
+
+set(ARROW_ACERO_SHARED_LINK_LIBS)
+set(ARROW_ACERO_STATIC_LINK_LIBS)
+
+if(ARROW_WITH_OPENTELEMETRY)
+ list(APPEND
+ ARROW_ACERO_SHARED_LINK_LIBS
+ opentelemetry-cpp::trace
+ opentelemetry-cpp::ostream_span_exporter
+ opentelemetry-cpp::otlp_http_exporter)
+ list(APPEND
+ ARROW_ACERO_STATIC_LINK_LIBS
+ opentelemetry-cpp::trace
+ opentelemetry-cpp::ostream_span_exporter
+ opentelemetry-cpp::otlp_http_exporter)
+endif()
+
+add_arrow_lib(arrow_acero
+ CMAKE_PACKAGE_NAME
+ ArrowAcero
+ PKG_CONFIG_NAME
+ arrow-acero
+ OUTPUTS
+ ARROW_ACERO_LIBRARIES
+ SOURCES
+ ${ARROW_ACERO_SRCS}
+ PRECOMPILED_HEADERS
+ "$<$<COMPILE_LANGUAGE:CXX>:arrow/acero/pch.h>"
+ SHARED_LINK_FLAGS
+ ${ARROW_VERSION_SCRIPT_FLAGS} # Defined in cpp/arrow/CMakeLists.txt
+ SHARED_LINK_LIBS
+ ${ARROW_ACERO_SHARED_LINK_LIBS}
+ SHARED_INSTALL_INTERFACE_LIBS
+ Arrow::arrow_shared
+ STATIC_LINK_LIBS
+ ${ARROW_ACERO_STATIC_LINK_LIBS}
+ STATIC_INSTALL_INTERFACE_LIBS
+ Arrow::arrow_static)
+
+if(ARROW_BUILD_STATIC AND WIN32)
+ target_compile_definitions(arrow_acero_static PUBLIC ARROW_ACERO_STATIC)
+endif()
+
+set(ARROW_ACERO_TEST_LINK_LIBS ${ARROW_LINK_lIBS} ${ARROW_TEST_LINK_LIBS})
+if(ARROW_TEST_LINKAGE STREQUAL "static")
+ list(APPEND ARROW_ACERO_TEST_LINK_LIBS arrow_acero_static)
+else()
+ list(APPEND ARROW_ACERO_TEST_LINK_LIBS arrow_acero_shared)
+endif()
+
+function(ADD_ARROW_ACERO_TEST REL_TEST_NAME)
+ set(options REQUIRE_ALL_KERNELS)
+ set(one_value_args PREFIX)
+ set(multi_value_args LABELS)
+ cmake_parse_arguments(ARG
+ "${options}"
+ "${one_value_args}"
+ "${multi_value_args}"
+ ${ARGN})
+
+ if(ARG_REQUIRE_ALL_KERNELS AND (NOT ARROW_COMPUTE))
+ return()
+ endif()
+
+ if(ARG_PREFIX)
+ set(PREFIX ${ARG_PREFIX})
+ else()
+ set(PREFIX "arrow-acero")
+ endif()
+
+ if(ARG_LABELS)
+ set(LABELS ${ARG_LABELS})
+ else()
+ set(LABELS "arrow_acero")
+ endif()
+
+ add_arrow_test(${REL_TEST_NAME}
+ EXTRA_LINK_LIBS
+ ${ARROW_ACERO_TEST_LINK_LIBS}
+ PREFIX
+ ${PREFIX}
+ SOURCES
+ test_util_internal.cc
+ LABELS
+ ${LABELS}
+ ${ARG_UNPARSED_ARGUMENTS})
+endfunction()
+
+add_arrow_acero_test(subtree_test
+ REQUIRE_ALL_KERNELS
+ PREFIX
+ "arrow-acero"
Review Comment:
Why do we need to specify `PREFIX "arrow-acero"` explicitly in all (?) `add_arrow_acero_test`?
The default `PREFIX` is defined as `"arrow-acero"` in `add_arrow_acero_test`.
##########
cpp/src/arrow/engine/CMakeLists.txt:
##########
@@ -46,14 +46,18 @@ add_arrow_lib(arrow_substrait
SHARED_LINK_FLAGS
${ARROW_VERSION_SCRIPT_FLAGS} # Defined in cpp/arrow/CMakeLists.txt
SHARED_LINK_LIBS
+ arrow_acero_shared
arrow_dataset_shared
substrait
SHARED_INSTALL_INTERFACE_LIBS
+ ArrowAcero::arrow_acero_shared
ArrowDataset::arrow_dataset_shared
STATIC_LINK_LIBS
+ arrow_acero_static
Review Comment:
Ditto.
##########
cpp/src/arrow/acero/visibility.h:
##########
@@ -0,0 +1,50 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// This API is EXPERIMENTAL.
Review Comment:
Do we need this?
##########
cpp/src/arrow/compute/kernels/CMakeLists.txt:
##########
@@ -93,9 +90,12 @@ add_arrow_benchmark(vector_selection_benchmark PREFIX "arrow-compute")
# Aggregates
-add_arrow_compute_test(aggregate_test
- SOURCES
- aggregate_test.cc
- hash_aggregate_test.cc
+add_arrow_compute_test(aggregate_test SOURCES aggregate_test.cc
+ # hash_aggregate_test.cc
Review Comment:
Is it WIP?
##########
cpp/src/arrow/engine/arrow-substrait.pc.in:
##########
@@ -23,5 +23,5 @@ Name: Apache Arrow Substrait Consumer
Description: Apache Arrow's Substrait Consumer.
Version: @ARROW_VERSION@
Requires: arrow
Review Comment:
```suggestion
Requires: arrow-acero
```
##########
cpp/src/arrow/engine/CMakeLists.txt:
##########
@@ -46,14 +46,18 @@ add_arrow_lib(arrow_substrait
SHARED_LINK_FLAGS
${ARROW_VERSION_SCRIPT_FLAGS} # Defined in cpp/arrow/CMakeLists.txt
SHARED_LINK_LIBS
+ arrow_acero_shared
arrow_dataset_shared
substrait
SHARED_INSTALL_INTERFACE_LIBS
+ ArrowAcero::arrow_acero_shared
ArrowDataset::arrow_dataset_shared
STATIC_LINK_LIBS
+ arrow_acero_static
arrow_dataset_static
substrait
STATIC_INSTALL_INTERFACE_LIBS
+ ArrowAcero::arrow_acero_static
Review Comment:
Ditto.
##########
cpp/src/arrow/engine/arrow-substrait.pc.in:
##########
@@ -23,5 +23,5 @@ Name: Apache Arrow Substrait Consumer
Description: Apache Arrow's Substrait Consumer.
Version: @ARROW_VERSION@
Requires: arrow
-Libs: -L${libdir} -larrow_substrait
+Libs: -L${libdir} -larrow_acero -larrow_substrait
Review Comment:
```suggestion
Libs: -L${libdir} -larrow_substrait
```
##########
cpp/examples/arrow/CMakeLists.txt:
##########
@@ -20,7 +20,13 @@ add_arrow_example(row_wise_conversion_example)
add_arrow_example(rapidjson_row_converter)
if(ARROW_COMPUTE)
- add_arrow_example(compute_register_example)
+ if(ARROW_BUILD_SHARED)
+ set(ENGINE_ACERO_EXAMPLE_LINK_LIBS arrow_acero_shared)
+ else()
+ set(ENGINE_ACERO_EXAMPLE_LINK_LIBS arrow_acero_static)
+ endif()
+ add_arrow_example(compute_register_example EXTRA_LINK_LIBS
Review Comment:
Should we rename to `acero_register_example`?
```suggestion
add_arrow_example(acero_register_example EXTRA_LINK_LIBS
```
##########
cpp/examples/arrow/compute_register_example.cc:
##########
@@ -17,10 +17,10 @@
#include <arrow/api.h>
#include <arrow/compute/api.h>
-#include <arrow/compute/exec/exec_plan.h>
-#include <arrow/compute/exec/options.h>
#include <arrow/util/async_generator.h>
#include <arrow/util/future.h>
+#include "arrow/acero/exec_plan.h"
+#include "arrow/acero/options.h"
#include "arrow/compute/expression.h"
Review Comment:
Could you use `#include <...>` instead of `#include "..."` because they don't exist at relative path from this file?
##########
cpp/src/arrow/acero/CMakeLists.txt:
##########
@@ -0,0 +1,273 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+add_custom_target(arrow_acero)
+
+arrow_install_all_headers("arrow/acero")
+
+# pkg-config support
+#arrow_add_pkg_config("arrow-acero")
+
+set(ARROW_ACERO_SRCS
+ groupby.cc
+ accumulation_queue.cc
+ aggregate_node.cc
+ asof_join_node.cc
+ bloom_filter.cc
+ exec_plan.cc
+ fetch_node.cc
+ filter_node.cc
+ hash_join.cc
+ hash_join_dict.cc
+ hash_join_node.cc
+ map_node.cc
+ options.cc
+ order_by_node.cc
+ order_by_impl.cc
+ partition_util.cc
+ pivot_longer_node.cc
+ project_node.cc
+ query_context.cc
+ sink_node.cc
+ source_node.cc
+ swiss_join.cc
+ task_util.cc
+ tpch_node.cc
+ union_node.cc
+ util.cc)
+
+append_avx2_src(bloom_filter_avx2.cc)
+append_avx2_src(swiss_join_avx2.cc)
+append_avx2_src(util_avx2.cc)
+
+set(ARROW_ACERO_SHARED_LINK_LIBS)
+set(ARROW_ACERO_STATIC_LINK_LIBS)
+
+if(ARROW_WITH_OPENTELEMETRY)
+ list(APPEND
+ ARROW_ACERO_SHARED_LINK_LIBS
+ opentelemetry-cpp::trace
+ opentelemetry-cpp::ostream_span_exporter
+ opentelemetry-cpp::otlp_http_exporter)
+ list(APPEND
+ ARROW_ACERO_STATIC_LINK_LIBS
+ opentelemetry-cpp::trace
+ opentelemetry-cpp::ostream_span_exporter
+ opentelemetry-cpp::otlp_http_exporter)
+endif()
+
+add_arrow_lib(arrow_acero
+ CMAKE_PACKAGE_NAME
+ ArrowAcero
+ PKG_CONFIG_NAME
+ arrow-acero
+ OUTPUTS
+ ARROW_ACERO_LIBRARIES
+ SOURCES
+ ${ARROW_ACERO_SRCS}
+ PRECOMPILED_HEADERS
+ "$<$<COMPILE_LANGUAGE:CXX>:arrow/acero/pch.h>"
+ SHARED_LINK_FLAGS
+ ${ARROW_VERSION_SCRIPT_FLAGS} # Defined in cpp/arrow/CMakeLists.txt
+ SHARED_LINK_LIBS
+ ${ARROW_ACERO_SHARED_LINK_LIBS}
+ SHARED_INSTALL_INTERFACE_LIBS
+ Arrow::arrow_shared
+ STATIC_LINK_LIBS
+ ${ARROW_ACERO_STATIC_LINK_LIBS}
+ STATIC_INSTALL_INTERFACE_LIBS
+ Arrow::arrow_static)
+
+if(ARROW_BUILD_STATIC AND WIN32)
+ target_compile_definitions(arrow_acero_static PUBLIC ARROW_ACERO_STATIC)
+endif()
+
+set(ARROW_ACERO_TEST_LINK_LIBS ${ARROW_LINK_lIBS} ${ARROW_TEST_LINK_LIBS})
+if(ARROW_TEST_LINKAGE STREQUAL "static")
+ list(APPEND ARROW_ACERO_TEST_LINK_LIBS arrow_acero_static)
+else()
+ list(APPEND ARROW_ACERO_TEST_LINK_LIBS arrow_acero_shared)
+endif()
+
+function(ADD_ARROW_ACERO_TEST REL_TEST_NAME)
+ set(options REQUIRE_ALL_KERNELS)
+ set(one_value_args PREFIX)
+ set(multi_value_args LABELS)
+ cmake_parse_arguments(ARG
+ "${options}"
+ "${one_value_args}"
+ "${multi_value_args}"
+ ${ARGN})
+
+ if(ARG_REQUIRE_ALL_KERNELS AND (NOT ARROW_COMPUTE))
+ return()
+ endif()
Review Comment:
Do we need this?
It seems that `ARROW_ACERO` depends on `ARROW_COMPUTE`.
##########
cpp/src/arrow/compute/kernels/CMakeLists.txt:
##########
@@ -93,9 +90,12 @@ add_arrow_benchmark(vector_selection_benchmark PREFIX "arrow-compute")
# Aggregates
-add_arrow_compute_test(aggregate_test
- SOURCES
- aggregate_test.cc
- hash_aggregate_test.cc
+add_arrow_compute_test(aggregate_test SOURCES aggregate_test.cc
+ # hash_aggregate_test.cc
test_util.cc)
-add_arrow_benchmark(aggregate_benchmark PREFIX "arrow-compute")
+# add_arrow_benchmark(aggregate_benchmark PREFIX "arrow-compute")
Review Comment:
Is it WIP?
##########
cpp/src/arrow/acero/visibility.h:
##########
@@ -0,0 +1,50 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// This API is EXPERIMENTAL.
+
+#pragma once
+
+#if defined(_WIN32) || defined(__CYGWIN__)
+#if defined(_MSC_VER)
+#pragma warning(push)
+#pragma warning(disable : 4251)
+#else
+#pragma GCC diagnostic ignored "-Wattributes"
+#endif
+
+#ifdef ARROW_ACERO_STATIC
+#define ARROW_ACERO_EXPORT
+#elif defined(ARROW_ACERO_EXPORTING)
+#define ARROW_ACERO_EXPORT __declspec(dllexport)
+#else
+#define ARROW_ACERO_EXPORT __declspec(dllimport)
+#endif
+
+#define ARROW_ACERO_NO_EXPORT
+#else // Not Windows
+#ifndef ARROW_ACERO_EXPORT
+#define ARROW_ACERO_EXPORT __attribute__((visibility("default")))
+#endif
+#ifndef ARROW_ACERO_NO_EXPORT
+#define ARROW_ACERO_NO_EXPORT __attribute__((visibility("hidden")))
+#endif
+#endif // Non-Windows
Review Comment:
"Not Windows" like "#else // Not Windows" above?
##########
cpp/src/arrow/engine/CMakeLists.txt:
##########
@@ -46,14 +46,18 @@ add_arrow_lib(arrow_substrait
SHARED_LINK_FLAGS
${ARROW_VERSION_SCRIPT_FLAGS} # Defined in cpp/arrow/CMakeLists.txt
SHARED_LINK_LIBS
+ arrow_acero_shared
arrow_dataset_shared
substrait
SHARED_INSTALL_INTERFACE_LIBS
+ ArrowAcero::arrow_acero_shared
Review Comment:
I think that we don't need this because `ArrowDataset::arrow_dataset_shared` has this dependency.
##########
cpp/src/arrow/compute/kernels/aggregate_benchmark.cc:
##########
@@ -21,7 +21,7 @@
#include "arrow/array/array_primitive.h"
#include "arrow/compute/api.h"
-#include "arrow/compute/exec/groupby.h"
+//#include "arrow/acero/groupby.h"
Review Comment:
If this is needless, could you remove this?
##########
cpp/src/arrow/engine/CMakeLists.txt:
##########
@@ -46,14 +46,18 @@ add_arrow_lib(arrow_substrait
SHARED_LINK_FLAGS
${ARROW_VERSION_SCRIPT_FLAGS} # Defined in cpp/arrow/CMakeLists.txt
SHARED_LINK_LIBS
+ arrow_acero_shared
Review Comment:
I think that we don't need this because `arrow_dataset_shared` has this dependency.
##########
cpp/src/arrow/engine/substrait/test_util.h:
##########
@@ -0,0 +1,237 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "arrow/testing/gtest_util.h"
+#include "arrow/util/vector.h"
+
+#include <functional>
+#include <random>
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "arrow/acero/exec_plan.h"
+#include "arrow/compute/exec.h"
+#include "arrow/compute/kernel.h"
+#include "arrow/testing/visibility.h"
+#include "arrow/util/async_generator.h"
+#include "arrow/util/pcg_random.h"
+
+namespace arrow {
+namespace compute {
+//
Review Comment:
Do these comments mean WIP?
##########
python/CMakeLists.txt:
##########
@@ -113,6 +113,7 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
option(PYARROW_BUILD_CUDA "Build the PyArrow CUDA support" OFF)
option(PYARROW_BUILD_FLIGHT "Build the PyArrow Flight integration" OFF)
option(PYARROW_BUILD_SUBSTRAIT "Build the PyArrow Substrait integration" OFF)
+ option(PYARROW_BUILD_ACERO "Build the PyArrow Acero integration" OFF)
Review Comment:
Could you sort these `PYARROW_BUILD_*` lines?
##########
python/CMakeLists.txt:
##########
@@ -561,6 +572,19 @@ if(PYARROW_BUILD_CUDA)
set_source_files_properties(pyarrow/_cuda.pyx PROPERTIES CYTHON_API TRUE)
endif()
+# Acero
+if(PYARROW_BUILD_ACERO)
+ if(PYARROW_BUNDLE_ARROW_CPP)
+ bundle_arrow_lib(${ARROW_ACERO_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION})
+ if(MSVC)
+ bundle_arrow_import_lib(${ARROW_ACERO_IMPORT_LIB})
+ endif()
+ endif()
+
+ set(DATASET_LINK_LIBS ArrowAcero::arrow_acero_shared)
Review Comment:
```suggestion
set(ACERO_LINK_LIBS ArrowAcero::arrow_acero_shared)
```
And we need the following change:
```diff
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 580857d50c..e9b22d7531 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -779,9 +779,13 @@ if(PYARROW_BUILD_SUBSTRAIT)
target_link_libraries(_substrait PRIVATE ${SUBSTRAIT_LINK_LIBS})
endif()
+if(PYARROW_BUILD_ACERO)
+ target_link_libraries(_acero PRIVATE ${ACERO_LINK_LIBS})
+ target_link_libraries(_exec_plan PRIVATE ${ACERO_LINK_LIBS})
+endif()
+
if(PYARROW_BUILD_DATASET)
target_link_libraries(_dataset PRIVATE ${DATASET_LINK_LIBS})
- target_link_libraries(_exec_plan PRIVATE ${DATASET_LINK_LIBS})
if(PYARROW_BUILD_ORC)
target_link_libraries(_dataset_orc PRIVATE ${DATASET_LINK_LIBS})
endif()
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org