You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by fe...@apache.org on 2023/12/26 17:14:39 UTC

(arrow) branch main updated: GH-39357: [C++] Reduce function.h includes (#39312)

This is an automated email from the ASF dual-hosted git repository.

felipecrv pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new cf44793204 GH-39357: [C++] Reduce function.h includes (#39312)
cf44793204 is described below

commit cf44793204d88e0156669af102ff65f180a6b003
Author: Rossi(Ruoxi) Sun <za...@gmail.com>
AuthorDate: Tue Dec 26 09:14:32 2023 -0800

    GH-39357: [C++] Reduce function.h includes (#39312)
    
    
    
    ### Rationale for this change
    
    As proposed in #36246 , by splitting function option structs from `function.h`, we can reduce the including of `function.h`. So that the total build time could be reduced.
    
    The total parser time could be reduced from 722.3s to 709.7s. And the `function.h` along with its transitive inclusion of `kernel.h` don't show up in expensive headers any more.
    
    The detailed analysis result before and after this PR are attached:
    [analyze-before.txt](https://github.com/apache/arrow/files/13756923/analyze-before.txt)
    [analyze-after.txt](https://github.com/apache/arrow/files/13756924/analyze-after.txt)
    
    Disclaimer (quote from https://github.com/apache/arrow/issues/36246#issuecomment-1866974963):
    > Note that the time diff is not absolute. The ClangBuildAnalyzer result differs from time to time. I guess it depends on the idle-ness of the building machine when doing the experiment. But the time reduction is almost certain, though sometimes more sometimes less. And the inclusion times of the questioning headers are reduced for sure, as shown in the attachments in my other comment.
    
    ### What changes are included in this PR?
    
    Move function option structs into own `compute/options.h`, and change including `function.h` to including `options.h` wherever fits.
    
    ### Are these changes tested?
    
    Build is testing.
    
    ### Are there any user-facing changes?
    
    There could be potential build failures for user code (quote from https://github.com/apache/arrow/issues/36246#issuecomment-1866980969):
    > The header function.h remains in compute/api.h, with and without this PR. The proposed PR removes function.h from api_xxx.h (then includes options.h instead), as proposed in the initial description of this issue. This results in compile failures for user code which includes only compute/api_xxx.h but not compute/api.h, and meanwhile uses CallFunction which is declared in function.h.
    
    But I think it's OK as described in https://github.com/apache/arrow/issues/36246#issuecomment-1867018578.
    
    * Closes: #39357
    
    Authored-by: zanmato <za...@gmail.com>
    Signed-off-by: Felipe Oliveira Carvalho <fe...@gmail.com>
---
 .../arrow/compute_and_write_csv_example.cc         |  2 +-
 cpp/src/arrow/acero/aggregate_internal.cc          |  1 +
 cpp/src/arrow/acero/scalar_aggregate_node.cc       |  1 +
 cpp/src/arrow/compute/api.h                        | 21 +++---
 cpp/src/arrow/compute/api_aggregate.h              |  2 +-
 cpp/src/arrow/compute/api_scalar.h                 |  2 +-
 cpp/src/arrow/compute/api_vector.h                 |  3 +-
 cpp/src/arrow/compute/cast.h                       |  1 +
 cpp/src/arrow/compute/function.cc                  |  1 +
 cpp/src/arrow/compute/function.h                   | 46 +-----------
 cpp/src/arrow/compute/function_options.h           | 81 ++++++++++++++++++++++
 .../compute/kernels/scalar_if_else_benchmark.cc    |  1 +
 cpp/src/arrow/compute/kernels/vector_rank.cc       |  1 +
 .../compute/kernels/vector_replace_benchmark.cc    |  1 +
 .../compute/kernels/vector_run_end_encode_test.cc  |  1 +
 cpp/src/arrow/compute/kernels/vector_select_k.cc   |  1 +
 cpp/src/arrow/compute/kernels/vector_sort.cc       |  1 +
 cpp/src/arrow/compute/registry_test.cc             |  1 +
 cpp/src/arrow/compute/type_fwd.h                   |  1 +
 19 files changed, 111 insertions(+), 58 deletions(-)

diff --git a/cpp/examples/arrow/compute_and_write_csv_example.cc b/cpp/examples/arrow/compute_and_write_csv_example.cc
index edf21e45b2..7e0f6cdf1c 100644
--- a/cpp/examples/arrow/compute_and_write_csv_example.cc
+++ b/cpp/examples/arrow/compute_and_write_csv_example.cc
@@ -16,7 +16,7 @@
 // under the License.
 
 #include <arrow/api.h>
-#include <arrow/compute/api_aggregate.h>
+#include <arrow/compute/api.h>
 #include <arrow/csv/api.h>
 #include <arrow/csv/writer.h>
 #include <arrow/io/api.h>
diff --git a/cpp/src/arrow/acero/aggregate_internal.cc b/cpp/src/arrow/acero/aggregate_internal.cc
index 3cd5491720..9c4b7fe5ae 100644
--- a/cpp/src/arrow/acero/aggregate_internal.cc
+++ b/cpp/src/arrow/acero/aggregate_internal.cc
@@ -25,6 +25,7 @@
 #include "arrow/acero/exec_plan.h"
 #include "arrow/acero/options.h"
 #include "arrow/compute/exec.h"
+#include "arrow/compute/function.h"
 #include "arrow/compute/registry.h"
 #include "arrow/compute/row/grouper.h"
 #include "arrow/datum.h"
diff --git a/cpp/src/arrow/acero/scalar_aggregate_node.cc b/cpp/src/arrow/acero/scalar_aggregate_node.cc
index ae59aa6920..c7805f4d24 100644
--- a/cpp/src/arrow/acero/scalar_aggregate_node.cc
+++ b/cpp/src/arrow/acero/scalar_aggregate_node.cc
@@ -25,6 +25,7 @@
 #include "arrow/acero/options.h"
 #include "arrow/acero/util.h"
 #include "arrow/compute/exec.h"
+#include "arrow/compute/function.h"
 #include "arrow/compute/registry.h"
 #include "arrow/compute/row/grouper.h"
 #include "arrow/datum.h"
diff --git a/cpp/src/arrow/compute/api.h b/cpp/src/arrow/compute/api.h
index 5b5dfdf69e..b701d99286 100644
--- a/cpp/src/arrow/compute/api.h
+++ b/cpp/src/arrow/compute/api.h
@@ -20,18 +20,23 @@
 
 #pragma once
 
+/// \defgroup compute-functions Abstract compute function API
+/// @{
+/// @}
+
 /// \defgroup compute-concrete-options Concrete option classes for compute functions
 /// @{
 /// @}
 
-#include "arrow/compute/api_aggregate.h"  // IWYU pragma: export
-#include "arrow/compute/api_scalar.h"     // IWYU pragma: export
-#include "arrow/compute/api_vector.h"     // IWYU pragma: export
-#include "arrow/compute/cast.h"           // IWYU pragma: export
-#include "arrow/compute/function.h"       // IWYU pragma: export
-#include "arrow/compute/kernel.h"         // IWYU pragma: export
-#include "arrow/compute/registry.h"       // IWYU pragma: export
-#include "arrow/datum.h"                  // IWYU pragma: export
+#include "arrow/compute/api_aggregate.h"     // IWYU pragma: export
+#include "arrow/compute/api_scalar.h"        // IWYU pragma: export
+#include "arrow/compute/api_vector.h"        // IWYU pragma: export
+#include "arrow/compute/cast.h"              // IWYU pragma: export
+#include "arrow/compute/function.h"          // IWYU pragma: export
+#include "arrow/compute/function_options.h"  // IWYU pragma: export
+#include "arrow/compute/kernel.h"            // IWYU pragma: export
+#include "arrow/compute/registry.h"          // IWYU pragma: export
+#include "arrow/datum.h"                     // IWYU pragma: export
 
 #include "arrow/compute/expression.h"  // IWYU pragma: export
 
diff --git a/cpp/src/arrow/compute/api_aggregate.h b/cpp/src/arrow/compute/api_aggregate.h
index 3493c31463..4d2c814a69 100644
--- a/cpp/src/arrow/compute/api_aggregate.h
+++ b/cpp/src/arrow/compute/api_aggregate.h
@@ -22,7 +22,7 @@
 
 #include <vector>
 
-#include "arrow/compute/function.h"
+#include "arrow/compute/function_options.h"
 #include "arrow/datum.h"
 #include "arrow/result.h"
 #include "arrow/util/macros.h"
diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h
index 9f12471ddc..26fbe64f74 100644
--- a/cpp/src/arrow/compute/api_scalar.h
+++ b/cpp/src/arrow/compute/api_scalar.h
@@ -24,7 +24,7 @@
 #include <string>
 #include <utility>
 
-#include "arrow/compute/function.h"
+#include "arrow/compute/function_options.h"
 #include "arrow/compute/type_fwd.h"
 #include "arrow/datum.h"
 #include "arrow/result.h"
diff --git a/cpp/src/arrow/compute/api_vector.h b/cpp/src/arrow/compute/api_vector.h
index 0233090ef6..759f9e5c1a 100644
--- a/cpp/src/arrow/compute/api_vector.h
+++ b/cpp/src/arrow/compute/api_vector.h
@@ -20,9 +20,8 @@
 #include <memory>
 #include <utility>
 
-#include "arrow/compute/function.h"
+#include "arrow/compute/function_options.h"
 #include "arrow/compute/ordering.h"
-#include "arrow/datum.h"
 #include "arrow/result.h"
 #include "arrow/type_fwd.h"
 
diff --git a/cpp/src/arrow/compute/cast.h b/cpp/src/arrow/compute/cast.h
index 613e8a55ad..18e56092dd 100644
--- a/cpp/src/arrow/compute/cast.h
+++ b/cpp/src/arrow/compute/cast.h
@@ -22,6 +22,7 @@
 #include <vector>
 
 #include "arrow/compute/function.h"
+#include "arrow/compute/function_options.h"
 #include "arrow/compute/type_fwd.h"
 #include "arrow/result.h"
 #include "arrow/status.h"
diff --git a/cpp/src/arrow/compute/function.cc b/cpp/src/arrow/compute/function.cc
index c0433145dd..e1a2e8c5d8 100644
--- a/cpp/src/arrow/compute/function.cc
+++ b/cpp/src/arrow/compute/function.cc
@@ -26,6 +26,7 @@
 #include "arrow/compute/exec.h"
 #include "arrow/compute/exec_internal.h"
 #include "arrow/compute/function_internal.h"
+#include "arrow/compute/function_options.h"
 #include "arrow/compute/kernels/common_internal.h"
 #include "arrow/compute/registry.h"
 #include "arrow/datum.h"
diff --git a/cpp/src/arrow/compute/function.h b/cpp/src/arrow/compute/function.h
index 333c9a65c5..be934a3c5a 100644
--- a/cpp/src/arrow/compute/function.h
+++ b/cpp/src/arrow/compute/function.h
@@ -36,53 +36,9 @@
 namespace arrow {
 namespace compute {
 
-/// \defgroup compute-functions Abstract compute function API
-///
+/// \addtogroup compute-functions
 /// @{
 
-/// \brief Extension point for defining options outside libarrow (but
-/// still within this project).
-class ARROW_EXPORT FunctionOptionsType {
- public:
-  virtual ~FunctionOptionsType() = default;
-
-  virtual const char* type_name() const = 0;
-  virtual std::string Stringify(const FunctionOptions&) const = 0;
-  virtual bool Compare(const FunctionOptions&, const FunctionOptions&) const = 0;
-  virtual Result<std::shared_ptr<Buffer>> Serialize(const FunctionOptions&) const;
-  virtual Result<std::unique_ptr<FunctionOptions>> Deserialize(
-      const Buffer& buffer) const;
-  virtual std::unique_ptr<FunctionOptions> Copy(const FunctionOptions&) const = 0;
-};
-
-/// \brief Base class for specifying options configuring a function's behavior,
-/// such as error handling.
-class ARROW_EXPORT FunctionOptions : public util::EqualityComparable<FunctionOptions> {
- public:
-  virtual ~FunctionOptions() = default;
-
-  const FunctionOptionsType* options_type() const { return options_type_; }
-  const char* type_name() const { return options_type()->type_name(); }
-
-  bool Equals(const FunctionOptions& other) const;
-  std::string ToString() const;
-  std::unique_ptr<FunctionOptions> Copy() const;
-  /// \brief Serialize an options struct to a buffer.
-  Result<std::shared_ptr<Buffer>> Serialize() const;
-  /// \brief Deserialize an options struct from a buffer.
-  /// Note: this will only look for `type_name` in the default FunctionRegistry;
-  /// to use a custom FunctionRegistry, look up the FunctionOptionsType, then
-  /// call FunctionOptionsType::Deserialize().
-  static Result<std::unique_ptr<FunctionOptions>> Deserialize(
-      const std::string& type_name, const Buffer& buffer);
-
- protected:
-  explicit FunctionOptions(const FunctionOptionsType* type) : options_type_(type) {}
-  const FunctionOptionsType* options_type_;
-};
-
-ARROW_EXPORT void PrintTo(const FunctionOptions&, std::ostream*);
-
 /// \brief Contains the number of required arguments for the function.
 ///
 /// Naming conventions taken from https://en.wikipedia.org/wiki/Arity.
diff --git a/cpp/src/arrow/compute/function_options.h b/cpp/src/arrow/compute/function_options.h
new file mode 100644
index 0000000000..88ec2fd2d0
--- /dev/null
+++ b/cpp/src/arrow/compute/function_options.h
@@ -0,0 +1,81 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// NOTE: API is EXPERIMENTAL and will change without going through a
+// deprecation cycle.
+
+#pragma once
+
+#include "arrow/compute/type_fwd.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/type_fwd.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+namespace compute {
+
+/// \addtogroup compute-functions
+/// @{
+
+/// \brief Extension point for defining options outside libarrow (but
+/// still within this project).
+class ARROW_EXPORT FunctionOptionsType {
+ public:
+  virtual ~FunctionOptionsType() = default;
+
+  virtual const char* type_name() const = 0;
+  virtual std::string Stringify(const FunctionOptions&) const = 0;
+  virtual bool Compare(const FunctionOptions&, const FunctionOptions&) const = 0;
+  virtual Result<std::shared_ptr<Buffer>> Serialize(const FunctionOptions&) const;
+  virtual Result<std::unique_ptr<FunctionOptions>> Deserialize(
+      const Buffer& buffer) const;
+  virtual std::unique_ptr<FunctionOptions> Copy(const FunctionOptions&) const = 0;
+};
+
+/// \brief Base class for specifying options configuring a function's behavior,
+/// such as error handling.
+class ARROW_EXPORT FunctionOptions : public util::EqualityComparable<FunctionOptions> {
+ public:
+  virtual ~FunctionOptions() = default;
+
+  const FunctionOptionsType* options_type() const { return options_type_; }
+  const char* type_name() const { return options_type()->type_name(); }
+
+  bool Equals(const FunctionOptions& other) const;
+  std::string ToString() const;
+  std::unique_ptr<FunctionOptions> Copy() const;
+  /// \brief Serialize an options struct to a buffer.
+  Result<std::shared_ptr<Buffer>> Serialize() const;
+  /// \brief Deserialize an options struct from a buffer.
+  /// Note: this will only look for `type_name` in the default FunctionRegistry;
+  /// to use a custom FunctionRegistry, look up the FunctionOptionsType, then
+  /// call FunctionOptionsType::Deserialize().
+  static Result<std::unique_ptr<FunctionOptions>> Deserialize(
+      const std::string& type_name, const Buffer& buffer);
+
+ protected:
+  explicit FunctionOptions(const FunctionOptionsType* type) : options_type_(type) {}
+  const FunctionOptionsType* options_type_;
+};
+
+ARROW_EXPORT void PrintTo(const FunctionOptions&, std::ostream*);
+
+/// @}
+
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc
index b72402bbcc..58bc560f52 100644
--- a/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc
@@ -21,6 +21,7 @@
 #include "arrow/array/concatenate.h"
 #include "arrow/array/util.h"
 #include "arrow/compute/api_scalar.h"
+#include "arrow/compute/function.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/testing/random.h"
 #include "arrow/util/key_value_metadata.h"
diff --git a/cpp/src/arrow/compute/kernels/vector_rank.cc b/cpp/src/arrow/compute/kernels/vector_rank.cc
index 780ae25d96..0cea7246e5 100644
--- a/cpp/src/arrow/compute/kernels/vector_rank.cc
+++ b/cpp/src/arrow/compute/kernels/vector_rank.cc
@@ -15,6 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include "arrow/compute/function.h"
 #include "arrow/compute/kernels/vector_sort_internal.h"
 #include "arrow/compute/registry.h"
 
diff --git a/cpp/src/arrow/compute/kernels/vector_replace_benchmark.cc b/cpp/src/arrow/compute/kernels/vector_replace_benchmark.cc
index 719969d46e..971a841de0 100644
--- a/cpp/src/arrow/compute/kernels/vector_replace_benchmark.cc
+++ b/cpp/src/arrow/compute/kernels/vector_replace_benchmark.cc
@@ -18,6 +18,7 @@
 #include <benchmark/benchmark.h>
 
 #include "arrow/array.h"
+#include "arrow/datum.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/testing/random.h"
 
diff --git a/cpp/src/arrow/compute/kernels/vector_run_end_encode_test.cc b/cpp/src/arrow/compute/kernels/vector_run_end_encode_test.cc
index 0bd8e3386e..f02aee1b35 100644
--- a/cpp/src/arrow/compute/kernels/vector_run_end_encode_test.cc
+++ b/cpp/src/arrow/compute/kernels/vector_run_end_encode_test.cc
@@ -21,6 +21,7 @@
 #include "arrow/array/validate.h"
 #include "arrow/builder.h"
 #include "arrow/compute/api_vector.h"
+#include "arrow/datum.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/type_fwd.h"
 #include "arrow/util/logging.h"
diff --git a/cpp/src/arrow/compute/kernels/vector_select_k.cc b/cpp/src/arrow/compute/kernels/vector_select_k.cc
index 5000de8996..1740a9b7f0 100644
--- a/cpp/src/arrow/compute/kernels/vector_select_k.cc
+++ b/cpp/src/arrow/compute/kernels/vector_select_k.cc
@@ -17,6 +17,7 @@
 
 #include <queue>
 
+#include "arrow/compute/function.h"
 #include "arrow/compute/kernels/vector_sort_internal.h"
 #include "arrow/compute/registry.h"
 
diff --git a/cpp/src/arrow/compute/kernels/vector_sort.cc b/cpp/src/arrow/compute/kernels/vector_sort.cc
index 8ddcbb9905..e08a2bc103 100644
--- a/cpp/src/arrow/compute/kernels/vector_sort.cc
+++ b/cpp/src/arrow/compute/kernels/vector_sort.cc
@@ -17,6 +17,7 @@
 
 #include <unordered_set>
 
+#include "arrow/compute/function.h"
 #include "arrow/compute/kernels/vector_sort_internal.h"
 #include "arrow/compute/registry.h"
 
diff --git a/cpp/src/arrow/compute/registry_test.cc b/cpp/src/arrow/compute/registry_test.cc
index 7fee136de7..2d69f119df 100644
--- a/cpp/src/arrow/compute/registry_test.cc
+++ b/cpp/src/arrow/compute/registry_test.cc
@@ -22,6 +22,7 @@
 #include <gtest/gtest.h>
 
 #include "arrow/compute/function.h"
+#include "arrow/compute/function_options.h"
 #include "arrow/compute/registry.h"
 #include "arrow/result.h"
 #include "arrow/status.h"
diff --git a/cpp/src/arrow/compute/type_fwd.h b/cpp/src/arrow/compute/type_fwd.h
index 3f990b1814..89f32ceb0f 100644
--- a/cpp/src/arrow/compute/type_fwd.h
+++ b/cpp/src/arrow/compute/type_fwd.h
@@ -27,6 +27,7 @@ struct TypeHolder;
 namespace compute {
 
 class Function;
+class ScalarAggregateFunction;
 class FunctionExecutor;
 class FunctionOptions;
 class FunctionRegistry;