You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/01/12 13:40:07 UTC

[GitHub] [arrow] lidavidm commented on a change in pull request #12100: ARROW-15061: [C++] Add logging for kernel functions and exec plan nodes

lidavidm commented on a change in pull request #12100:
URL: https://github.com/apache/arrow/pull/12100#discussion_r783078213



##########
File path: cpp/src/arrow/util/tracing.h
##########
@@ -0,0 +1,75 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+
+#include "arrow/util/logging.h"
+
+namespace arrow {
+
+namespace internal {
+namespace tracing {
+
+// Forward declaration SpanImpl.
+class SpanImpl;
+
+}  // namespace tracing
+}  // namespace internal
+
+namespace util {
+namespace tracing {
+
+class ARROW_EXPORT Span {
+ public:
+  using Impl = arrow::internal::tracing::SpanImpl;
+
+  Span() = default;  // Default constructor. The inner_impl is a nullptr.
+  ~Span();  // Destructor. Default destructor defined in tracing.cc where impl is a
+            // complete type.
+
+  Impl& Set(const Impl&);
+  Impl& Set(Impl&&);
+
+  const Impl& Get() const {
+    if (inner_impl) {
+      return *inner_impl;
+    } else {
+      ARROW_LOG(FATAL) << "Attempted to dereference a null pointer. Use Span::Set before "

Review comment:
       The DCHECK/CHECK macros can handle this more concisely:
   
   ```
   ARROW_DCHECK(inner_impl) << "...";
   return *inner_impl;
   ```
   
   or
   
   ```
   ARROW_CHECK(inner_impl) << "..."; // will be retained in release builds
   return *inner_impl;
   ```

##########
File path: cpp/src/arrow/compute/exec/exec_plan.h
##########
@@ -245,6 +246,8 @@ class ARROW_EXPORT ExecNode {
   std::shared_ptr<Schema> output_schema_;
   int num_outputs_;
   NodeVector outputs_;
+
+  util::tracing::Span span;

Review comment:
       This should be `span_` for consistency (sorry for the churn)

##########
File path: cpp/src/arrow/compute/exec/exec_plan.cc
##########
@@ -172,6 +182,8 @@ struct ExecPlanImpl : public ExecPlan {
   NodeVector sources_, sinks_;
   NodeVector sorted_nodes_;
   uint32_t auto_label_counter_ = 0;
+
+  util::tracing::Span span;

Review comment:
       Same here - this should be `span_`

##########
File path: cpp/src/arrow/compute/exec/hash_join.h
##########
@@ -112,6 +113,9 @@ class HashJoinImpl {
   virtual void Abort(TaskScheduler::AbortContinuationImpl pos_abort_callback) = 0;
 
   static Result<std::unique_ptr<HashJoinImpl>> MakeBasic();
+
+ protected:
+  util::tracing::Span span;

Review comment:
       Same here, we generally suffix private/protected members with `_`. (See ["Class Data Members"](https://google.github.io/styleguide/cppguide.html#Variable_Names).)

##########
File path: cpp/src/arrow/compute/exec/aggregate_node.cc
##########
@@ -188,36 +194,40 @@ class ScalarAggregateNode : public ExecNode {
   }
 
   void ErrorReceived(ExecNode* input, Status error) override {
+    EVENT(span, "ErrorReceived", {{"error", error.message()}});
     DCHECK_EQ(input, inputs_[0]);
     outputs_[0]->ErrorReceived(this, std::move(error));
   }
 
   void InputFinished(ExecNode* input, int total_batches) override {
+    EVENT(span, "InputFinished", {{"batches.length", total_batches}});
     DCHECK_EQ(input, inputs_[0]);
-
     if (input_counter_.SetTotal(total_batches)) {
       ErrorIfNotOk(Finish());
     }
   }
 
   Status StartProducing() override {
+    START_SPAN(span, kind_name(), {{"node", ToString()}});
     finished_ = Future<>::Make();
     // Scalar aggregates will only output a single batch
     outputs_[0]->InputFinished(this, 1);
     return Status::OK();
   }
 
-  void PauseProducing(ExecNode* output) override {}
+  void PauseProducing(ExecNode* output) override { EVENT(span, "PauseProducing"); }
 
-  void ResumeProducing(ExecNode* output) override {}
+  void ResumeProducing(ExecNode* output) override { EVENT(span, "ResumeProducing"); }
 
   void StopProducing(ExecNode* output) override {
     DCHECK_EQ(output, outputs_[0]);
     StopProducing();
   }
 
   void StopProducing() override {
+    EVENT(span, "StopProducing");
     if (input_counter_.Cancel()) {
+      END_SPAN(span);

Review comment:
       We could add a callback to finished_ in StartProducing above instead.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org