You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by GitBox <gi...@apache.org> on 2021/03/19 01:01:00 UTC

[GitHub] [tvm] icemelon9 commented on a change in pull request #7624: [Profiling,VM] Profiling interface for VM and Graph Runtime

icemelon9 commented on a change in pull request #7624:
URL: https://github.com/apache/tvm/pull/7624#discussion_r597328855



##########
File path: include/tvm/runtime/profiling.h
##########
@@ -144,6 +149,134 @@ class Timer : public ObjectRef {
  */
 Timer DefaultTimer(TVMContext ctx);
 
+namespace profiling {
+
+/*! Information about a single function or operator call. */
+struct CallFrame {
+  /*! Context in which the call was made */
+  TVMContext ctx;
+  /*! Name of the function or ap */

Review comment:
       ```suggestion
     /*! Name of the function or op */
   ```

##########
File path: src/runtime/profiling.cc
##########
@@ -93,5 +97,270 @@ Timer Timer::Start(TVMContext ctx) {
 }
 
 TVM_REGISTER_GLOBAL("profiling.start_timer").set_body_typed(Timer::Start);
+
+namespace profiling {
+
+void Profiler::Start(const std::vector<TVMContext>& ctxs) {
+  CHECK(global_timers_.empty()) << "You can only call Start once per Profiler.";
+  for (auto ctx : ctxs) {
+    global_timers_.emplace_back(ctx, Timer::Start(ctx));
+  }
+}
+
+void Profiler::StartCall(String name, TVMContext ctx,
+                         std::unordered_map<std::string, ObjectRef> extra_metrics) {
+  in_flight_.push(CallFrame{ctx, name, Timer::Start(ctx), extra_metrics});
+}
+
+void Profiler::StopCall(std::unordered_map<std::string, ObjectRef> extra_metrics) {
+  CallFrame cf = in_flight_.top();
+  cf.timer->Stop();
+  for (auto& p : extra_metrics) {
+    cf.extra_metrics[p.first] = p.second;
+  }
+  in_flight_.pop();
+  calls_.push_back(cf);
+}
+
+void Profiler::Stop() {
+  // Stop all global timers. We wait to synchronize until we are making the report.
+  for (auto p : global_timers_) {
+    p.second->Stop();
+  }
+}
+
+String ShapeString(const std::vector<NDArray>& shapes) {
+  std::stringstream sizes;
+  for (const NDArray& ary : shapes) {
+    if (sizes.tellp() > 0) {
+      sizes << ", ";
+    }
+    auto shape = ary.Shape();
+    sizes << ary.DataType() << "[";
+    for (size_t i = 0; i < shape.size(); i++) {
+      if (i != 0) {
+        sizes << ", ";
+      }
+      sizes << shape[i];
+    }
+    sizes << "]";
+  }
+  return String(sizes.str());
+}
+
+std::string FormatTable(const std::vector<std::unordered_map<std::string, ObjectRef>>& rows,
+                        std::unordered_set<std::string> hidden_cols = {"Argument Shapes",
+                                                                       "Context"}) {
+  std::unordered_set<std::string> unique_headers;
+
+  for (auto row : rows) {
+    for (auto p : row) {
+      unique_headers.insert(p.first);
+    }
+  }
+
+  std::vector<std::string> headers = {"Name", "Duration (us)", "Percent"};
+  for (auto header : unique_headers) {
+    if (header != "Name" && header != "Duration (us)" && header != "Percent" &&
+        hidden_cols.find(header) == hidden_cols.end()) {
+      headers.push_back(header);
+    }
+  }
+
+  std::vector<std::vector<std::string>> cols;
+  for (auto header : headers) {
+    cols.push_back({header});
+  }
+  for (auto row : rows) {
+    for (size_t i = 0; i < headers.size(); i++) {
+      auto it = row.find(headers[i]);
+      if (it == row.end()) {
+        cols[i].push_back("");
+      } else {
+        std::string val;
+        if (it->second.as<CountNode>()) {
+          std::stringstream s;
+          s.imbue(std::locale(""));  // for 1000s seperators
+          s << std::fixed << it->second.as<CountNode>()->value;
+          val = s.str();
+        } else if (it->second.as<DurationNode>()) {
+          std::stringstream s;
+          s.imbue(std::locale(""));  // for 1000s seperators
+          s << std::fixed << std::setprecision(2) << it->second.as<DurationNode>()->value;
+          val = s.str();
+        } else if (it->second.as<PercentNode>()) {
+          std::stringstream s;
+          s << std::fixed << std::setprecision(2) << it->second.as<PercentNode>()->value;
+          val = s.str();
+        } else if (it->second.as<StringObj>()) {
+          val = Downcast<String>(it->second);
+        }
+        cols[i].push_back(val);
+      }
+    }
+  }
+
+  std::vector<size_t> widths;
+  for (auto v : cols) {
+    size_t width = 0;
+    for (auto x : v) {
+      width = std::max(width, x.size());
+    }
+    widths.push_back(width);
+  }
+  size_t length = 0;
+  for (auto v : cols) {
+    length = std::max(length, v.size());
+  }
+
+  std::stringstream s;
+  for (size_t row = 0; row < length; row++) {
+    for (size_t col = 0; col < cols.size(); col++) {
+      // left align first column
+      if (col == 0) {
+        s << std::left;
+      } else {
+        s << std::right;
+      }
+      if (row < cols[col].size()) {
+        s << std::setw(widths[col]) << cols[col][row] << "  ";
+      } else {
+        s << std::setw(widths[col]) << ""
+          << "  ";
+      }
+    }
+    s << std::endl;
+  }
+  return s.str();
+}
+
+String Profiler::Report(bool aggregate) {
+  std::vector<std::pair<TVMContext, double>> global_times;
+  for (auto p : global_timers_) {
+    global_times.emplace_back(p.first, p.second->SyncAndGetElapsedNanos() / 1e3);
+  }
+  double overall_time = 0.;
+  for (auto p : global_times) {
+    overall_time = std::max(overall_time, p.second);
+  }
+
+  // aggregate times by op name
+  std::vector<std::pair<std::string, std::vector<size_t>>> aggregate_rows;
+  if (aggregate) {
+    std::unordered_map<std::string, std::vector<size_t>> aggregates;
+    for (size_t i = 0; i < calls_.size(); i++) {
+      CallFrame& cf = calls_[i];
+      std::string name = cf.name;
+      // don't aggregate dynamic ops with different shapes
+      auto it = cf.extra_metrics.find("Argument Shapes");
+      if (it != cf.extra_metrics.end()) {
+        name = name + Downcast<String>(it->second);
+      }
+
+      if (aggregates.find(name) == aggregates.end()) {
+        aggregates[name] = {i};
+      } else {
+        aggregates[name].push_back(i);
+      }
+    }
+    for (const auto& p : aggregates) {
+      aggregate_rows.push_back(p);
+    }
+  } else {
+    for (size_t i = 0; i < calls_.size(); i++) {
+      aggregate_rows.push_back({calls_[i].name, {i}});
+    }
+  }
+
+  // aggregated rows (poor man's dataframe)
+  std::vector<std::unordered_map<std::string, ObjectRef>> rows;
+
+  // form aggregates and compute aggregate statistics (sum).
+  for (auto p : aggregate_rows) {
+    std::unordered_map<std::string, ObjectRef> row;
+    double time_sum = 0;
+    size_t count = 0;
+    for (auto i : p.second) {
+      double us = calls_[i].timer->SyncAndGetElapsedNanos() / 1e3;
+      time_sum += us;
+      count += 1;
+    }
+    row["Percent"] = ObjectRef(make_object<PercentNode>(time_sum / overall_time * 100));
+    row["Duration (us)"] = ObjectRef(make_object<DurationNode>(time_sum));
+    row["Count"] = ObjectRef(make_object<CountNode>(count));
+    row["Name"] = calls_[p.second[0]].name;
+    TVMContext ctx = calls_[p.second[0]].ctx;
+    row["Context"] = String(DeviceName(ctx.device_type) + std::to_string(ctx.device_id));
+
+    // assume all rows in the aggregate have the same metrics
+    for (auto metric : calls_[p.second[0]].extra_metrics) {
+      if (metric.second.as<CountNode>()) {
+        int64_t sum = 0;
+        for (auto i : p.second) {
+          sum += calls_[i].extra_metrics[metric.first].as<CountNode>()->value;
+        }
+        row[metric.first] = ObjectRef(make_object<CountNode>(sum));
+      } else if (metric.second.as<DurationNode>()) {
+        double sum = 0;
+        for (auto i : p.second) {
+          sum += calls_[i].extra_metrics[metric.first].as<DurationNode>()->value;
+        }
+        row[metric.first] = ObjectRef(make_object<DurationNode>(sum));
+      } else if (metric.second.as<PercentNode>()) {
+        double sum = 0;
+        for (auto i : p.second) {
+          sum += calls_[i].extra_metrics[metric.first].as<PercentNode>()->value;
+        }
+        row[metric.first] = ObjectRef(make_object<PercentNode>(sum));
+      } else if (metric.second.as<StringObj>()) {
+        // assume all rows contain the same value for this metric
+        row[metric.first] = Downcast<String>(metric.second);
+      }
+    }
+
+    rows.push_back(row);
+  }
+
+  // sort rows by duration
+  std::sort(rows.begin(), rows.end(),

Review comment:
       Could you have sort as an option as well?

##########
File path: src/runtime/vm/profiler/vm.cc
##########
@@ -42,59 +42,44 @@ namespace vm {
 PackedFunc VirtualMachineDebug::GetFunction(const std::string& name,
                                             const ObjectPtr<Object>& sptr_to_self) {
   if (name == "get_stat") {
+    return PackedFunc(
+        [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = prof_.Report(); });
+  } else if (name == "reset") {
+    return PackedFunc(
+        [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { prof_ = profiling::Profiler(); });
+  } else if (name == "invoke") {
     return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
-      ICHECK_EQ(args.size(), 1U);
-      std::vector<std::pair<Index, double>> op_acc_time;
-      std::unordered_map<Index, std::vector<double>> op_durations;
-      for (auto kv : op_timers_) {
-        std::vector<double> durations_us;
-        for (auto t : kv.second) {
-          durations_us.push_back(t->SyncAndGetElapsedNanos() / 1e3);
+      std::vector<TVMContext> ctxs;
+      for (auto ctx : ctxs_) {
+        if (ctx.device_type > 0) {
+          ctxs.push_back(ctx);
         }
-        op_durations[kv.first] = durations_us;
-      }
-      for (auto kv : op_durations) {
-        auto val =
-            std::make_pair(kv.first, std::accumulate(kv.second.begin(), kv.second.end(), 0.0));
-        op_acc_time.push_back(val);
       }
-      bool sort_by_time = args[0];
-      if (sort_by_time) {
-        auto comp = [](const std::pair<Index, double>& lhs, const std::pair<Index, double>& rhs) {
-          return lhs.second > rhs.second;
-        };
-        std::sort(op_acc_time.begin(), op_acc_time.end(), comp);
+      auto invoke = VirtualMachine::GetFunction("invoke", sptr_to_self);
+      prof_.Start(ctxs);
+      invoke.CallPacked(args, rv);
+      prof_.Stop();
+    });
+  } else if (name == "profile") {
+    return TypedPackedFunc<String(String)>([sptr_to_self, this](String arg_name) {
+      std::vector<TVMContext> ctxs;
+      for (auto ctx : ctxs_) {
+        if (ctx.device_type > 0) {
+          ctxs.push_back(ctx);
+        }
       }
-      double total_duration = 0.0;
-      int64_t total_packed_funcs = 0;
-      std::ostringstream os;
-      os << std::setw(30) << std::left << "#OpName"
-         << "\t" << std::setw(10) << std::left << "#InvokeCount"
-         << "\t"
-         << "#Duration(us): Sum/Mean/Min/Max" << std::endl;
 
-      for (auto kv : op_acc_time) {
-        auto vals = op_durations[kv.first];
-        auto sum = kv.second;
-        auto mean = sum / static_cast<double>(vals.size());
-        auto min_value = *std::min_element(vals.begin(), vals.end());
-        auto max_value = *std::max_element(vals.begin(), vals.end());
-
-        os << std::setw(30) << std::left << packed_index_map_[kv.first] << "\t" << std::setw(10)
-           << std::left << op_invokes_[kv.first] << "\t" << sum << "/" << mean << "/" << min_value
-           << "/" << max_value << std::endl;
-
-        total_duration += sum;
-        total_packed_funcs += op_invokes_[kv.first];
+      auto invoke = VirtualMachine::GetFunction("invoke", sptr_to_self);
+      // warmup
+      for (int i = 0; i < 3; i++) {
+        invoke(arg_name);
       }
-      os << "\nTotal Duration: " << total_duration << " us.\t"
-         << "Total Packed Functions: " << total_packed_funcs << std::endl;
-      *rv = os.str();
-    });
-  } else if (name == "reset") {
-    return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
-      op_timers_.clear();
-      op_invokes_.clear();
+
+      prof_ = profiling::Profiler();  // reset profiler

Review comment:
       Since we create the profiler each time, we should just remove it from the class field.

##########
File path: src/runtime/vm/profiler/vm.h
##########
@@ -52,8 +52,7 @@ class VirtualMachineDebug : public VirtualMachine {
                     const std::vector<ObjectRef>& args) final;
 
   std::unordered_map<Index, std::string> packed_index_map_;
-  std::unordered_map<Index, std::vector<Timer>> op_timers_;
-  std::unordered_map<Index, int> op_invokes_;
+  profiling::Profiler prof_;

Review comment:
       we can remove it

##########
File path: include/tvm/runtime/profiling.h
##########
@@ -144,6 +149,134 @@ class Timer : public ObjectRef {
  */
 Timer DefaultTimer(TVMContext ctx);
 
+namespace profiling {
+
+/*! Information about a single function or operator call. */
+struct CallFrame {
+  /*! Context in which the call was made */
+  TVMContext ctx;
+  /*! Name of the function or ap */
+  String name;
+  /*! Runtime of the function or op */
+  Timer timer;
+  /*! Extra performance metrics */
+  std::unordered_map<std::string, ObjectRef> extra_metrics;
+};
+
+/*! Runtime profiler for function and/or operator calls. Used in the graph
+ * runtime and VM to provide profiling information for all operators.
+ *
+ * Example usage:
+ * \code{.cpp}
+ * Profiler prof;
+ * TVMContext cpu, gpu;
+ * prof.Start({cpu, gpu});
+ * prof.StartCall("my_gpu_kernel", gpu);
+ * my_gpu_kernel();
+ * prof.StopCall();
+ * prof.StartCall("my_cpu_function", cpu);
+ * my_cpu_function();
+ * prof.StopCall();
+ * prof.Stop();
+ * std::cout << prof.Report << std::endl; // print profiling report
+ * \endcode
+ */
+class Profiler {
+ public:
+  /*! \brief Start the profiler.
+   * \param ctxs The list of contexts the profiler will be running on. Should
+   *             include all contexts used by profiled operators.
+   *
+   * This function should only be called once per object.
+   */
+  void Start(const std::vector<TVMContext>& ctxs);
+  /*! \brief Stop the profiler.
+   *
+   * This function should only be called once per object after start has been called.
+   */
+  void Stop();
+  /*! \brief Start a function call.
+   * \param name The name of the function being called.
+   * \param ctx The context on which the function is running.
+   * \param extra_metrics Optional additional profiling information to add to
+   * the frame (input sizes, allocations).
+   *
+   * `StartCall` may be nested, but each `StartCall` needs a matching
+   * `StopCall`. Function calls are stopped in LIFO order, so calls to
+   * `StartCall` and `StopCall` must be nested properly.
+   */
+  void StartCall(String name, TVMContext ctx,
+                 std::unordered_map<std::string, ObjectRef> extra_metrics = {});
+  /*! \brief Stop the last `StartCall`.
+   * \param extra_metrics Optional additional profiling information to add to
+   * the frame (input sizes, allocations).
+   */
+  void StopCall(std::unordered_map<std::string, ObjectRef> extra_metrics = {});
+  /*! \brief A textual report of total runtime between `Start` and `Stop` as
+   *        well as individual statistics for each `StartCall`-`StopCall` pair.
+   *  \param aggregate Whether or not to join multiple calls to the same op into a single line.
+   *  \returns The report as a string.
+   */
+  String Report(bool aggregate = true);
+
+ private:
+  std::vector<std::pair<TVMContext, Timer>> global_timers_;
+  std::vector<CallFrame> calls_;
+  std::stack<CallFrame> in_flight_;
+};
+
+/* \brief A duration in time. */
+class DurationNode : public Object {
+ public:
+  /* The duration as a floating point number of microseconds. */
+  double value;

Review comment:
       Suggest changing the name to microseconds to be more specific

##########
File path: src/runtime/vm/profiler/vm.cc
##########
@@ -42,59 +42,44 @@ namespace vm {
 PackedFunc VirtualMachineDebug::GetFunction(const std::string& name,
                                             const ObjectPtr<Object>& sptr_to_self) {
   if (name == "get_stat") {
+    return PackedFunc(
+        [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = prof_.Report(); });
+  } else if (name == "reset") {

Review comment:
       no need for reset.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org