You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by an...@apache.org on 2022/04/18 21:52:37 UTC
[tvm] 01/02: initial
This is an automated email from the ASF dual-hosted git repository.
andrewzhaoluo pushed a commit to branch aluo/run-individual-node
in repository https://gitbox.apache.org/repos/asf/tvm.git
commit f8718669194308921dc2d0a4b9137a2f739a45d0
Author: Andrew Zhao Luo <an...@gmail.com>
AuthorDate: Tue Apr 12 15:42:43 2022 -0700
initial
---
.../graph_executor/debug/graph_executor_debug.cc | 114 +++++++++++++--------
1 file changed, 74 insertions(+), 40 deletions(-)
diff --git a/src/runtime/graph_executor/debug/graph_executor_debug.cc b/src/runtime/graph_executor/debug/graph_executor_debug.cc
index 12a739722a..dd95478e17 100644
--- a/src/runtime/graph_executor/debug/graph_executor_debug.cc
+++ b/src/runtime/graph_executor/debug/graph_executor_debug.cc
@@ -67,48 +67,21 @@ class GraphExecutorDebug : public GraphExecutor {
time_sec_per_op[index] += RunOpRPC(index, number, repeat, min_repeat_ms);
}
} else {
- for (int i = 0; i < repeat; ++i) {
- std::chrono::time_point<std::chrono::high_resolution_clock, std::chrono::nanoseconds>
- tbegin, tend;
- double duration_ms = 0.0;
- do {
- std::fill(time_sec_per_op.begin(), time_sec_per_op.end(), 0);
- if (duration_ms > 0.0) {
- number = static_cast<int>(std::max((min_repeat_ms / (duration_ms / number) + 1),
- number * 1.618)); // 1.618 is chosen by random
- }
- tbegin = std::chrono::high_resolution_clock::now();
- std::vector<std::vector<Timer>> op_timers;
- for (size_t index = 0; index < op_execs_.size(); index++) {
- op_timers.push_back({});
- }
- for (int k = 0; k < number; k++) {
- for (size_t index = 0; index < op_execs_.size(); ++index) {
- if (op_execs_[index]) {
- op_timers[index].push_back(RunOpHost(index));
- }
- }
- }
- for (size_t index = 0; index < op_execs_.size(); ++index) {
- for (auto t : op_timers[index]) {
- time_sec_per_op[index] += t->SyncAndGetElapsedNanos() / 1e9;
- }
- }
- tend = std::chrono::high_resolution_clock::now();
- duration_ms =
- std::chrono::duration_cast<std::chrono::duration<double>>(tend - tbegin).count() *
- 1000;
- } while (duration_ms < min_repeat_ms);
-
- LOG(INFO) << "Iteration: " << i;
- int op = 0;
- for (size_t index = 0; index < time_sec_per_op.size(); index++) {
- if (op_execs_[index]) {
- time_sec_per_op[index] /= number;
- LOG(INFO) << "Op #" << op++ << " " << GetNodeName(index) << ": "
- << time_sec_per_op[index] * 1e6 << " us/iter";
+ for (size_t index = 0; index < op_execs_.size(); ++index) {
+ std::vector<std::vector<double>> results =
+ RunIndividualNode(index, number, repeat, min_repeat_ms);
+
+ double total = 0.0;
+ for (size_t cur_repeat = 0; cur_repeat < results.size(); cur_repeat++) {
+ std::vector<double>& timings = results[cur_repeat];
+ double total_in_trial = 0;
+ for (double t : timings) {
+ total_in_trial += t;
}
+ total_in_trial /= timings.size();
+ total += total_in_trial;
}
+ time_sec_per_op[index] = total / results.size();
}
}
@@ -119,6 +92,54 @@ class GraphExecutorDebug : public GraphExecutor {
return os.str();
}
+ // Times a single graph node on the host.
+ //
+ // Each of the `repeat` trials launches the node `number` times via RunOpHost and keeps
+ // one timer per launch. While the wall-clock duration of a trial is below
+ // `min_repeat_ms`, `number` is increased and the trial is measured again; the enlarged
+ // `number` is carried over to the following trials.
+ //
+ // node_index:    index into op_execs_ of the node to time
+ // number:        launches per trial (may grow, see above)
+ // repeat:        number of trials
+ // min_repeat_ms: minimum wall-clock duration of one trial, in milliseconds
+ //
+ // Returns results_in_seconds[a][b], the elapsed seconds of the bth launch in the ath
+ // trial. A node without an executable op yields `repeat` empty trials.
+ std::vector<std::vector<double>> RunIndividualNode(int node_index, int number, int repeat,
+                                                    int min_repeat_ms) {
+   // Reject RPC modules up front, before paying for a warmup run of the whole graph.
+   std::string tkey = module_->type_key();
+   if (tkey == "rpc") {
+     LOG(FATAL) << "RPC measurements should not use RunIndividualNode!";
+   }
+
+   // warmup run
+   GraphExecutor::Run();
+
+   // A node with no executable op has nothing to launch. Without this guard the
+   // calibration loop below would only stop once an empty loop takes min_repeat_ms,
+   // growing `number` (and possibly overflowing the int cast) on the way.
+   if (!op_execs_[node_index]) {
+     return std::vector<std::vector<double>>(static_cast<size_t>(std::max(repeat, 0)));
+   }
+
+   // results_in_seconds[a][b] is the bth index run of the ath index repeat
+   std::vector<std::vector<double>> results_in_seconds;
+
+   for (int i = 0; i < repeat; ++i) {
+     std::vector<Timer> op_timers;
+     double duration_ms = 0.0;
+
+     // Keep timing operations, upping number of repeats until we reach min_repeat_ms
+     do {
+       op_timers.clear();
+       if (duration_ms > 0.0) {
+         // Either extrapolate from the last measurement or grow geometrically,
+         // whichever is larger; the 1.618 growth factor is an arbitrary choice.
+         number = static_cast<int>(std::max((min_repeat_ms / (duration_ms / number) + 1),
+                                            number * 1.618));
+       }
+
+       const auto tbegin = std::chrono::high_resolution_clock::now();
+       for (int k = 0; k < number; k++) {
+         op_timers.push_back(RunOpHost(node_index));
+       }
+       const auto tend = std::chrono::high_resolution_clock::now();
+       duration_ms =
+           std::chrono::duration_cast<std::chrono::duration<double>>(tend - tbegin).count() * 1000;
+     } while (duration_ms < min_repeat_ms);
+
+     // Build this trial's timings directly inside the result to avoid copying the vector.
+     results_in_seconds.emplace_back();
+     std::vector<double>& timings_in_seconds = results_in_seconds.back();
+     timings_in_seconds.reserve(op_timers.size());
+     for (Timer t : op_timers) {
+       timings_in_seconds.push_back(t->SyncAndGetElapsedNanos() / 1e9);
+     }
+   }
+
+   return results_in_seconds;
+ }
+
+
double RunOpRPC(int index, int number, int repeat, int min_repeat_ms) {
// Right now we expect either "tvm_op" for nodes which run PackedFunc or "null" for nodes which
// represent inputs/parameters to the graph. Other types may be supported in the future, but
@@ -362,6 +383,19 @@ PackedFunc GraphExecutorDebug::GetFunction(const std::string& name,
ICHECK_GE(min_repeat_ms, 0);
*rv = this->RunIndividual(number, repeat, min_repeat_ms);
});
+ } else if (name == "run_individual_node") {
+ return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
+ int node_index = args[0];
+ int number = args[1];
+ int repeat = args[2];
+ int min_repeat_ms = args[3];
+ ICHECK_GE(node_index, 0);
+ ICHECK_LT(node_index, nodes_.size());
+ ICHECK_GT(number, 0);
+ ICHECK_GT(repeat, 0);
+ ICHECK_GE(min_repeat_ms, 0);
+ *rv = this->RunIndividualNode(node_index, number, repeat, min_repeat_ms);
+ });
} else if (name == "profile") {
return TypedPackedFunc<profiling::Report(Array<profiling::MetricCollector>)>(
[sptr_to_self, this](Array<profiling::MetricCollector> collectors) {