You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by an...@apache.org on 2022/04/12 22:42:54 UTC

[tvm] branch aluo/run-individual-node created (now a1f26fc5b6)

This is an automated email from the ASF dual-hosted git repository.

andrewzhaoluo pushed a change to branch aluo/run-individual-node
in repository https://gitbox.apache.org/repos/asf/tvm.git


      at a1f26fc5b6 initial

This branch includes the following new commits:

     new a1f26fc5b6 initial

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[tvm] 01/01: initial

Posted by an...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

andrewzhaoluo pushed a commit to branch aluo/run-individual-node
in repository https://gitbox.apache.org/repos/asf/tvm.git

commit a1f26fc5b62c977df7ca90b536a78864f5a4f821
Author: Andrew Zhao Luo <an...@gmail.com>
AuthorDate: Tue Apr 12 15:42:43 2022 -0700

    initial
---
 .../graph_executor/debug/graph_executor_debug.cc   | 114 +++++++++++++--------
 1 file changed, 74 insertions(+), 40 deletions(-)

diff --git a/src/runtime/graph_executor/debug/graph_executor_debug.cc b/src/runtime/graph_executor/debug/graph_executor_debug.cc
index 12a739722a..dd95478e17 100644
--- a/src/runtime/graph_executor/debug/graph_executor_debug.cc
+++ b/src/runtime/graph_executor/debug/graph_executor_debug.cc
@@ -67,48 +67,21 @@ class GraphExecutorDebug : public GraphExecutor {
         time_sec_per_op[index] += RunOpRPC(index, number, repeat, min_repeat_ms);
       }
     } else {
-      for (int i = 0; i < repeat; ++i) {
-        std::chrono::time_point<std::chrono::high_resolution_clock, std::chrono::nanoseconds>
-            tbegin, tend;
-        double duration_ms = 0.0;
-        do {
-          std::fill(time_sec_per_op.begin(), time_sec_per_op.end(), 0);
-          if (duration_ms > 0.0) {
-            number = static_cast<int>(std::max((min_repeat_ms / (duration_ms / number) + 1),
-                                               number * 1.618));  // 1.618 is chosen by random
-          }
-          tbegin = std::chrono::high_resolution_clock::now();
-          std::vector<std::vector<Timer>> op_timers;
-          for (size_t index = 0; index < op_execs_.size(); index++) {
-            op_timers.push_back({});
-          }
-          for (int k = 0; k < number; k++) {
-            for (size_t index = 0; index < op_execs_.size(); ++index) {
-              if (op_execs_[index]) {
-                op_timers[index].push_back(RunOpHost(index));
-              }
-            }
-          }
-          for (size_t index = 0; index < op_execs_.size(); ++index) {
-            for (auto t : op_timers[index]) {
-              time_sec_per_op[index] += t->SyncAndGetElapsedNanos() / 1e9;
-            }
-          }
-          tend = std::chrono::high_resolution_clock::now();
-          duration_ms =
-              std::chrono::duration_cast<std::chrono::duration<double>>(tend - tbegin).count() *
-              1000;
-        } while (duration_ms < min_repeat_ms);
-
-        LOG(INFO) << "Iteration: " << i;
-        int op = 0;
-        for (size_t index = 0; index < time_sec_per_op.size(); index++) {
-          if (op_execs_[index]) {
-            time_sec_per_op[index] /= number;
-            LOG(INFO) << "Op #" << op++ << " " << GetNodeName(index) << ": "
-                      << time_sec_per_op[index] * 1e6 << " us/iter";
+      for (size_t index = 0; index < op_execs_.size(); ++index) {
+        std::vector<std::vector<double>> results =
+            RunIndividualNode(index, number, repeat, min_repeat_ms);
+
+        double total = 0.0;
+        for (size_t cur_repeat = 0; cur_repeat < results.size(); cur_repeat++) {
+          std::vector<double>& timings = results[cur_repeat];
+          double total_in_trial = 0;
+          for (double t : timings) {
+            total_in_trial += t;
           }
+          total_in_trial /= timings.size();
+          total += total_in_trial;
         }
+        time_sec_per_op[index] = total / results.size();
       }
     }
 
@@ -119,6 +92,54 @@ class GraphExecutorDebug : public GraphExecutor {
     return os.str();
   }
 
+  std::vector<std::vector<double>> RunIndividualNode(int node_index, int number, int repeat,
+                                                     int min_repeat_ms) {
+    // Warm-up: run the whole graph once (done on every call, before the RPC check below).
+    GraphExecutor::Run();
+    std::string tkey = module_->type_key();
+
+    // results_in_seconds[a][b] is the elapsed time, in seconds, of the b-th run in the a-th repeat.
+    std::vector<std::vector<double>> results_in_seconds;
+
+    if (tkey == "rpc") {
+      LOG(FATAL) << "RPC measurements should not use RunIndividualNode!";
+    }
+
+    for (int i = 0; i < repeat; ++i) {
+      std::vector<Timer> op_timers;
+      double duration_ms = 0.0;
+
+      // Re-time the node, growing `number` (runs per repeat) until a batch lasts >= min_repeat_ms.
+      do {
+        op_timers.clear();
+        if (duration_ms > 0.0) {
+          number = static_cast<int>(std::max((min_repeat_ms / (duration_ms / number) + 1),
+                                             number * 1.618));  // 1.618: arbitrary growth factor
+        }
+
+        std::chrono::time_point<std::chrono::high_resolution_clock, std::chrono::nanoseconds>
+            tbegin, tend;
+        tbegin = std::chrono::high_resolution_clock::now();
+        for (int k = 0; k < number; k++) {
+          if (op_execs_[node_index]) {
+            op_timers.push_back(RunOpHost(node_index));
+          }
+        }
+        tend = std::chrono::high_resolution_clock::now();
+        duration_ms =
+            std::chrono::duration_cast<std::chrono::duration<double>>(tend - tbegin).count() * 1000;
+      } while (duration_ms < min_repeat_ms);
+
+      std::vector<double> timings_in_seconds;
+      for (Timer t : op_timers) {
+        timings_in_seconds.push_back(t->SyncAndGetElapsedNanos() / 1e9);
+      }
+      results_in_seconds.push_back(timings_in_seconds);
+    }
+
+    return results_in_seconds;
+  }
+
   double RunOpRPC(int index, int number, int repeat, int min_repeat_ms) {
     // Right now we expect either "tvm_op" for nodes which run PackedFunc or "null" for nodes which
     // represent inputs/parameters to the graph. Other types may be supported in the future, but
@@ -362,6 +383,19 @@ PackedFunc GraphExecutorDebug::GetFunction(const std::string& name,
       ICHECK_GE(min_repeat_ms, 0);
       *rv = this->RunIndividual(number, repeat, min_repeat_ms);
     });
+  } else if (name == "run_individual_node") {
+    return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
+      int node_index = args[0];
+      int number = args[1];
+      int repeat = args[2];
+      int min_repeat_ms = args[3];
+      ICHECK_GE(node_index, 0);
+      ICHECK_LT(node_index, nodes_.size());
+      ICHECK_GT(number, 0);
+      ICHECK_GT(repeat, 0);
+      ICHECK_GE(min_repeat_ms, 0);
+      *rv = this->RunIndividualNode(node_index, number, repeat, min_repeat_ms);
+    });
   } else if (name == "profile") {
     return TypedPackedFunc<profiling::Report(Array<profiling::MetricCollector>)>(
         [sptr_to_self, this](Array<profiling::MetricCollector> collectors) {