You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by tq...@apache.org on 2020/12/28 15:29:56 UTC
[tvm] branch main updated: [µTVM] Add platform timer and RPCTimeEvaluator to enable AutoTVM (#6964)
This is an automated email from the ASF dual-hosted git repository.
tqchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new b8ac8d9 [µTVM] Add platform timer and RPCTimeEvaluator to enable AutoTVM (#6964)
b8ac8d9 is described below
commit b8ac8d94dec93cdaaec436f9105ea9b2eef752d0
Author: Andrew Reusch <ar...@octoml.ai>
AuthorDate: Mon Dec 28 07:29:37 2020 -0800
[µTVM] Add platform timer and RPCTimeEvaluator to enable AutoTVM (#6964)
* Add platform timer to microTVM.
* Address liangfu comments
* cppformat
* clang-format
Co-authored-by: Liangfu Chen <li...@apache.org>
---
apps/bundle_deploy/bundle.c | 6 +
apps/bundle_deploy/bundle_static.c | 6 +
include/tvm/runtime/c_runtime_api.h | 7 ++
include/tvm/runtime/crt/error_codes.h | 5 +
include/tvm/runtime/crt/platform.h | 19 +++
python/tvm/micro/session.py | 1 +
src/runtime/c_runtime_api.cc | 9 ++
src/runtime/crt/common/crt_runtime_api.c | 164 +++++++++++++++++++++++--
src/runtime/crt/host/crt_config.h | 2 +-
src/runtime/crt/host/main.cc | 23 ++--
src/runtime/graph/debug/graph_runtime_debug.cc | 126 +++++++++++++------
src/runtime/minrpc/minrpc_server.h | 1 +
tests/micro/qemu/test_zephyr.py | 27 ++++
tests/micro/qemu/zephyr-runtime/src/main.c | 18 +--
tests/python/unittest/test_crt.py | 29 ++++-
15 files changed, 372 insertions(+), 71 deletions(-)
diff --git a/apps/bundle_deploy/bundle.c b/apps/bundle_deploy/bundle.c
index 29712de..098ac99 100644
--- a/apps/bundle_deploy/bundle.c
+++ b/apps/bundle_deploy/bundle.c
@@ -123,3 +123,9 @@ tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLContext ctx, void*
tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLContext ctx) {
return g_memory_manager->Free(g_memory_manager, ptr, ctx);
}
+
+tvm_crt_error_t TVMPlatformTimerStart() { return kTvmErrorFunctionCallNotImplemented; }
+
+tvm_crt_error_t TVMPlatformTimerStop(double* elapsed_time_seconds) {
+ return kTvmErrorFunctionCallNotImplemented;
+}
diff --git a/apps/bundle_deploy/bundle_static.c b/apps/bundle_deploy/bundle_static.c
index 7ac95fd..c4b637c 100644
--- a/apps/bundle_deploy/bundle_static.c
+++ b/apps/bundle_deploy/bundle_static.c
@@ -124,3 +124,9 @@ tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLContext ctx, void*
tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLContext ctx) {
return g_memory_manager->Free(g_memory_manager, ptr, ctx);
}
+
+tvm_crt_error_t TVMPlatformTimerStart() { return kTvmErrorFunctionCallNotImplemented; }
+
+tvm_crt_error_t TVMPlatformTimerStop(double* elapsed_time_seconds) {
+ return kTvmErrorFunctionCallNotImplemented;
+}
diff --git a/include/tvm/runtime/c_runtime_api.h b/include/tvm/runtime/c_runtime_api.h
index aac49c1..467e69a 100644
--- a/include/tvm/runtime/c_runtime_api.h
+++ b/include/tvm/runtime/c_runtime_api.h
@@ -540,6 +540,13 @@ TVM_DLL int TVMObjectRetain(TVMObjectHandle obj);
TVM_DLL int TVMObjectFree(TVMObjectHandle obj);
/*!
+ * \brief Free a TVMByteArray returned from TVMFuncCall, and associated memory.
+ * \param arr The TVMByteArray instance.
+ * \return 0 on success, -1 on failure.
+ */
+TVM_DLL int TVMByteArrayFree(TVMByteArray* arr);
+
+/*!
* \brief Allocate a data space on device.
* \param ctx The device context to perform operation.
* \param nbytes The number of bytes in memory.
diff --git a/include/tvm/runtime/crt/error_codes.h b/include/tvm/runtime/crt/error_codes.h
index 41d727d..75e49e6 100644
--- a/include/tvm/runtime/crt/error_codes.h
+++ b/include/tvm/runtime/crt/error_codes.h
@@ -44,6 +44,7 @@ typedef enum {
kTvmErrorCategoryGenerated = 6,
kTvmErrorCategoryGraphRuntime = 7,
kTvmErrorCategoryFunctionCall = 8,
+ kTvmErrorCategoryTimeEvaluator = 9,
} tvm_crt_error_category_t;
typedef enum {
@@ -77,6 +78,7 @@ typedef enum {
kTvmErrorPlatformMemoryManagerInitialized = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryPlatform, 1),
kTvmErrorPlatformShutdown = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryPlatform, 2),
kTvmErrorPlatformNoMemory = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryPlatform, 3),
+ kTvmErrorPlatformTimerBadState = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryPlatform, 4),
// Common error codes returned from generated functions.
kTvmErrorGeneratedInvalidStorageId = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryGenerated, 0),
@@ -91,6 +93,9 @@ typedef enum {
kTvmErrorFunctionCallWrongArgType = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryFunctionCall, 1),
kTvmErrorFunctionCallNotImplemented = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryFunctionCall, 2),
+ // Time Evaluator - times functions for use with debug runtime.
+ kTvmErrorTimeEvaluatorBadHandle = DEFINE_TVM_CRT_ERROR(kTvmErrorCategoryTimeEvaluator, 0),
+
// System errors are always negative integers; this mask indicates presence of a system error.
// Cast tvm_crt_error_t to a signed integer to interpret the negative error code.
kTvmErrorSystemErrorMask = (1 << (sizeof(int) * 4 - 1)),
diff --git a/include/tvm/runtime/crt/platform.h b/include/tvm/runtime/crt/platform.h
index 12dcdc5..8e03839 100644
--- a/include/tvm/runtime/crt/platform.h
+++ b/include/tvm/runtime/crt/platform.h
@@ -78,6 +78,25 @@ tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLContext ctx, void*
* \return kTvmErrorNoError if successful; a descriptive error code otherwise.
*/
tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLContext ctx);
+
+/*! \brief Start a device timer.
+ *
+ * The device timer used must not be running.
+ *
+ * \return kTvmErrorNoError if successful; a descriptive error code otherwise.
+ */
+tvm_crt_error_t TVMPlatformTimerStart();
+
+/*! \brief Stop the running device timer and get the elapsed time (in microseconds).
+ *
+ * The device timer used must be running.
+ *
+ * \param elapsed_time_seconds Pointer to write elapsed time into.
+ *
+ * \return kTvmErrorNoError if successful; a descriptive error code otherwise.
+ */
+tvm_crt_error_t TVMPlatformTimerStop(double* elapsed_time_seconds);
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/python/tvm/micro/session.py b/python/tvm/micro/session.py
index 0f2f09a..717b6e4 100644
--- a/python/tvm/micro/session.py
+++ b/python/tvm/micro/session.py
@@ -23,6 +23,7 @@ import sys
from ..error import register_error
from .._ffi import get_global_func
from ..contrib import graph_runtime
+from ..contrib.debugger import debug_runtime
from ..rpc import RPCSession
from .transport import IoTimeoutError
from .transport import TransportLogger
diff --git a/src/runtime/c_runtime_api.cc b/src/runtime/c_runtime_api.cc
index 299f282..6ecc60a 100644
--- a/src/runtime/c_runtime_api.cc
+++ b/src/runtime/c_runtime_api.cc
@@ -420,6 +420,15 @@ int TVMFuncFree(TVMFunctionHandle func) {
API_END();
}
+int TVMByteArrayFree(TVMByteArray* arr) {
+ if (arr == &TVMAPIRuntimeStore::Get()->ret_bytes) {
+ return 0; // Thread-local storage does not need explicit deleting.
+ }
+
+ delete arr;
+ return 0;
+}
+
int TVMFuncCall(TVMFunctionHandle func, TVMValue* args, int* arg_type_codes, int num_args,
TVMValue* ret_val, int* ret_type_code) {
API_BEGIN();
diff --git a/src/runtime/crt/common/crt_runtime_api.c b/src/runtime/crt/common/crt_runtime_api.c
index ac2b99a..fcfb51f 100644
--- a/src/runtime/crt/common/crt_runtime_api.c
+++ b/src/runtime/crt/common/crt_runtime_api.c
@@ -110,6 +110,9 @@ static const TVMModule* registered_modules[TVM_CRT_MAX_REGISTERED_MODULES];
/*! \brief Passed as `module_index` to EncodeFunctionHandle. */
static const tvm_module_index_t kGlobalFuncModuleIndex = TVM_CRT_MAX_REGISTERED_MODULES;
+/*! \brief Special module handle for retur values from RPCTimeEvaluator. */
+static const tvm_module_index_t kTimeEvaluatorModuleIndex = 0x7fff;
+
static int DecodeModuleHandle(TVMModuleHandle handle, tvm_module_index_t* out_module_index) {
tvm_module_index_t module_index;
@@ -185,13 +188,15 @@ static int DecodeFunctionHandle(TVMFunctionHandle handle, tvm_module_index_t* mo
(tvm_module_index_t)(((uintptr_t)handle) >> (sizeof(tvm_function_index_t) * 8));
unvalidated_module_index &= ~0x8000;
- if (unvalidated_module_index > kGlobalFuncModuleIndex) {
- TVMAPIErrorf("invalid module handle: index=%08x", unvalidated_module_index);
- return -1;
- } else if (unvalidated_module_index < kGlobalFuncModuleIndex &&
- registered_modules[unvalidated_module_index] == NULL) {
- TVMAPIErrorf("unregistered module: index=%08x", unvalidated_module_index);
- return -1;
+ if (unvalidated_module_index != kTimeEvaluatorModuleIndex) {
+ if (unvalidated_module_index > kGlobalFuncModuleIndex) {
+ TVMAPIErrorf("invalid module handle: index=%08x", unvalidated_module_index);
+ return -1;
+ } else if (unvalidated_module_index < kGlobalFuncModuleIndex &&
+ registered_modules[unvalidated_module_index] == NULL) {
+ TVMAPIErrorf("unregistered module: index=%08x", unvalidated_module_index);
+ return -1;
+ }
}
*function_index = ((uint32_t)((uintptr_t)handle)) & ~0x8000;
@@ -199,6 +204,20 @@ static int DecodeFunctionHandle(TVMFunctionHandle handle, tvm_module_index_t* mo
return 0;
}
+int TVMByteArrayFree(TVMByteArray* arr) {
+ DLContext ctx = {kDLCPU, 0};
+ int to_return = TVMPlatformMemoryFree((void*)arr->data, ctx);
+ if (to_return != 0) {
+ return to_return;
+ }
+
+ return TVMPlatformMemoryFree((void*)arr, ctx);
+}
+
+tvm_crt_error_t RunTimeEvaluator(tvm_function_index_t function_index, TVMValue* args,
+ int* type_codes, int num_args, TVMValue* ret_val,
+ int* ret_type_code);
+
int TVMFuncCall(TVMFunctionHandle func_handle, TVMValue* arg_values, int* type_codes, int num_args,
TVMValue* ret_val, int* ret_type_code) {
tvm_module_index_t module_index;
@@ -211,7 +230,10 @@ int TVMFuncCall(TVMFunctionHandle func_handle, TVMValue* arg_values, int* type_c
return -1;
}
- if (module_index == kGlobalFuncModuleIndex) {
+ if (module_index == kTimeEvaluatorModuleIndex) {
+ return RunTimeEvaluator(function_index, arg_values, type_codes, num_args, ret_val,
+ ret_type_code);
+ } else if (module_index == kGlobalFuncModuleIndex) {
resource_handle = NULL;
registry = &global_func_registry.registry;
} else {
@@ -315,6 +337,8 @@ int TVMFuncFree(TVMFunctionHandle func) {
return 0;
}
+int RPCTimeEvaluator(TVMValue* args, int* type_codes, int num_args, TVMValue* ret_val,
+ int* ret_type_code);
tvm_crt_error_t TVMInitializeRuntime() {
int idx = 0;
tvm_crt_error_t error = kTvmErrorNoError;
@@ -351,6 +375,10 @@ tvm_crt_error_t TVMInitializeRuntime() {
error = TVMFuncRegisterGlobal("tvm.rpc.server.ModuleGetFunction", &ModuleGetFunction, 0);
}
+ if (error == kTvmErrorNoError) {
+ error = TVMFuncRegisterGlobal("runtime.RPCTimeEvaluator", &RPCTimeEvaluator, 0);
+ }
+
if (error != kTvmErrorNoError) {
TVMPlatformMemoryFree(registry_backing_memory, ctx);
TVMPlatformMemoryFree(func_registry_memory, ctx);
@@ -358,3 +386,123 @@ tvm_crt_error_t TVMInitializeRuntime() {
return error;
}
+
+typedef struct {
+ uint16_t function_index;
+ TVMFunctionHandle func_to_time;
+ TVMContext ctx;
+ int number;
+ int repeat;
+ int min_repeat_ms;
+} time_evaluator_state_t;
+
+static time_evaluator_state_t g_time_evaluator_state;
+
+int RPCTimeEvaluator(TVMValue* args, int* type_codes, int num_args, TVMValue* ret_val,
+ int* ret_type_code) {
+ ret_val[0].v_handle = NULL;
+ ret_type_code[0] = kTVMNullptr;
+ if (num_args < 8) {
+ TVMAPIErrorf("not enough args");
+ return kTvmErrorFunctionCallNumArguments;
+ }
+ if (type_codes[0] != kTVMModuleHandle || type_codes[1] != kTVMStr ||
+ type_codes[2] != kTVMArgInt || type_codes[3] != kTVMArgInt || type_codes[4] != kTVMArgInt ||
+ type_codes[5] != kTVMArgInt || type_codes[6] != kTVMArgInt || type_codes[7] != kTVMStr) {
+ TVMAPIErrorf("one or more invalid arg types");
+ return kTvmErrorFunctionCallWrongArgType;
+ }
+
+ TVMModuleHandle mod = (TVMModuleHandle)args[0].v_handle;
+ const char* name = args[1].v_str;
+ g_time_evaluator_state.ctx.device_type = args[2].v_int64;
+ g_time_evaluator_state.ctx.device_id = args[3].v_int64;
+ g_time_evaluator_state.number = args[4].v_int64;
+ g_time_evaluator_state.repeat = args[5].v_int64;
+ g_time_evaluator_state.min_repeat_ms = args[6].v_int64;
+
+ int ret_code =
+ TVMModGetFunction(mod, name, /* query_imports */ 0, &g_time_evaluator_state.func_to_time);
+ if (ret_code != 0) {
+ return ret_code;
+ }
+
+ g_time_evaluator_state.function_index++;
+ ret_val[0].v_handle =
+ EncodeFunctionHandle(kTimeEvaluatorModuleIndex, g_time_evaluator_state.function_index);
+ ret_type_code[0] = kTVMPackedFuncHandle;
+ return kTvmErrorNoError;
+}
+
+tvm_crt_error_t RunTimeEvaluator(tvm_function_index_t function_index, TVMValue* args,
+ int* type_codes, int num_args, TVMValue* ret_val,
+ int* ret_type_code) {
+ if (function_index != g_time_evaluator_state.function_index) {
+ return kTvmErrorTimeEvaluatorBadHandle;
+ }
+
+ // TODO(areusch): should *really* rethink needing to return doubles
+ DLContext result_byte_ctx = {kDLCPU, 0};
+ TVMByteArray* result_byte_arr = NULL;
+ tvm_crt_error_t err =
+ TVMPlatformMemoryAllocate(sizeof(TVMByteArray), result_byte_ctx, (void*)&result_byte_arr);
+ if (err != kTvmErrorNoError) {
+ goto release_and_return;
+ }
+ result_byte_arr->data = NULL;
+ size_t data_size = sizeof(double) * g_time_evaluator_state.repeat;
+ err = TVMPlatformMemoryAllocate(data_size, result_byte_ctx, (void*)&result_byte_arr->data);
+ if (err != kTvmErrorNoError) {
+ goto release_and_return;
+ }
+ result_byte_arr->size = data_size;
+ double min_repeat_seconds = ((double)g_time_evaluator_state.min_repeat_ms) / 1000;
+ double* iter = (double*)result_byte_arr->data;
+ for (int i = 0; i < g_time_evaluator_state.repeat; i++) {
+ double repeat_res_seconds = 0.0;
+ int exec_count = 0;
+ // do-while structure ensures we run even when `min_repeat_ms` isn't set (i.e., is 0).
+ do {
+ err = TVMPlatformTimerStart();
+ if (err != kTvmErrorNoError) {
+ goto release_and_return;
+ }
+
+ for (int j = 0; j < g_time_evaluator_state.number; j++) {
+ err = TVMFuncCall(g_time_evaluator_state.func_to_time, args, type_codes, num_args, ret_val,
+ ret_type_code);
+ if (err != kTvmErrorNoError) {
+ goto release_and_return;
+ }
+ }
+ exec_count += g_time_evaluator_state.number;
+
+ double curr_res_seconds;
+ err = TVMPlatformTimerStop(&curr_res_seconds);
+ if (err != kTvmErrorNoError) {
+ goto release_and_return;
+ }
+ repeat_res_seconds += curr_res_seconds;
+ } while (repeat_res_seconds < min_repeat_seconds);
+ double mean_exec_seconds = repeat_res_seconds / exec_count;
+ *iter = mean_exec_seconds;
+ iter++;
+ }
+
+ *ret_type_code = kTVMBytes;
+ ret_val->v_handle = result_byte_arr;
+ return err;
+
+release_and_return : {
+ tvm_crt_error_t release_err =
+ TVMPlatformMemoryFree((void*)&result_byte_arr->data, result_byte_ctx);
+ if (release_err != kTvmErrorNoError) {
+ release_err = TVMPlatformMemoryFree((void*)&result_byte_arr, result_byte_ctx);
+ }
+
+ if (err == kTvmErrorNoError && release_err != kTvmErrorNoError) {
+ err = release_err;
+ }
+}
+ return err;
+}
diff --git a/src/runtime/crt/host/crt_config.h b/src/runtime/crt/host/crt_config.h
index 6891896..109abaf 100644
--- a/src/runtime/crt/host/crt_config.h
+++ b/src/runtime/crt/host/crt_config.h
@@ -43,7 +43,7 @@
#define TVM_CRT_MAX_REGISTERED_MODULES 2
/*! Size of the global function registry, in bytes. */
-#define TVM_CRT_GLOBAL_FUNC_REGISTRY_SIZE_BYTES 200
+#define TVM_CRT_GLOBAL_FUNC_REGISTRY_SIZE_BYTES 256
/*! Maximum packet size, in bytes, including the length header. */
#define TVM_CRT_MAX_PACKET_SIZE_BYTES 64000
diff --git a/src/runtime/crt/host/main.cc b/src/runtime/crt/host/main.cc
index ba43e84..7db17f5 100644
--- a/src/runtime/crt/host/main.cc
+++ b/src/runtime/crt/host/main.cc
@@ -68,29 +68,30 @@ tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLContext ctx) {
return memory_manager->Free(memory_manager, ptr, ctx);
}
-high_resolution_clock::time_point g_utvm_start_time;
+steady_clock::time_point g_utvm_start_time;
int g_utvm_timer_running = 0;
-int TVMPlatformTimerStart() {
+tvm_crt_error_t TVMPlatformTimerStart() {
if (g_utvm_timer_running) {
std::cerr << "timer already running" << std::endl;
- return -1;
+ return kTvmErrorPlatformTimerBadState;
}
- g_utvm_start_time = high_resolution_clock::now();
+ g_utvm_start_time = std::chrono::steady_clock::now();
g_utvm_timer_running = 1;
- return 0;
+ return kTvmErrorNoError;
}
-int TVMPlatformTimerStop(double* res_us) {
+tvm_crt_error_t TVMPlatformTimerStop(double* elapsed_time_seconds) {
if (!g_utvm_timer_running) {
std::cerr << "timer not running" << std::endl;
- return -1;
+ return kTvmErrorPlatformTimerBadState;
}
- auto utvm_stop_time = high_resolution_clock::now();
- duration<double, std::micro> time_span(utvm_stop_time - g_utvm_start_time);
- *res_us = time_span.count();
+ auto utvm_stop_time = std::chrono::steady_clock::now();
+ std::chrono::microseconds time_span =
+ std::chrono::duration_cast<std::chrono::microseconds>(utvm_stop_time - g_utvm_start_time);
+ *elapsed_time_seconds = static_cast<double>(time_span.count()) / 1e6;
g_utvm_timer_running = 0;
- return 0;
+ return kTvmErrorNoError;
}
}
diff --git a/src/runtime/graph/debug/graph_runtime_debug.cc b/src/runtime/graph/debug/graph_runtime_debug.cc
index d02a6d9..3353c11 100644
--- a/src/runtime/graph/debug/graph_runtime_debug.cc
+++ b/src/runtime/graph/debug/graph_runtime_debug.cc
@@ -58,56 +58,106 @@ class GraphRuntimeDebug : public GraphRuntime {
std::string RunIndividual(int number, int repeat, int min_repeat_ms) {
// warmup run
GraphRuntime::Run();
- std::ostringstream os;
+ std::string tkey = module_->type_key();
std::vector<double> time_per_op(op_execs_.size(), 0);
- for (int i = 0; i < repeat; ++i) {
- std::chrono::time_point<std::chrono::high_resolution_clock, std::chrono::nanoseconds> tbegin,
- tend;
- double duration_ms = 0.0;
- do {
- std::fill(time_per_op.begin(), time_per_op.end(), 0);
- if (duration_ms > 0.0) {
- number = static_cast<int>(std::max((min_repeat_ms / (duration_ms / number) + 1),
- number * 1.618)); // 1.618 is chosen by random
- }
- tbegin = std::chrono::high_resolution_clock::now();
- for (int k = 0; k < number; k++) {
- for (size_t index = 0; index < op_execs_.size(); ++index) {
- if (op_execs_[index]) {
- const TVMContext& ctx = data_entry_[entry_id(index, 0)]->ctx;
- auto op_tbegin = std::chrono::high_resolution_clock::now();
- op_execs_[index]();
- TVMSynchronize(ctx.device_type, ctx.device_id, nullptr);
- auto op_tend = std::chrono::high_resolution_clock::now();
- double op_duration =
- std::chrono::duration_cast<std::chrono::duration<double> >(op_tend - op_tbegin)
- .count();
- time_per_op[index] += op_duration * 1e6; // us
+ if (tkey == "rpc") {
+ // RPC modules rely on remote timing which implements the logic from the else branch.
+ for (size_t index = 0; index < op_execs_.size(); ++index) {
+ time_per_op[index] += RunOpRPC(index, number, repeat, min_repeat_ms);
+ }
+ } else {
+ for (int i = 0; i < repeat; ++i) {
+ std::chrono::time_point<std::chrono::high_resolution_clock, std::chrono::nanoseconds>
+ tbegin, tend;
+ double duration_ms = 0.0;
+ do {
+ std::fill(time_per_op.begin(), time_per_op.end(), 0);
+ if (duration_ms > 0.0) {
+ number = static_cast<int>(std::max((min_repeat_ms / (duration_ms / number) + 1),
+ number * 1.618)); // 1.618 is chosen by random
+ }
+ tbegin = std::chrono::high_resolution_clock::now();
+ for (int k = 0; k < number; k++) {
+ for (size_t index = 0; index < op_execs_.size(); ++index) {
+ if (op_execs_[index]) {
+ time_per_op[index] += RunOpHost(index);
+ }
}
}
- }
- tend = std::chrono::high_resolution_clock::now();
- duration_ms =
- std::chrono::duration_cast<std::chrono::duration<double> >(tend - tbegin).count() *
- 1000;
- } while (duration_ms < min_repeat_ms);
-
- LOG(INFO) << "Iteration: " << i;
- int op = 0;
- for (size_t index = 0; index < time_per_op.size(); index++) {
- if (op_execs_[index]) {
- time_per_op[index] /= number;
- LOG(INFO) << "Op #" << op++ << " " << GetNodeName(index) << ": " << time_per_op[index]
- << " us/iter";
+ tend = std::chrono::high_resolution_clock::now();
+ duration_ms =
+ std::chrono::duration_cast<std::chrono::duration<double> >(tend - tbegin).count() *
+ 1000;
+ } while (duration_ms < min_repeat_ms);
+
+ LOG(INFO) << "Iteration: " << i;
+ int op = 0;
+ for (size_t index = 0; index < time_per_op.size(); index++) {
+ if (op_execs_[index]) {
+ time_per_op[index] /= number;
+ LOG(INFO) << "Op #" << op++ << " " << GetNodeName(index) << ": " << time_per_op[index]
+ << " us/iter";
+ }
}
}
}
+
+ std::ostringstream os;
for (size_t index = 0; index < time_per_op.size(); index++) {
os << time_per_op[index] << ",";
}
return os.str();
}
+ double RunOpRPC(int index, int number, int repeat, int min_repeat_ms) {
+ const TVMContext& ctx = data_entry_[entry_id(index, 0)]->ctx;
+ TVMOpParam param = nodes_[index].param;
+ std::string name = param.func_name;
+ uint32_t num_inputs = param.num_inputs;
+ uint32_t num_outputs = param.num_outputs;
+
+ PackedFunc time_eval = runtime::Registry::Get("runtime.RPCTimeEvaluator")
+ ->
+ operator()(module_, name, static_cast<int>(ctx.device_type),
+ ctx.device_id, number, repeat, min_repeat_ms, "");
+
+ int num_flat_args = num_inputs + num_outputs;
+ std::unique_ptr<TVMValue> values(new TVMValue[num_flat_args]);
+ std::unique_ptr<int> type_codes(new int[num_flat_args]);
+ TVMArgsSetter setter(values.get(), type_codes.get());
+ int offs = 0;
+ const auto& inode = nodes_[index];
+ for (const auto& e : inode.inputs) {
+ uint32_t eid = this->entry_id(e);
+ DLTensor* arg = const_cast<DLTensor*>(data_entry_[eid].operator->());
+ setter(offs, arg);
+ offs++;
+ }
+ for (uint32_t i = 0; i < num_outputs; ++i) {
+ uint32_t eid = this->entry_id(index, i);
+ DLTensor* arg = const_cast<DLTensor*>(data_entry_[eid].operator->());
+ setter(offs, arg);
+ offs++;
+ }
+ TVMRetValue rv;
+ time_eval.CallPacked(TVMArgs(values.get(), type_codes.get(), num_flat_args), &rv);
+ std::string results = rv.operator std::string();
+ const double* results_arr = reinterpret_cast<const double*>(results.data());
+ LOG(INFO) << "Got op timing: " << results_arr[0];
+ return results_arr[0];
+ }
+
+ double RunOpHost(int index) {
+ auto op_tbegin = std::chrono::high_resolution_clock::now();
+ op_execs_[index]();
+ const TVMContext& ctx = data_entry_[entry_id(index, 0)]->ctx;
+ TVMSynchronize(ctx.device_type, ctx.device_id, nullptr);
+ auto op_tend = std::chrono::high_resolution_clock::now();
+ double op_duration =
+ std::chrono::duration_cast<std::chrono::duration<double> >(op_tend - op_tbegin).count();
+ return op_duration;
+ }
+
/*!
* \brief Run each operation and get the output.
* \param index The index of op which needs to be returned.
diff --git a/src/runtime/minrpc/minrpc_server.h b/src/runtime/minrpc/minrpc_server.h
index 62f7236..d28e0c3 100644
--- a/src/runtime/minrpc/minrpc_server.h
+++ b/src/runtime/minrpc/minrpc_server.h
@@ -156,6 +156,7 @@ class MinRPCServer {
} else if (rv_tcode == kTVMBytes) {
ret_tcode[1] = kTVMBytes;
this->ReturnPackedSeq(ret_value, ret_tcode, 2);
+ TVMByteArrayFree(reinterpret_cast<TVMByteArray*>(ret_value[1].v_handle)); // NOLINT(*)
} else if (rv_tcode == kTVMPackedFuncHandle || rv_tcode == kTVMModuleHandle) {
ret_tcode[1] = kTVMOpaqueHandle;
this->ReturnPackedSeq(ret_value, ret_tcode, 2);
diff --git a/tests/micro/qemu/test_zephyr.py b/tests/micro/qemu/test_zephyr.py
index 3e73307..1c38c2d 100644
--- a/tests/micro/qemu/test_zephyr.py
+++ b/tests/micro/qemu/test_zephyr.py
@@ -143,6 +143,33 @@ def test_compile_runtime(platform):
test_basic_add(sess)
+def test_platform_timer(platform):
+ """Test compiling the on-device runtime."""
+
+ model, zephyr_board = PLATFORMS[platform]
+
+ # NOTE: run test in a nested function so cPython will delete arrays before closing the session.
+ def test_basic_add(sess):
+ A_data = tvm.nd.array(np.array([2, 3], dtype="int8"), ctx=sess.context)
+ assert (A_data.asnumpy() == np.array([2, 3])).all()
+ B_data = tvm.nd.array(np.array([4], dtype="int8"), ctx=sess.context)
+ assert (B_data.asnumpy() == np.array([4])).all()
+ C_data = tvm.nd.array(np.array([0, 0], dtype="int8"), ctx=sess.context)
+ assert (C_data.asnumpy() == np.array([0, 0])).all()
+
+ system_lib = sess.get_system_lib()
+ time_eval_f = system_lib.time_evaluator(
+ "add", sess.context, number=20, repeat=3, min_repeat_ms=40
+ )
+ result = time_eval_f(A_data, B_data, C_data)
+ assert (C_data.asnumpy() == np.array([6, 7])).all()
+ assert result.mean > 0
+ assert len(result.results) == 3
+
+ with _make_add_sess(model, zephyr_board) as sess:
+ test_basic_add(sess)
+
+
def test_relay(platform):
"""Testing a simple relay graph"""
model, zephyr_board = PLATFORMS[platform]
diff --git a/tests/micro/qemu/zephyr-runtime/src/main.c b/tests/micro/qemu/zephyr-runtime/src/main.c
index 86b2b27..9d10504 100644
--- a/tests/micro/qemu/zephyr-runtime/src/main.c
+++ b/tests/micro/qemu/zephyr-runtime/src/main.c
@@ -99,10 +99,10 @@ int g_utvm_timer_running = 0;
static struct device* led_pin;
#endif // CONFIG_LED
-int TVMPlatformTimerStart() {
+tvm_crt_error_t TVMPlatformTimerStart() {
if (g_utvm_timer_running) {
TVMLogf("timer already running");
- return -1;
+ return kTvmErrorPlatformTimerBadState;
}
#ifdef CONFIG_LED
@@ -111,13 +111,13 @@ int TVMPlatformTimerStart() {
k_timer_start(&g_utvm_timer, TIME_TIL_EXPIRY, TIME_TIL_EXPIRY);
g_utvm_start_time = k_cycle_get_32();
g_utvm_timer_running = 1;
- return 0;
+ return kTvmErrorNoError;
}
-int TVMPlatformTimerStop(double* res_us) {
+tvm_crt_error_t TVMPlatformTimerStop(double* elapsed_time_seconds) {
if (!g_utvm_timer_running) {
TVMLogf("timer not running");
- return -1;
+ return kTvmErrorPlatformTimerBadState;
}
uint32_t stop_time = k_cycle_get_32();
@@ -134,7 +134,7 @@ int TVMPlatformTimerStop(double* res_us) {
}
uint32_t ns_spent = (uint32_t)k_cyc_to_ns_floor64(cycles_spent);
- double hw_clock_res_us = ns_spent / 1000.0;
+ double hw_clock_elapsed_seconds = ns_spent / 1e9;
// need to grab time remaining *before* stopping. when stopped, this function
// always returns 0.
@@ -152,13 +152,13 @@ int TVMPlatformTimerStop(double* res_us) {
// if we approach the limits of the HW clock datatype (uint32_t), use the
// coarse-grained timer result instead
if (approx_num_cycles > (0.5 * (~((uint32_t)0)))) {
- *res_us = timer_res_ms * 1000.0;
+ *elapsed_time_seconds = timer_res_ms / 1e3;
} else {
- *res_us = hw_clock_res_us;
+ *elapsed_time_seconds = hw_clock_elapsed_seconds;
}
g_utvm_timer_running = 0;
- return 0;
+ return kTvmErrorNoError;
}
#define RING_BUF_SIZE 512
diff --git a/tests/python/unittest/test_crt.py b/tests/python/unittest/test_crt.py
index 1d84d4e..659d190 100644
--- a/tests/python/unittest/test_crt.py
+++ b/tests/python/unittest/test_crt.py
@@ -25,8 +25,10 @@ import subprocess
import textwrap
import numpy as np
+import pytest
import tvm
+import tvm.testing
import tvm.relay
import tvm.testing
@@ -172,8 +174,27 @@ def test_std_math_functions():
np.testing.assert_allclose(B_data.asnumpy(), np.array([7.389056, 20.085537]))
+@tvm.testing.requires_micro
+def test_platform_timer():
+ """Verify the platform timer can be used to time remote functions."""
+ import tvm.micro
+
+ workspace = tvm.micro.Workspace()
+ A = tvm.te.placeholder((2,), dtype="float32", name="A")
+ B = tvm.te.compute(A.shape, lambda i: tvm.te.exp(A[i]), name="B")
+ s = tvm.te.create_schedule(B.op)
+
+ with _make_sess_from_op(workspace, "myexpf", s, [A, B]) as sess:
+ A_data = tvm.nd.array(np.array([2.0, 3.0], dtype="float32"), ctx=sess.context)
+ B_data = tvm.nd.array(np.array([2.0, 3.0], dtype="float32"), ctx=sess.context)
+ lib = sess.get_system_lib()
+ time_eval_f = lib.time_evaluator(
+ "myexpf", sess.context, number=2000, repeat=3, min_repeat_ms=40
+ )
+ result = time_eval_f(A_data, B_data)
+ assert result.mean > 0
+ assert len(result.results) == 3
+
+
if __name__ == "__main__":
- test_compile_runtime()
- test_reset()
- test_graph_runtime()
- test_std_math_functions()
+ sys.exit(pytest.main([__file__] + sys.argv[1:]))