You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by ju...@apache.org on 2022/04/15 07:29:42 UTC
[tvm] branch main updated: [Metaschedule] Enable continuing tuning after schedule application failure (#10937)
This is an automated email from the ASF dual-hosted git repository.
junrushao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new 715f24d238 [Metaschedule] Enable continuing tuning after schedule application failure (#10937)
715f24d238 is described below
commit 715f24d2381d6dd9ce016f7214fe994a574fb358
Author: Masahiro Masuda <ma...@gmail.com>
AuthorDate: Fri Apr 15 16:29:36 2022 +0900
[Metaschedule] Enable continuing tuning after schedule application failure (#10937)
Currently, when schedule application fails during tuning (e.g. in tensorize), the entire tuning session is killed with an error message like `RuntimeError: parallel_for_dynamic error with ...`. We should handle such errors gracefully and let tuning continue with the other candidates.
No test is added since I don't know how to get tuning to fail in a controlled manner.
---
src/meta_schedule/task_scheduler/task_scheduler.cc | 1 +
src/meta_schedule/utils.h | 12 ++++++++++--
2 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/src/meta_schedule/task_scheduler/task_scheduler.cc b/src/meta_schedule/task_scheduler/task_scheduler.cc
index e30295fd1a..cd287fc1d4 100644
--- a/src/meta_schedule/task_scheduler/task_scheduler.cc
+++ b/src/meta_schedule/task_scheduler/task_scheduler.cc
@@ -34,6 +34,7 @@ void SendToBuilder(const Builder& builder, const TuneContext& context) {
Array<BuilderInput> inputs;
inputs.reserve(candidates.size());
for (const MeasureCandidate& candidate : candidates) {
+ ICHECK(candidate.defined()) << "Undefined MeasureCandidate found";
inputs.push_back(BuilderInput(candidate->sch->mod(), target));
}
context->builder_results = builder->Build(inputs);
diff --git a/src/meta_schedule/utils.h b/src/meta_schedule/utils.h
index 45a04958ad..a29f991cbb 100644
--- a/src/meta_schedule/utils.h
+++ b/src/meta_schedule/utils.h
@@ -36,6 +36,7 @@
#include <tvm/meta_schedule/tune_context.h>
#include <tvm/node/node.h>
#include <tvm/node/serialization.h>
+#include <tvm/runtime/container/optional.h>
#include <tvm/support/parallel_for.h>
#include <tvm/tir/schedule/schedule.h>
@@ -308,12 +309,19 @@ struct ThreadedTraceApply {
/*rand_state=*/ForkSeed(rand_state),
/*debug_mode=*/0,
/*error_render_level=*/tir::ScheduleErrorRenderLevel::kNone);
+
trace->ApplyToSchedule(sch, /*remove_postproc=*/true);
sch->EnterPostproc();
+
for (int i = 0; i < n_; ++i) {
Item& item = items_[i];
- if (!item.postproc->Apply(sch)) {
- ++item.fail_counter;
+ try {
+ if (!item.postproc->Apply(sch)) {
+ ++item.fail_counter;
+ return NullOpt;
+ }
+ } catch (const std::exception& e) {
+ LOG(WARNING) << "ThreadedTraceApply::Apply failed with error " << e.what();
return NullOpt;
}
}