You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by tq...@apache.org on 2020/10/23 17:18:11 UTC
[incubator-tvm] branch main updated: [AutoScheduler] Guarantee init
population sampling outputs a valid set (#6713)
This is an automated email from the ASF dual-hosted git repository.
tqchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-tvm.git
The following commit(s) were added to refs/heads/main by this push:
new 7158a4b [AutoScheduler] Guarantee init population sampling outputs a valid set (#6713)
7158a4b is described below
commit 7158a4bf14c550320cc8af46b5bc48ed56d3037c
Author: Cody Yu <co...@gmail.com>
AuthorDate: Fri Oct 23 10:17:53 2020 -0700
[AutoScheduler] Guarantee init population sampling outputs a valid set (#6713)
---
python/tvm/auto_scheduler/cost_model/xgb_model.py | 2 +-
python/tvm/auto_scheduler/search_policy.py | 3 +-
src/auto_scheduler/compute_dag.cc | 2 +-
src/auto_scheduler/search_policy/sketch_policy.cc | 70 ++++++++++++++++++----
src/auto_scheduler/search_policy/sketch_policy.h | 12 ++--
.../test_auto_scheduler_evolutionary_search.py | 2 -
tests/scripts/task_python_docs.sh | 6 +-
tutorials/auto_scheduler/conv2d.json | 1 +
tutorials/auto_scheduler/matmul.json | 2 +
tutorials/auto_scheduler/tune_conv2d_layer_cuda.py | 6 +-
tutorials/auto_scheduler/tune_matmul_x86.py | 6 +-
11 files changed, 85 insertions(+), 27 deletions(-)
diff --git a/python/tvm/auto_scheduler/cost_model/xgb_model.py b/python/tvm/auto_scheduler/cost_model/xgb_model.py
index 9a534aa..b8953c1 100644
--- a/python/tvm/auto_scheduler/cost_model/xgb_model.py
+++ b/python/tvm/auto_scheduler/cost_model/xgb_model.py
@@ -192,7 +192,7 @@ class XGBModel(PythonBasedModel):
else:
ret = np.random.uniform(0, 1, (len(states),))
- # Predict 0 for invalid states that failed to be lowered.
+ # Predict -inf for invalid states that failed to be lowered.
for idx, feature in enumerate(features):
if feature.min() == feature.max() == 0:
ret[idx] = float("-inf")
diff --git a/python/tvm/auto_scheduler/search_policy.py b/python/tvm/auto_scheduler/search_policy.py
index f3d459e..838ced1 100644
--- a/python/tvm/auto_scheduler/search_policy.py
+++ b/python/tvm/auto_scheduler/search_policy.py
@@ -148,10 +148,11 @@ class SketchPolicy(SearchPolicy):
DEFAULT_PARAMS = {
"eps_greedy": 0.05,
"retry_search_one_round_on_empty": 10,
+ "sample_init_population": 50,
+ "sample_init_use_measured_ratio": 0.2,
"evolutionary_search_population": 2048,
"evolutionary_search_num_iters": 10,
"evolutionary_search_mutation_prob": 0.85,
- "evolutionary_search_use_measured_ratio": 0.2,
"cpu_multi_level_tiling_structure": "SSRSRS",
"gpu_multi_level_tiling_structure": "SSSRRSRS",
# Notice: the default thread bind policy of GPU assumes the tiling structure to have at
diff --git a/src/auto_scheduler/compute_dag.cc b/src/auto_scheduler/compute_dag.cc
index 3b0de97..75fd27e 100755
--- a/src/auto_scheduler/compute_dag.cc
+++ b/src/auto_scheduler/compute_dag.cc
@@ -1175,7 +1175,7 @@ Array<State> ComputeDAG::InferBound(const Array<State>& states) const {
support::parallel_for(0, states.size(), [this, &states, &out_states](int i) {
try {
- out_states.Set(i, this->InferBound(states[i]));
+ out_states.Set(i, (states[i].defined()) ? this->InferBound(states[i]) : states[i]);
} catch (dmlc::Error& e) {
LOG(WARNING) << "InferBound fails on the state:\n"
<< states[i] << "\n"
diff --git a/src/auto_scheduler/search_policy/sketch_policy.cc b/src/auto_scheduler/search_policy/sketch_policy.cc
index 8de17a6..60178b3 100644
--- a/src/auto_scheduler/search_policy/sketch_policy.cc
+++ b/src/auto_scheduler/search_policy/sketch_policy.cc
@@ -258,12 +258,12 @@ std::pair<Array<MeasureInput>, Array<MeasureResult>> SketchPolicyNode::ContinueS
Array<State> SketchPolicyNode::SearchOneRound(int num_random_states, Array<State>* random_states) {
// Get parameters
- int population = GetIntParam(params, SketchParamKey::EvolutionarySearch::population);
- int num_use_measured =
- std::min(static_cast<int>(measured_states_vector_.size()),
- static_cast<int>(
- GetDoubleParam(params, SketchParamKey::EvolutionarySearch::use_measured_ratio) *
- population));
+ int population = GetIntParam(params, SketchParamKey::SampleInitPopulation::population);
+ int num_use_measured = std::min(
+ static_cast<int>(measured_states_vector_.size()),
+ static_cast<int>(
+ GetDoubleParam(params, SketchParamKey::SampleInitPopulation::use_measured_ratio) *
+ population));
bool is_cost_model_reasonable = !program_cost_model->IsInstance<RandomModelNode>();
// 1. Generate sketches
@@ -374,10 +374,14 @@ Array<State> SketchPolicyNode::SampleInitPopulation(const Array<State>& sketches
}
auto tic_begin = std::chrono::high_resolution_clock::now();
- while (static_cast<int>(out_states.size()) < out_size && fail_ct < out_size) {
+ size_t iter = 1;
+ size_t target_size = out_size;
+ size_t unchange_cnt = 0;
+ while (out_states.size() < target_size) {
std::vector<State> temp_states(out_size);
- support::parallel_for(0, out_size - out_states.size(),
+ // Initialize a batch of states randomly
+ support::parallel_for(0, out_size,
[this, &temp_states, &sketches, &rand_gens](int index) {
// Randomly choose a sketch
State tmp_s = sketches[(rand_gens[index])() % sketches.size()];
@@ -395,13 +399,57 @@ Array<State> SketchPolicyNode::SampleInitPopulation(const Array<State>& sketches
}
});
- for (int i = 0; i < out_size; i++) {
- if (temp_states[i].defined()) {
- out_states.push_back(std::move(temp_states[i]));
+ // Filter out the states that failed to apply initial rules
+ Array<State> cand_states;
+ for (auto tmp_s : temp_states) {
+ if (tmp_s.defined()) {
+ cand_states.push_back(std::move(tmp_s));
} else {
fail_ct++;
}
}
+
+ unchange_cnt++;
+ if (!cand_states.empty()) {
+ // Run the cost model to filter out states that failed to extract features.
+ // This may happen due to illegal schedules or schedules that use too much
+ // memory on GPU.
+ std::vector<float> pop_scores;
+ pop_scores.reserve(cand_states.size());
+ cand_states = search_task->compute_dag.InferBound(cand_states);
+ program_cost_model->Predict(search_task, cand_states, &pop_scores);
+
+ for (size_t i = 0; i < cand_states.size(); i++) {
+ if (pop_scores[i] > -1e10) {
+ out_states.push_back(std::move(cand_states[i]));
+ unchange_cnt = 0; // Reset the counter once we found a valid state
+ } else {
+ fail_ct++;
+ }
+ }
+ }
+
+ if (iter % 5 == 0) {
+ double duration = std::chrono::duration_cast<std::chrono::duration<double>>(
+ std::chrono::high_resolution_clock::now() - tic_begin)
+ .count();
+ StdCout(verbose) << "Sample Iter: " << iter << std::fixed << std::setprecision(4)
+ << "\t#Pop: " << out_states.size() << "\t#Target: " << target_size
+ << "\tfail_ct: " << fail_ct << "\tTime elapsed: " << std::fixed
+ << std::setprecision(2) << duration << std::endl;
+ }
+
+ if (unchange_cnt == 5) {
+ // Reduce the target size to avoid spending too long in this phase if no valid state was
+ // found in the past iterations
+ if (target_size > 1) {
+ target_size /= 2;
+ StdCout(verbose) << "#Target has been reduced to " << target_size
+ << " due to too many failures";
+ }
+ unchange_cnt = 0;
+ }
+ iter++;
}
double duration = std::chrono::duration_cast<std::chrono::duration<double>>(
diff --git a/src/auto_scheduler/search_policy/sketch_policy.h b/src/auto_scheduler/search_policy/sketch_policy.h
index edaa89e..930fd5e 100644
--- a/src/auto_scheduler/search_policy/sketch_policy.h
+++ b/src/auto_scheduler/search_policy/sketch_policy.h
@@ -56,16 +56,20 @@ struct SketchParamKey {
/*! \brief Retry several times if SearchOneRound gets no valid state. */
static constexpr const char* empty_retry_count = "retry_search_one_round_on_empty";
+ struct SampleInitPopulation {
+ /*! \brief The population size of initial sampling. */
+ static constexpr const char* population = "sample_init_population";
+ /*! \brief The maximum percentage of measured states in the initial sampling. */
+ static constexpr const char* use_measured_ratio = "sample_init_use_measured_ratio";
+ };
+
struct EvolutionarySearch {
- /*! \brief The population size for evolutionary search. */
+ /*! \brief The population size of evolutionary search. */
static constexpr const char* population = "evolutionary_search_population";
/*! \brief The number of iterations performed by genetic algorithm.*/
static constexpr const char* num_iters = "evolutionary_search_num_iters";
/*! \brief The mutation probability.*/
static constexpr const char* mutation_prob = "evolutionary_search_mutation_prob";
- /*! \brief The maximum percentage of measured states in the initial population for evolutionary
- * search. */
- static constexpr const char* use_measured_ratio = "evolutionary_search_use_measured_ratio";
};
struct MultiLevelTiling {
diff --git a/tests/python/unittest/test_auto_scheduler_evolutionary_search.py b/tests/python/unittest/test_auto_scheduler_evolutionary_search.py
index 9fec6f1..4acfa39 100644
--- a/tests/python/unittest/test_auto_scheduler_evolutionary_search.py
+++ b/tests/python/unittest/test_auto_scheduler_evolutionary_search.py
@@ -44,7 +44,6 @@ def test_mutate_tile_size():
def predict(self, task, states):
scores = []
- found = False
for state in states:
scores.append(1 if self.is_good_state(state) else 0)
return scores
@@ -89,7 +88,6 @@ def test_mutate_parallel():
def predict(self, task, states):
scores = []
- found = False
for state in states:
scores.append(1 if self.is_good_state(state) else 0)
return scores
diff --git a/tests/scripts/task_python_docs.sh b/tests/scripts/task_python_docs.sh
index e0165b6..e279b90 100755
--- a/tests/scripts/task_python_docs.sh
+++ b/tests/scripts/task_python_docs.sh
@@ -37,7 +37,11 @@ rm -rf docs/_build
mkdir -p docs/_build/html
rm -rf docs/gen_modules
rm -rf docs/doxygen
-rm -rf tutorials/auto_scheduler/auto_scheduler_logs
+
+# prepare auto scheduler tutorials
+rm -rf tutorials/auto_scheduler/*logs
+mkdir tutorials/auto_scheduler/logs
+cp -f tutorials/auto_scheduler/{matmul,conv2d}.json tutorials/auto_scheduler/logs
# remove stale tutorials and always build from scratch.
rm -rf docs/tutorials
diff --git a/tutorials/auto_scheduler/conv2d.json b/tutorials/auto_scheduler/conv2d.json
new file mode 100644
index 0000000..10f63d0
--- /dev/null
+++ b/tutorials/auto_scheduler/conv2d.json
@@ -0,0 +1 @@
+{"i": [["[\"conv2d_layer\", 1, 7, 7, 512, 512, 3, 3, [1, 1], [1, 1]]", "cuda -keys=cuda,gpu -max_num_threads=1024 -thread_warp_size=32"], [[], [["CI", 5], ["SP", 3, 0, 1, [1, 1, 1, 1], 1], ["SP", 3, 5, 512, [1, 64, 2, 1], 1], ["SP", 3, 10, 7, [1, 1, 1, 1], 1], ["SP", 3, 15, 7, [1, 1, 7, 1], 1], ["SP", 3, 20, 512, [4, 2], 1], ["SP", 3, 23, 3, [1, 1], 1], ["SP", 3, 26, 3, [3, 1], 1], ["RE", 3, [0, 5, 10, 15, 1, 6, 11, 16, 2, 7, 12, 17, 20, 23, 26, 21, 24, 27, 3, 8, 13, 18, 22, 25, 28, 4, 9 [...]
diff --git a/tutorials/auto_scheduler/matmul.json b/tutorials/auto_scheduler/matmul.json
new file mode 100644
index 0000000..7f53764
--- /dev/null
+++ b/tutorials/auto_scheduler/matmul.json
@@ -0,0 +1,2 @@
+# Keep a valid schedule for demonstration
+{"i": [["[\"matmul_add\", 128, 128, 128, \"float32\"]", "llvm -keys=cpu"], [[], [["SP", 2, 0, 128, [4, 2, 4], 1], ["SP", 2, 4, 128, [1, 32, 2], 1], ["SP", 2, 8, 128, [2], 1], ["RE", 2, [0, 4, 1, 5, 8, 2, 6, 9, 3, 7]], ["FSP", 4, 0, 0, 1], ["FSP", 4, 2, 1, 1], ["RE", 4, [0, 2, 1, 3]], ["CA", 2, 4, 1], ["FU", 4, [0, 1]], ["AN", 4, 0, 3], ["PR", 2, 0, "auto_unroll_max_step$0"], ["AN", 2, 9, 2]]]], "r": [[5.80388e-05], 0, 0.299169, 1603402396], "v": "v0.2"}
diff --git a/tutorials/auto_scheduler/tune_conv2d_layer_cuda.py b/tutorials/auto_scheduler/tune_conv2d_layer_cuda.py
index 18c2e85..68fa5d5 100644
--- a/tutorials/auto_scheduler/tune_conv2d_layer_cuda.py
+++ b/tutorials/auto_scheduler/tune_conv2d_layer_cuda.py
@@ -90,10 +90,10 @@ print(task.compute_dag)
# * see :any:`auto_scheduler.TuningOptions`,
# :any:`auto_scheduler.LocalRPCMeasureContext` for more parameters.
-if not os.path.exists("./auto_scheduler_logs"):
- os.mkdir("./auto_scheduler_logs")
+if not os.path.exists("./logs"):
+ os.mkdir("./logs")
-logfile = os.path.join("./auto_scheduler_logs", "conv2d.json")
+logfile = os.path.join("./logs", "conv2d.json")
measure_ctx = auto_scheduler.LocalRPCMeasureContext(min_repeat_ms=300)
tune_option = auto_scheduler.TuningOptions(
num_measure_trials=10,
diff --git a/tutorials/auto_scheduler/tune_matmul_x86.py b/tutorials/auto_scheduler/tune_matmul_x86.py
index 9e9423f..a2331fc 100644
--- a/tutorials/auto_scheduler/tune_matmul_x86.py
+++ b/tutorials/auto_scheduler/tune_matmul_x86.py
@@ -82,10 +82,10 @@ print(task.compute_dag)
# and do more analyses later.
# * see :any:`auto_scheduler.TuningOptions` for more parameters
-if not os.path.exists("./auto_scheduler_logs"):
- os.mkdir("./auto_scheduler_logs")
+if not os.path.exists("./logs"):
+ os.mkdir("./logs")
-logfile = os.path.join("./auto_scheduler_logs", "matmul.json")
+logfile = os.path.join("./logs", "matmul.json")
tune_option = auto_scheduler.TuningOptions(
num_measure_trials=10, measure_callbacks=[auto_scheduler.RecordToFile(logfile)]
)