You are viewing a plain-text version of this content; the hyperlink to the canonical (original) message was not preserved in this export.
Posted to commits@tvm.apache.org by co...@apache.org on 2021/01/06 02:04:51 UTC

[tvm] branch main updated: [Fix][Autoscheduler] Costmodel enhancement & bug fix for graph debug runtime (#7197)

This is an automated email from the ASF dual-hosted git repository.

comaniac pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git


The following commit(s) were added to refs/heads/main by this push:
     new 040afb0  [Fix][Autoscheduler] Costmodel enhancement & bug fix for graph debug runtime (#7197)
040afb0 is described below

commit 040afb0245526e1cc71dc0ada6c3c5787394a5c6
Author: Chenfan <ch...@alibaba-inc.com>
AuthorDate: Wed Jan 6 10:04:34 2021 +0800

    [Fix][Autoscheduler] Costmodel enhancement & bug fix for graph debug runtime (#7197)
    
    * Enhancement for autoscheduler cost model
    
    * Bug fix for graph_runtime_debug
    
    * Update
    
    * Lint fix
    
    * Update
    
    * Update
    
    * Add file exist check for cost model load
    
    * Update
    
    * Update
    
    * Lint fix
    
    * Update
    
    * Bug fix
---
 python/tvm/auto_scheduler/cost_model/xgb_model.py | 25 ++++++++++++++++++++++-
 python/tvm/auto_scheduler/task_scheduler.py       | 13 ++++++++++--
 src/auto_scheduler/feature.cc                     | 18 ++++++++++------
 3 files changed, 47 insertions(+), 9 deletions(-)

diff --git a/python/tvm/auto_scheduler/cost_model/xgb_model.py b/python/tvm/auto_scheduler/cost_model/xgb_model.py
index eb14dff..f426482 100644
--- a/python/tvm/auto_scheduler/cost_model/xgb_model.py
+++ b/python/tvm/auto_scheduler/cost_model/xgb_model.py
@@ -88,7 +88,14 @@ class XGBModel(PythonBasedModel):
     their predictions.
     """
 
-    def __init__(self, verbose_eval=25, num_warmup_sample=100, seed=None):
+    def __init__(
+        self,
+        verbose_eval=25,
+        num_warmup_sample=100,
+        seed=None,
+        model_file=None,
+        adapative_training=False,
+    ):
         global xgb
         try:
             if xgb is None:
@@ -116,12 +123,15 @@ class XGBModel(PythonBasedModel):
         self.plan_size = 32
         self.num_warmup_sample = num_warmup_sample
         self.verbose_eval = verbose_eval
+        self.model_file = model_file
+        self.adapative_training = adapative_training
 
         super().__init__()
 
         # cache measurement input/result pairs and extracted features
         self.inputs = []
         self.results = []
+        self.last_train_length = 0
         self.inputs_feature_cache = []
 
     def update(self, inputs, results):
@@ -141,6 +151,15 @@ class XGBModel(PythonBasedModel):
         self.inputs.extend(inputs)
         self.results.extend(results)
 
+        if (
+            self.adapative_training
+            and len(self.inputs) - self.last_train_length < self.last_train_length / 5
+        ):
+            # Set a training threshold related to `last_train_length` to reduce the training
+            # overhead when there're too many logs
+            return
+        self.last_train_length = len(self.inputs)
+
         # extract feature
         n_cached = len(self.inputs_feature_cache)
         features, normalized_throughputs, task_ids = get_per_store_features_from_measure_pairs(
@@ -176,6 +195,10 @@ class XGBModel(PythonBasedModel):
             ],
         )
 
+        # Update the model file if it has been set
+        if self.model_file:
+            self.save(self.model_file)
+
     def predict(self, task, states):
         """Predict the scores of states
         Parameters
diff --git a/python/tvm/auto_scheduler/task_scheduler.py b/python/tvm/auto_scheduler/task_scheduler.py
index ab83ff4..975306f 100644
--- a/python/tvm/auto_scheduler/task_scheduler.py
+++ b/python/tvm/auto_scheduler/task_scheduler.py
@@ -47,6 +47,7 @@ def make_search_policies(
     verbose,
     load_model_file=None,
     load_log_file=None,
+    adapative_training=False,
 ):
     """Make a list of search policies for a list of search tasks.
     It creates one policy per task.
@@ -70,6 +71,9 @@ def make_search_policies(
     load_log_file: Optional[str]
         Load measurement records from this file. If it is not None, the status of the
         task scheduler, search policies and cost models will be restored according to this file.
+    adapative_training: bool = False
+        Option used for XGBModel, which will reduce the model training frequency when there're too
+        many logs.
 
     Returns
     -------
@@ -82,11 +86,16 @@ def make_search_policies(
     if isinstance(search_policy, str):
         policy_type, model_type = search_policy.split(".")
         if model_type == "xgb":
-            cost_model = XGBModel(num_warmup_sample=len(tasks) * num_measures_per_round)
-            if load_model_file:
+            cost_model = XGBModel(
+                num_warmup_sample=len(tasks) * num_measures_per_round,
+                model_file=load_model_file,
+                adapative_training=adapative_training,
+            )
+            if load_model_file and os.path.isfile(load_model_file):
                 logger.info("TaskScheduler: Load pretrained model...")
                 cost_model.load(load_model_file)
             elif load_log_file:
+                logger.info("TaskScheduler: Reload measured states and train the model...")
                 cost_model.update_from_file(load_log_file)
         elif model_type == "random":
             cost_model = RandomModel()
diff --git a/src/auto_scheduler/feature.cc b/src/auto_scheduler/feature.cc
index 47b9fb6..a5d4958 100755
--- a/src/auto_scheduler/feature.cc
+++ b/src/auto_scheduler/feature.cc
@@ -1462,12 +1462,18 @@ void GetPerStoreFeaturesFromMeasurePairs(const Array<MeasureInput>& inputs,
     if (find_res == task_cache.end()) {
       if (inputs[i]->task->compute_dag.defined()) {  // the measure input is complete
         task = inputs[i]->task;
-      } else {  // the measure input is incomplete
-        // rebuild task for incomplete measure pairs read from file
-        Array<te::Tensor> tensors = (*workload_key_to_tensors)(workload_key);
-        task = SearchTask(ComputeDAG(tensors), workload_key, inputs[i]->task->target,
-                          inputs[i]->task->target_host, inputs[i]->task->hardware_params,
-                          inputs[i]->task->layout_rewrite_option);
+      } else {
+        // The measure input is incomplete, rebuild task for incomplete measure pairs read from file
+        try {
+          Array<te::Tensor> tensors = (*workload_key_to_tensors)(workload_key);
+          task = SearchTask(ComputeDAG(tensors), workload_key, inputs[i]->task->target,
+                            inputs[i]->task->target_host, inputs[i]->task->hardware_params,
+                            inputs[i]->task->layout_rewrite_option);
+        } catch (std::exception& e) {
+          // Cannot build ComputeDAG from workload key, the task may have not been registered in
+          // this search round
+          continue;
+        }
       }
       task_id = task_cache.size();