Posted to commits@tvm.apache.org by GitBox <gi...@apache.org> on 2020/07/05 10:35:42 UTC

[GitHub] [incubator-tvm] yangjunpro commented on a change in pull request #5962: [Ansor][AutoTVM v2.0] Part 0: Ansor minimum system for auto schedule generating

yangjunpro commented on a change in pull request #5962:
URL: https://github.com/apache/incubator-tvm/pull/5962#discussion_r449859957



##########
File path: tests/python/unittest/test_ansor_search_policy.py
##########
@@ -0,0 +1,86 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Test search policy"""
+
+import random
+import numpy as np
+import tempfile
+
+import tvm
+from tvm import ansor
+
+from test_ansor_common import matmul_ansor_test, PropagatingThread
+
+def search_common(target="llvm", seed=random.randint(1, 1 << 30), runner='local',
+                  cost_model=None, num_measure_trials=2, params=None,
+                  pre_search_callbacks=None):
+    print("Test %s schedule search with the default search policy" % (target))
+
+    random.seed(seed)
+    N = 128
+    workload_key = ansor.make_workload_key(matmul_ansor_test, (N, N, N))
+    dag = ansor.ComputeDAG(workload_key)
+    target = tvm.target.create(target)
+    task = ansor.SearchTask(dag, workload_key, target)
+
+    with tempfile.NamedTemporaryFile() as fp:
+        log_file = fp.name
+
+        search_policy = ansor.EmptyPolicy()
+        # search_policy = ansor.SketchSearchPolicy(cost_model, params=params, seed=seed)
+        tuning_options = ansor.TuningOptions(num_measure_trials=num_measure_trials, runner=runner,
+                                             verbose=0,
+                                             measure_callbacks=[ansor.LogToFile(log_file)],
+                                             pre_search_callbacks=pre_search_callbacks)
+        sch, args = ansor.auto_schedule(task, target, search_policy=search_policy,
+                                        tuning_options=tuning_options)
+        inp, res = ansor.best_measure_pair_in_file(log_file, workload_key, target)
+
+        print("==== Python Code ====")
+        print(dag.print_python_code_from_state(inp.state))
+
+        try:
+            print("==== Lowered Stmt ====")
+            print(tvm.lower(sch, args, simple_mode=True))
+            mod = tvm.build(sch, args, target)
+
+            ctx = tvm.context(str(target), 0)
+            dtype = dag.tensors[0].dtype
+            a = tvm.nd.array(np.random.uniform(size=(N, N)).astype(dtype), ctx)
+            b = tvm.nd.array(np.random.uniform(size=(N, N)).astype(dtype), ctx)
+            c = tvm.nd.array(np.zeros((N, N), dtype=dtype), ctx)
+            mod(a, b, c)
+            tvm.testing.assert_allclose(c.asnumpy(), np.dot(
+                a.asnumpy(), b.asnumpy()), rtol=1e-5)
+            print("==== Verification passed ====")
+        except Exception:
+            raise Exception("Error encountered with seed: %d" % (seed))
+    print()
+
+
+def test_search_basic():
+    if not tvm.runtime.enabled("llvm"):
+        return

Review comment:
       Why do we directly return here if LLVM is not enabled? I think there are still alternative backends other than LLVM that this test could exercise.
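       A minimal sketch of that alternative (hypothetical, not part of the PR; the target list below is only illustrative):

```python
def test_search_basic():
    # Hypothetical: exercise every locally enabled backend instead of
    # returning as soon as LLVM is unavailable.
    for target_name in ["llvm", "cuda"]:
        if not tvm.runtime.enabled(target_name):
            continue
        search_common(target=target_name)
```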

##########
File path: python/tvm/ansor/auto_schedule.py
##########
@@ -0,0 +1,206 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+User interface for Ansor auto-scheduler.
+
+The basic schedule search process for Ansor is designed to be:
+`Program sampling` -> `Performance Tuning`.
+
+In `Program sampling`, we use predefined or heuristic rules to generate several initial
+schedules. Starting from these initial points, `Performance Tuning` applies a cost model
+and evolutionary search to find the best-performing schedules. Candidate schedules are
+measured on the target hardware.
+"""
+
+import tvm._ffi
+from tvm.runtime import Object
+from .compute_dag import ComputeDAG
+from .measure import LocalBuilder, LocalRunner
+from . import _ffi_api
+
+
+@tvm._ffi.register_object("ansor.HardwareParams")
+class HardwareParams(Object):
+    """ The parameters of target hardware, this is used to guide the search process of
+    SearchPolicy.
+
+    TODO(...): This is being considered for merging with the new Target:
+    https://discuss.tvm.ai/t/rfc-tvm-target-specification/6844
+
+    Parameters
+    ----------
+    num_cores : int
+        The number of device cores.
+    vector_unit_bytes : int
+        The width of vector units in bytes.
+    cache_line_bytes : int
+        The size of cache line in bytes.
+    max_unroll_vec : int
+        The max length of an axis to be unrolled or vectorized.
+    max_innermost_split_factor : int
+        The max split factor for the innermost tile.
+    """
+    def __init__(self, num_cores, vector_unit_bytes, cache_line_bytes,
+                 max_unroll_vec, max_innermost_split_factor):
+        self.__init_handle_by_constructor__(_ffi_api.HardwareParams, num_cores,
+                                            vector_unit_bytes, cache_line_bytes,
+                                            max_unroll_vec, max_innermost_split_factor)
+
+
+@tvm._ffi.register_object("ansor.SearchTask")
+class SearchTask(Object):
+    """ The meta-information of a search task.
+
+    Parameters
+    ----------
+    dag : ComputeDAG
+        The ComputeDAG for target compute declaration.
+    workload_key : str
+        The workload key for target compute declaration.
+    target : tvm.target.Target
+        The target device of this search task.
+    target_host : Optional[tvm.target.Target]
+        The target host device of this search task.
+    hardware_params : Optional[HardwareParams]
+        Hardware parameters used in this search task.
+    """
+    def __init__(self, dag, workload_key, target, target_host=None,
+                 hardware_params=None):
+        self.__init_handle_by_constructor__(_ffi_api.SearchTask, dag,
+                                            workload_key, target, target_host,
+                                            hardware_params)
+
+
+@tvm._ffi.register_object("ansor.SearchPolicy")
+class SearchPolicy(Object):
+    """ The base class for search policy  """
+
+
+@tvm._ffi.register_object("ansor.EmptyPolicy")
+class EmptyPolicy(SearchPolicy):
+    """ This is an example empty search policy which will always generate
+    the init state of target ComputeDAG.
+    """
+    def __init__(self):
+        self.__init_handle_by_constructor__(_ffi_api.EmptyPolicy)
+
+
+@tvm._ffi.register_object("ansor.TuneOption")
+class TuneOption(Object):
+    """ This controls the options of performance tuning.
+
+    Parameters
+    ----------
+    num_measure_trials: int = 0
+      The number of total schedule measure trials.
+      Ansor measures `num_measure_trials` states in total and finally returns the best
+      schedule among them.
+      With `num_measure_trials` == 0, Ansor will perform the schedule search without any
+      measurement; this can be used to quickly get a runnable schedule without
+      performance tuning.
+    early_stopping: int = -1
+      Stop the tuning early if no improvement is found after n measurements.
+    num_measures_per_round: int = 64
+      The number of programs to be measured at each search round.
+      The whole schedule search process is designed to have several rounds to try a total
+      `num_measure_trials` schedules.
+      We have: `num_search_rounds` = `num_measure_trials` // `num_measures_per_round`
+    verbose: int = 1
+      Verbosity level. 0 for silent, 1 to output information during schedule search.
+    builder: Union[Builder, str] = 'local'
+      Builder which builds the program.
+    runner: Union[Runner, str] = 'local'
+      Runner which runs the program and measures time costs.
+    measure_callbacks: Optional[List[MeasureCallback]]
+      Callback functions called after each measure.
+      Candidates:
+        - ansor.LogToFile
+    pre_search_callbacks: Optional[List[SearchCallback]]
+      Callback functions called before the search process.
+      Candidates:
+        - ansor.PreloadMeasuredStates
+        - ansor.PreloadCustomSketchRule
+        TODO(jcf94): Add these implementations in later PRs.
+    """
+    def __init__(self, num_measure_trials=0, early_stopping=-1, num_measures_per_round=64,

Review comment:
       In my opinion, TuningOptions is already a class holding the configuration related to schedule tuning; I think it might be a bit of an overkill to introduce another config dict.
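       A hypothetical sketch of keeping every knob as a keyword argument on the options object, with no extra params dict (the filename and values are illustrative; the argument names follow the docstring above and the test file in this PR):

```python
# Hypothetical: all tuning configuration lives on the options object itself.
tuning_options = ansor.TuningOptions(
    num_measure_trials=200,        # total schedules to measure
    num_measures_per_round=64,     # schedules measured per search round
    early_stopping=-1,             # never stop early
    verbose=1,
    builder='local',
    runner='local',
    measure_callbacks=[ansor.LogToFile("matmul_tuning.json")],
)
sch, args = ansor.auto_schedule(task, target, tuning_options=tuning_options)
```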

##########
File path: python/tvm/ansor/auto_schedule.py
##########
@@ -0,0 +1,206 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+User interface for Ansor auto-scheduler.
+
+The basic schedule search process for Ansor is designed to be:
+`Program sampling` -> `Performance Tuning`.
+
+In `Program sampling`, we use predefined or heuristic rules to generate several initial
+schedules. Starting from these initial points, `Performance Tuning` applies a cost model
+and evolutionary search to find the best-performing schedules. Candidate schedules are
+measured on the target hardware.
+"""
+
+import tvm._ffi
+from tvm.runtime import Object
+from .compute_dag import ComputeDAG
+from .measure import LocalBuilder, LocalRunner
+from . import _ffi_api
+
+
+@tvm._ffi.register_object("ansor.HardwareParams")
+class HardwareParams(Object):
+    """ The parameters of target hardware, this is used to guide the search process of
+    SearchPolicy.
+
+    TODO(...): This is being considered for merging with the new Target:
+    https://discuss.tvm.ai/t/rfc-tvm-target-specification/6844
+
+    Parameters

Review comment:
       Yes, sometimes we may choose not to use all the cores available on the host system, since some of them need to be reserved for other purposes such as data pre-processing. So I think it is better to leave _num_cores_ as a user-specified option with a default value.
   Regarding _vector_unit_bytes_/_cache_line_bytes_, in my understanding they are primitive hardware configurations, so maybe we can consider auto-detecting them?
   The principle is that we should auto-detect as much as we can, without too much overkill.
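   A rough sketch of that principle, assuming the HardwareParams constructor shown in this diff; the helper name, the default byte sizes, and the unroll/split limits are illustrative assumptions:

```python
import multiprocessing

from tvm import ansor

def make_hardware_params(num_cores=None, vector_unit_bytes=64, cache_line_bytes=64):
    # Hypothetical helper: num_cores defaults to what the host reports, but the
    # user can pass a smaller value to reserve cores for other work such as
    # data pre-processing.
    if num_cores is None:
        num_cores = multiprocessing.cpu_count()
    return ansor.HardwareParams(num_cores, vector_unit_bytes, cache_line_bytes,
                                16,   # max_unroll_vec (illustrative)
                                64)   # max_innermost_split_factor (illustrative)
```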




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org