You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by lm...@apache.org on 2020/12/05 01:34:49 UTC
[tvm] branch main updated: [AutoScheduler] Improve CPU matmul
tutorial (#7037)
This is an automated email from the ASF dual-hosted git repository.
lmzheng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new fd5ce64 [AutoScheduler] Improve CPU matmul tutorial (#7037)
fd5ce64 is described below
commit fd5ce645941153972ecee404c90479b2b391df15
Author: Lianmin Zheng <li...@gmail.com>
AuthorDate: Fri Dec 4 17:34:31 2020 -0800
[AutoScheduler] Improve CPU matmul tutorial (#7037)
* [AutoScheduler] Improve matmul tutorial
* fix
---
python/tvm/auto_scheduler/search_task.py | 8 ++++++++
src/auto_scheduler/compute_dag.cc | 4 ++--
tutorials/auto_scheduler/ci_logs/matmul.json | 2 +-
tutorials/auto_scheduler/tune_matmul_x86.py | 2 +-
4 files changed, 12 insertions(+), 4 deletions(-)
diff --git a/python/tvm/auto_scheduler/search_task.py b/python/tvm/auto_scheduler/search_task.py
index 31698d0..a22cd6e 100644
--- a/python/tvm/auto_scheduler/search_task.py
+++ b/python/tvm/auto_scheduler/search_task.py
@@ -260,6 +260,10 @@ class SearchTask(Object):
A `te.Schedule` and the a list of `te.Tensor` to be used in `tvm.lower` or `tvm.build`.
"""
inp, _ = load_best_record(log_file, self.workload_key)
+ if inp is None:
+ raise RuntimeError(
+ "Cannot find any valid schedule for %s in file %s" % (self.workload_key, log_file)
+ )
if layout_rewrite_option is None:
layout_rewrite_option = LayoutRewriteOption.NO_REWRITE
@@ -285,6 +289,10 @@ class SearchTask(Object):
The best schedule code in python API or CUDA source code
"""
inp, _ = load_best_record(log_file, self.workload_key)
+ if inp is None:
+ raise RuntimeError(
+ "Cannot find any valid schedule for %s in file %s" % (self.workload_key, log_file)
+ )
if print_mode == "schedule":
return self.compute_dag.print_python_code_from_state(inp.state)
diff --git a/src/auto_scheduler/compute_dag.cc b/src/auto_scheduler/compute_dag.cc
index ca59979..a8aff00 100755
--- a/src/auto_scheduler/compute_dag.cc
+++ b/src/auto_scheduler/compute_dag.cc
@@ -961,7 +961,7 @@ ComputeDAG ComputeDAG::RewriteLayout(Array<Step>* transform_steps,
axes_stride[new_axes[i]] *= new_shape[i];
}
- // Add extra layout transpose stage
+ // Add an extra layout transform stage
const auto& layout_transform_tensor = te::compute(
new_shape,
[&new_stride, &placeholder_op, &origin_shape, &new_shape, &origin_axes,
@@ -978,7 +978,7 @@ ComputeDAG ComputeDAG::RewriteLayout(Array<Step>* transform_steps,
}
return placeholder_op.output(0)(access_indices);
},
- "auto_schedule_layout_transpose");
+ "auto_scheduler_layout_transform");
new_op_to_update = layout_transform_tensor->op;
// Update the transform steps
diff --git a/tutorials/auto_scheduler/ci_logs/matmul.json b/tutorials/auto_scheduler/ci_logs/matmul.json
index bc5d6f0..2e3a984 100644
--- a/tutorials/auto_scheduler/ci_logs/matmul.json
+++ b/tutorials/auto_scheduler/ci_logs/matmul.json
@@ -1,2 +1,2 @@
# Keep a valid schedule for demonstration. This is used to prevent flaky errors in CI.
-{"i": [["[\"matmul_add\", 1024, 1024, 1024, \"float32\"]", "llvm -keys=cpu -link-params=0", [24, 64, 64, 0, 0, 0, 0, 0]], [[], [["SP", 2, 0, 1024, [2, 4, 16], 1], ["SP", 2, 4, 1024, [16, 4, 16], 1], ["SP", 2, 8, 1024, [8], 1], ["RE", 2, [0, 4, 1, 5, 8, 2, 6, 9, 3, 7]], ["FU", 2, [0, 1, 2, 3]], ["AN", 2, 0, 3], ["AN", 4, 0, 3], ["PR", 2, 0, "auto_unroll_max_step$0"], ["AN", 2, 6, 2]]]], "r": [[0.028777], 0, 0.613435, 1607038574], "v": "v0.3"}
+{"i": [["[\"matmul_add\", 1024, 1024, 1024, \"float32\"]", "llvm -keys=cpu -link-params=0", [18, 64, 64, 0, 0, 0, 0, 0]], [[], [["SP", 2, 0, 1024, [2, 1, 4], 1], ["SP", 2, 4, 1024, [1, 1, 8], 1], ["SP", 2, 8, 1024, [4], 1], ["RE", 2, [0, 4, 1, 5, 8, 2, 6, 9, 3, 7]], ["FSP", 4, 0, 0, 2], ["FSP", 4, 3, 1, 2], ["RE", 4, [0, 3, 1, 4, 2, 5]], ["CA", 2, 4, 3], ["FU", 4, [0, 1]], ["AN", 4, 0, 3], ["PR", 2, 0, "auto_unroll_max_step$8"], ["AN", 2, 9, 2], ["AN", 4, 4, 2]]]], "r": [[0.0044742], 0, [...]
diff --git a/tutorials/auto_scheduler/tune_matmul_x86.py b/tutorials/auto_scheduler/tune_matmul_x86.py
index bdd14be..9bc15ae 100644
--- a/tutorials/auto_scheduler/tune_matmul_x86.py
+++ b/tutorials/auto_scheduler/tune_matmul_x86.py
@@ -118,7 +118,7 @@ sch, args = task.apply_best(log_file)
######################################################################
# We can lower the schedule to see the IR after auto-scheduling.
# The auto-scheduler correctly performs optimizations including multi-level tiling,
-# parallelization, vectorization, unrolling and operator fusion.
+# layout transformation, parallelization, vectorization, unrolling, and operator fusion.
print("Lowered TIR:")
print(tvm.lower(sch, args, simple_mode=True))