Posted to commits@tvm.apache.org by an...@apache.org on 2022/09/16 22:47:25 UTC
[tvm] 01/20: update configs
This is an automated email from the ASF dual-hosted git repository.
andrewzhaoluo pushed a commit to branch aluo/rebase-08312022-autotensorization-fq2i-changes
in repository https://gitbox.apache.org/repos/asf/tvm.git
commit 13088402eeaa1eed9d2ffadf3f1ff5c7ba123b44
Author: Andrew Luo <an...@gmail.com>
AuthorDate: Wed Aug 17 11:01:56 2022 -0700
update configs
---
python/tvm/meta_schedule/default_config.py | 111 +++++++++++++++++++++++++++--
1 file changed, 106 insertions(+), 5 deletions(-)
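Reviewer note (not part of the patch): the diff below makes the meta_schedule default config dispatch on CPU/GPU features. A minimal sketch of how the new dispatch would be exercised, assuming the module-level entry points shown in the diff (schedule_rules / postproc) keep their (None, target) calling convention; the exact target strings are illustrative only:

    from tvm.target import Target
    from tvm.meta_schedule import default_config

    # An LLVM target whose -mcpu is in the VNNI set (e.g. cascadelake)
    # should now pick up the _DefaultLLVMVNNI rules instead of the
    # generic _DefaultLLVM ones.
    vnni_target = Target("llvm -mcpu=cascadelake")
    rules = default_config.schedule_rules(None, vnni_target)
    postprocs = default_config.postproc(None, vnni_target)

    # A CUDA target where nvcc.have_tensorcore() reports tensor core
    # support should fall through to _DefaultCUDATensorCore instead of
    # _DefaultCUDA (target tag assumed available in this TVM build).
    cuda_target = Target("nvidia/nvidia-a100")
    cuda_rules = default_config.schedule_rules(None, cuda_target)

Non-LLVM/CUDA targets still raise ValueError, and "hexagon" is dropped from the llvm branch in this commit.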
diff --git a/python/tvm/meta_schedule/default_config.py b/python/tvm/meta_schedule/default_config.py
index 652f09261b..73ba0e4fa8 100644
--- a/python/tvm/meta_schedule/default_config.py
+++ b/python/tvm/meta_schedule/default_config.py
@@ -20,9 +20,11 @@ import logging
from os import path as osp
from typing import Callable, Dict, List, Optional, Union
+from tvm._ffi.registry import register_func
+from tvm.contrib import nvcc
from tvm.ir import IRModule
from tvm.target import Target
-from tvm.tir import PrimFunc
+from tvm.tir import PrimFunc, tensor_intrin
from .builder import Builder, LocalBuilder
from .cost_model import CostModel, XGBModel
@@ -43,6 +45,20 @@ FnPostproc = Callable[[], List[Postproc]]
FnMutatorProb = Callable[[], Dict[Mutator, float]]
+def target_has_vnni(target):
+ return target in {
+ "cascadelake",
+ "icelake-client",
+ "icelake-server",
+ "rocketlake",
+ "tigerlake",
+ "cooperlake",
+ "sapphirerapids",
+ "alderlake",
+ }
+
+
+@register_func("tvm.meta_schedule.tune.parse_mod") # for use in ApplyHistoryBest
def mod(mod: Union[PrimFunc, IRModule]) -> IRModule: # pylint: disable=redefined-outer-name
"""Normalize the input to an IRModule"""
if isinstance(mod, PrimFunc):
@@ -174,9 +190,13 @@ def schedule_rules( # pylint: disable=redefined-outer-name
return sch_rules()
if sch_rules is not None:
raise TypeError(f"Expected `sch_rules` to be None or callable, but gets: {sch_rules}")
- if target.kind.name in ["llvm", "hexagon"]:
+ if target.kind.name == "llvm":
+ if target_has_vnni(target.mcpu):
+ return _DefaultLLVMVNNI.schedule_rules()
return _DefaultLLVM.schedule_rules()
if target.kind.name in ["cuda", "rocm", "vulkan"]:
+ if target.kind.name == "cuda" and nvcc.have_tensorcore(target=target):
+ return _DefaultCUDATensorCore.schedule_rules()
return _DefaultCUDA.schedule_rules()
raise ValueError(f"Unsupported target: {target}")
@@ -190,9 +210,13 @@ def postproc( # pylint: disable=redefined-outer-name
return postproc()
if postproc is not None:
raise TypeError(f"Expected `postproc` to be None or callable, but gets: {postproc}")
- if target.kind.name in ["llvm", "hexagon"]:
+ if target.kind.name == "llvm":
+ if target_has_vnni(target.mcpu):
+ return _DefaultLLVMVNNI.postprocs()
return _DefaultLLVM.postprocs()
if target.kind.name in ["cuda", "rocm", "vulkan"]:
+ if target.kind.name == "cuda" and nvcc.have_tensorcore(target=target):
+ return _DefaultCUDATensorCore.postprocs()
return _DefaultCUDA.postprocs()
raise ValueError(f"Unsupported target: {target}")
@@ -208,9 +232,13 @@ def mutator_probs( # pylint: disable=redefined-outer-name
raise TypeError(
f"Expected `mutator_probs` to be None or callable, but gets: {mutator_probs}"
)
- if target.kind.name in ["llvm", "hexagon"]:
+ if target.kind.name == "llvm":
+ if target_has_vnni(target.mcpu):
+ return _DefaultLLVMVNNI.mutator_probs()
return _DefaultLLVM.mutator_probs()
if target.kind.name in ["cuda", "rocm", "vulkan"]:
+ if target.kind.name == "cuda" and nvcc.have_tensorcore(target=target):
+ return _DefaultCUDATensorCore.mutator_probs()
return _DefaultCUDA.mutator_probs()
raise ValueError(f"Unsupported target: {target}")
@@ -277,6 +305,77 @@ class _DefaultLLVM:
}
+class _DefaultLLVMVNNI:
+ """Default tuning configuration for LLVM with VNNI."""
+
+ @staticmethod
+ def schedule_rules() -> List[ScheduleRule]:
+ from tvm.meta_schedule import schedule_rule as M
+
+ logger.info("Using schedule rule: LLVM VNNI")
+
+ return [
+ M.AutoInline(
+ into_producer=False,
+ into_consumer=True,
+ inline_const_tensor=True,
+ disallow_if_then_else=True,
+ require_injective=True,
+ require_ordered=True,
+ disallow_op=["tir.exp"],
+ ),
+ M.AddRFactor(max_jobs_per_core=16, max_innermost_factor=64),
+ M.MultiLevelTilingWithIntrin(
+ tensor_intrin.VNNI_DOT_16x4_INTRIN,
+ structure="SSRSRS",
+ tile_binds=None,
+ max_innermost_factor=64,
+ vector_load_lens=None,
+ reuse_read=None,
+ reuse_write=M.ReuseType(
+ req="may",
+ levels=[1, 2],
+ scope="global",
+ ),
+ ),
+ M.MultiLevelTiling(
+ structure="SSRSRS",
+ tile_binds=None,
+ max_innermost_factor=64,
+ vector_load_lens=None,
+ reuse_read=None,
+ reuse_write=M.ReuseType(
+ req="may",
+ levels=[1, 2],
+ scope="global",
+ ),
+ ),
+ M.ParallelizeVectorizeUnroll(
+ max_jobs_per_core=16,
+ max_vectorize_extent=64,
+ unroll_max_steps=[0, 16, 64, 512],
+ unroll_explicit=True,
+ ),
+ M.RandomComputeLocation(),
+ ]
+
+ @staticmethod
+ def postprocs() -> List[Postproc]:
+ from tvm.meta_schedule import postproc as M
+
+ return [
+ M.DisallowDynamicLoop(),
+ M.RewriteParallelVectorizeUnroll(),
+ M.RewriteReductionBlock(),
+ M.RewriteTensorize(vectorize_init_loop=True),
+ M.RewriteLayout(),
+ ]
+
+ @staticmethod
+ def mutator_probs() -> Dict[Mutator, float]:
+ return _DefaultLLVM.mutator_probs()
+
+
class _DefaultCUDA:
"""Default tuning configuration for CUDA."""
@@ -355,10 +454,12 @@ class _DefaultCUDATensorCore:
from tvm.meta_schedule import schedule_rule as M
from tvm.tir.tensor_intrin.cuda import get_wmma_intrin_group
+ logger.info("Using schedule rule: CUDA tensorcore")
+
return [
M.MultiLevelTilingTensorCore(
intrin_groups=[
- get_wmma_intrin_group(
+ tensor_intrin.get_wmma_intrin_group(
store_scope="shared",
in_dtype=in_dtype,
out_dtype=out_dtype,