Posted to commits@tvm.apache.org by an...@apache.org on 2022/09/16 22:47:25 UTC
[tvm] 01/20: update configs
This is an automated email from the ASF dual-hosted git repository.
andrewzhaoluo pushed a commit to branch aluo/rebase-08312022-autotensorization-fq2i-changes
in repository https://gitbox.apache.org/repos/asf/tvm.git
commit 13088402eeaa1eed9d2ffadf3f1ff5c7ba123b44
Author: Andrew Luo <an...@gmail.com>
AuthorDate: Wed Aug 17 11:01:56 2022 -0700
update configs
---
python/tvm/meta_schedule/default_config.py | 111 +++++++++++++++++++++++++++--
1 file changed, 106 insertions(+), 5 deletions(-)
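Reviewer note (not part of the patch): the diff below makes the meta_schedule default config dispatch on CPU/GPU features. A minimal sketch of how the new dispatch would be exercised, assuming the module-level entry points shown in the diff (schedule_rules / postproc) keep their (None, target) calling convention; the exact target strings are illustrative only:

    from tvm.target import Target
    from tvm.meta_schedule import default_config

    # An LLVM target whose -mcpu is in the VNNI set (e.g. cascadelake)
    # should now pick up the _DefaultLLVMVNNI rules instead of the
    # generic _DefaultLLVM ones.
    vnni_target = Target("llvm -mcpu=cascadelake")
    rules = default_config.schedule_rules(None, vnni_target)
    postprocs = default_config.postproc(None, vnni_target)

    # A CUDA target where nvcc.have_tensorcore() reports tensor core
    # support should fall through to _DefaultCUDATensorCore instead of
    # _DefaultCUDA (target tag assumed available in this TVM build).
    cuda_target = Target("nvidia/nvidia-a100")
    cuda_rules = default_config.schedule_rules(None, cuda_target)

Non-LLVM/CUDA targets still raise ValueError, and "hexagon" is dropped from the llvm branch in this commit.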
diff --git a/python/tvm/meta_schedule/default_config.py b/python/tvm/meta_schedule/default_config.py
index 652f09261b..73ba0e4fa8 100644
--- a/python/tvm/meta_schedule/default_config.py
+++ b/python/tvm/meta_schedule/default_config.py
@@ -20,9 +20,11 @@ import logging
from os import path as osp
from typing import Callable, Dict, List, Optional, Union
+from tvm._ffi.registry import register_func
+from tvm.contrib import nvcc
from tvm.ir import IRModule
from tvm.target import Target
-from tvm.tir import PrimFunc
+from tvm.tir import PrimFunc, tensor_intrin
from .builder import Builder, LocalBuilder
from .cost_model import CostModel, XGBModel
@@ -43,6 +45,20 @@ FnPostproc = Callable[[], List[Postproc]]
FnMutatorProb = Callable[[], Dict[Mutator, float]]
+def target_has_vnni(target):
+ return target in {
+ "cascadelake",
+ "icelake-client",
+ "icelake-server",
+ "rocketlake",
+ "tigerlake",
+ "cooperlake",
+ "sapphirerapids",
+ "alderlake",
+ }
+
+
+@register_func("tvm.meta_schedule.tune.parse_mod") # for use in ApplyHistoryBest
def mod(mod: Union[PrimFunc, IRModule]) -> IRModule: # pylint: disable=redefined-outer-name
"""Normalize the input to an IRModule"""
if isinstance(mod, PrimFunc):
@@ -174,9 +190,13 @@ def schedule_rules( # pylint: disable=redefined-outer-name
return sch_rules()
if sch_rules is not None:
raise TypeError(f"Expected `sch_rules` to be None or callable, but gets: {sch_rules}")
- if target.kind.name in ["llvm", "hexagon"]:
+ if target.kind.name == "llvm":
+ if target_has_vnni(target.mcpu):
+ return _DefaultLLVMVNNI.schedule_rules()
return _DefaultLLVM.schedule_rules()
if target.kind.name in ["cuda", "rocm", "vulkan"]:
+ if target.kind.name == "cuda" and nvcc.have_tensorcore(target=target):
+ return _DefaultCUDATensorCore.schedule_rules()
return _DefaultCUDA.schedule_rules()
raise ValueError(f"Unsupported target: {target}")
@@ -190,9 +210,13 @@ def postproc( # pylint: disable=redefined-outer-name
return postproc()
if postproc is not None:
raise TypeError(f"Expected `postproc` to be None or callable, but gets: {postproc}")
- if target.kind.name in ["llvm", "hexagon"]:
+ if target.kind.name == "llvm":
+ if target_has_vnni(target.mcpu):
+ return _DefaultLLVMVNNI.postprocs()
return _DefaultLLVM.postprocs()
if target.kind.name in ["cuda", "rocm", "vulkan"]:
+ if target.kind.name == "cuda" and nvcc.have_tensorcore(target=target):
+ return _DefaultCUDATensorCore.postprocs()
return _DefaultCUDA.postprocs()
raise ValueError(f"Unsupported target: {target}")
@@ -208,9 +232,13 @@ def mutator_probs( # pylint: disable=redefined-outer-name
raise TypeError(
f"Expected `mutator_probs` to be None or callable, but gets: {mutator_probs}"
)
- if target.kind.name in ["llvm", "hexagon"]:
+ if target.kind.name == "llvm":
+ if target_has_vnni(target.mcpu):
+ return _DefaultLLVMVNNI.mutator_probs()
return _DefaultLLVM.mutator_probs()
if target.kind.name in ["cuda", "rocm", "vulkan"]:
+ if target.kind.name == "cuda" and nvcc.have_tensorcore(target=target):
+ return _DefaultCUDATensorCore.mutator_probs()
return _DefaultCUDA.mutator_probs()
raise ValueError(f"Unsupported target: {target}")
@@ -277,6 +305,77 @@ class _DefaultLLVM:
}
+class _DefaultLLVMVNNI:
+ """Default tuning configuration for LLVM with VNNI."""
+
+ @staticmethod
+ def schedule_rules() -> List[ScheduleRule]:
+ from tvm.meta_schedule import schedule_rule as M
+
+ logger.info("Using schedule rule: LLVM VNNI")
+
+ return [
+ M.AutoInline(
+ into_producer=False,
+ into_consumer=True,
+ inline_const_tensor=True,
+ disallow_if_then_else=True,
+ require_injective=True,
+ require_ordered=True,
+ disallow_op=["tir.exp"],
+ ),
+ M.AddRFactor(max_jobs_per_core=16, max_innermost_factor=64),
+ M.MultiLevelTilingWithIntrin(
+ tensor_intrin.VNNI_DOT_16x4_INTRIN,
+ structure="SSRSRS",
+ tile_binds=None,
+ max_innermost_factor=64,
+ vector_load_lens=None,
+ reuse_read=None,
+ reuse_write=M.ReuseType(
+ req="may",
+ levels=[1, 2],
+ scope="global",
+ ),
+ ),
+ M.MultiLevelTiling(
+ structure="SSRSRS",
+ tile_binds=None,
+ max_innermost_factor=64,
+ vector_load_lens=None,
+ reuse_read=None,
+ reuse_write=M.ReuseType(
+ req="may",
+ levels=[1, 2],
+ scope="global",
+ ),
+ ),
+ M.ParallelizeVectorizeUnroll(
+ max_jobs_per_core=16,
+ max_vectorize_extent=64,
+ unroll_max_steps=[0, 16, 64, 512],
+ unroll_explicit=True,
+ ),
+ M.RandomComputeLocation(),
+ ]
+
+ @staticmethod
+ def postprocs() -> List[Postproc]:
+ from tvm.meta_schedule import postproc as M
+
+ return [
+ M.DisallowDynamicLoop(),
+ M.RewriteParallelVectorizeUnroll(),
+ M.RewriteReductionBlock(),
+ M.RewriteTensorize(vectorize_init_loop=True),
+ M.RewriteLayout(),
+ ]
+
+ @staticmethod
+ def mutator_probs() -> Dict[Mutator, float]:
+ return _DefaultLLVM.mutator_probs()
+
+
class _DefaultCUDA:
"""Default tuning configuration for CUDA."""
@@ -355,10 +454,12 @@ class _DefaultCUDATensorCore:
from tvm.meta_schedule import schedule_rule as M
from tvm.tir.tensor_intrin.cuda import get_wmma_intrin_group
+ logger.info("Using schedule rule: CUDA tensorcore")
+
return [
M.MultiLevelTilingTensorCore(
intrin_groups=[
- get_wmma_intrin_group(
+ tensor_intrin.get_wmma_intrin_group(
store_scope="shared",
in_dtype=in_dtype,
out_dtype=out_dtype,