Posted to commits@tvm.apache.org by an...@apache.org on 2022/09/16 22:47:24 UTC

[tvm] branch aluo/rebase-08312022-autotensorization-fq2i-changes created (now bf7d866541)

This is an automated email from the ASF dual-hosted git repository.

andrewzhaoluo pushed a change to branch aluo/rebase-08312022-autotensorization-fq2i-changes
in repository https://gitbox.apache.org/repos/asf/tvm.git


      at bf7d866541 div impl

This branch includes the following new commits:

     new 13088402ee update configs
     new 0a191e975a fix new imports
     new ce7fcbdae3 dnnl pattern matching
     new ac675efd90 optional cast
     new 52e39697f3 update configs
     new 7ee33cbdc2 fix new imports
     new 1bae92e12f update configs
     new a9b1d124e1 fix new imports
     new 946815850b dnnl pattern matching
     new a45cd01be2 optional cast
     new 46e9243f67 old string without stringview
     new 90986c86ec Merge branch 'aluo/rebase-08312022-autotensorization' of gitlab.com:octoml/tvm into aluo/rebase-08312022-autotensorization
     new 6100211a53 optional complete
     new 3b0984e1ba llvm instance optional
     new b2b0772cc4 vm.cc optional
     new c591a2e9e9 ad simplify optional
     new 0def3b736c session optional
     new 18b8089564 final optional
     new 031676c7d2 undo c++ 17 feature again
     new bf7d866541 div impl

The 20 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[tvm] 17/20: session optional

Posted by an...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

andrewzhaoluo pushed a commit to branch aluo/rebase-08312022-autotensorization-fq2i-changes
in repository https://gitbox.apache.org/repos/asf/tvm.git

commit 0def3b736c0fa037afc75259c24abe0c43895228
Author: Andrew Zhao Luo <an...@gmail.com>
AuthorDate: Fri Sep 2 15:18:13 2022 -0700

    session optional
---
 src/runtime/hexagon/rpc/simulator/session.cc | 113 ++++++++++++++++-----------
 1 file changed, 68 insertions(+), 45 deletions(-)

diff --git a/src/runtime/hexagon/rpc/simulator/session.cc b/src/runtime/hexagon/rpc/simulator/session.cc
index 8943c0b4bf..0469ad5e6e 100644
--- a/src/runtime/hexagon/rpc/simulator/session.cc
+++ b/src/runtime/hexagon/rpc/simulator/session.cc
@@ -18,6 +18,7 @@
  */
 
 #include <HexagonWrapper.h>
+#include <dmlc/optional.h>
 #include <tvm/runtime/packed_func.h>
 #include <tvm/runtime/registry.h>
 // POSIX includes
@@ -30,7 +31,6 @@
 #include <iterator>
 #include <map>
 #include <memory>
-#include <optional>
 #include <sstream>
 #include <string>
 #include <utility>
@@ -55,6 +55,19 @@ using string_list = std::deque<std::string>;
 
 namespace detail {
 
+// An "Optional" class, originally a replacement for llvm::Optional, then an
+// extension of dmlc::optional to make it compatible with C++17's std::optional.
+template <typename T>
+struct Optional : public dmlc::optional<T> {
+  using dmlc::optional<T>::optional;
+  using dmlc::optional<T>::operator=;
+  Optional(const T& val) : dmlc::optional<T>(val) {}  // NOLINT(*)
+  Optional() = default;
+
+  T* operator->() { return &this->operator*(); }
+  const T* operator->() const { return &this->operator*(); }
+};
+
 // Replacement for llvm::StringSwitch.
 template <typename T>
 class StringSwitch {
@@ -81,10 +94,10 @@ class StringSwitch {
  private:
   const std::string key;
   std::map<std::string, T> map;
-  std::optional<T> def_val;
+  Optional<T> def_val;
 };
 
-using MaybeString = std::optional<std::string>;
+using MaybeString = Optional<std::string>;
 
 MaybeString front(const string_list& deq) {
   return !deq.empty() ? MaybeString(deq.front()) : MaybeString();
@@ -99,60 +112,65 @@ MaybeString pop_front(string_list& deq) {  // NOLINT(*)
 
 // Functions used when parsing the argument string.
 
-std::optional<int64_t> to_int(const MaybeString& str) {
+Optional<int64_t> to_int(const MaybeString& str) {
+  auto none = Optional<int64_t>();
   if (str.has_value()) {
     try {
       size_t pos;
       int64_t val = std::stoll(*str, &pos, 0);
-      return pos == str->size() ? std::optional<int64_t>(val) : std::nullopt;
+      return pos == str->size() ? Optional<int64_t>(val) : none;
     } catch (std::invalid_argument&) {
     }
   }
-  return std::nullopt;
+  return none;
 }
 
-std::optional<uint64_t> to_uint(const MaybeString& str) {
+Optional<uint64_t> to_uint(const MaybeString& str) {
+  auto none = Optional<uint64_t>();
   if (str.has_value()) {
     try {
       size_t pos;
       uint64_t val = std::stoull(*str, &pos, 0);
-      return pos == str->size() ? std::optional<uint64_t>(val) : std::nullopt;
+      return pos == str->size() ? Optional<uint64_t>(val) : none;
     } catch (std::invalid_argument&) {
     }
   }
-  return std::nullopt;
+  return none;
 }
 
-std::optional<float> to_float(const MaybeString& str) {
+Optional<float> to_float(const MaybeString& str) {
+  auto none = Optional<float>();
   if (str.has_value()) {
     try {
       size_t pos;
       float val = std::stof(*str, &pos);
-      return pos == str->size() ? std::optional<float>(val) : std::nullopt;
+      return pos == str->size() ? Optional<float>(val) : none;
     } catch (std::invalid_argument&) {
     }
   }
-  return std::nullopt;
+  return none;
 }
 
-std::optional<bool> to_bool(const MaybeString& str) {
+Optional<bool> to_bool(const MaybeString& str) {
+  auto none = Optional<bool>();
   if (auto num = to_int(str)) {
     if (*num == 0) return false;
     if (*num == 1) return true;
-    return std::nullopt;
+    return none;
   }
   if (str) {
     if (*str == "true" || *str == "TRUE") return true;
     if (*str == "false" || *str == "FALSE") return false;
   }
-  return std::nullopt;
+  return none;
 }
 
 template <typename T>
-using MaybeRange = std::optional<std::pair<T, T>>;
+using MaybeRange = Optional<std::pair<T, T>>;
 
-template <typename T, std::optional<T> Parse(const MaybeString&)>
+template <typename T, Optional<T> Parse(const MaybeString&)>
 MaybeRange<T> to_range(const MaybeString& str) {
+  auto none = MaybeRange<T>();
   if (str && !str->empty()) {
     auto n = str->find('-', 1);
     if (n != std::string::npos) {
@@ -163,7 +181,7 @@ MaybeRange<T> to_range(const MaybeString& str) {
       }
     }
   }
-  return std::nullopt;
+  return none;
 }
 
 }  // namespace detail
@@ -209,7 +227,7 @@ class SimulatorRPCChannel final : public RPCChannel {
   static HEX_8u_t PassVirtAddrCallback(void* handle, int threadno, HEX_8u_t RssV, HEX_8u_t RttV,
                                        HEX_8u_t RxxV, HEX_1u_t imm);
 
-  std::optional<HEXAPI_Cpu> GetCPU(const detail::MaybeString& cpu_str);
+  detail::Optional<HEXAPI_Cpu> GetCPU(const detail::MaybeString& cpu_str);
 
   // File name templates for mkstemps.
 #define SUFFIX ".cfg"
@@ -288,17 +306,17 @@ class SimulatorRPCChannel final : public RPCChannel {
   bool HandleV2PTranslation(string_list& rest);     // NOLINT(*)
   bool HandleVerbose(string_list& rest);            // NOLINT(*)
 
-  using MaybeUInt64 = std::optional<uint64_t>;
+  using MaybeUInt64 = detail::Optional<uint64_t>;
   using MaybeUIntRange = std::pair<MaybeUInt64, MaybeUInt64>;
 
   bool should_parse_next(const string_list& rest);
-  std::optional<HEXAPI_Interval> to_interval(const detail::MaybeString& str);
-  std::optional<HEXAPI_TimingMode> to_timingmode(const detail::MaybeString& str);
-  std::optional<HEXAPI_VerboseMode> to_verbosemode(const detail::MaybeString& str);
-  std::optional<HEXAPI_Nullptr> to_nullptr(const detail::MaybeString& str);
+  detail::Optional<HEXAPI_Interval> to_interval(const detail::MaybeString& str);
+  detail::Optional<HEXAPI_TimingMode> to_timingmode(const detail::MaybeString& str);
+  detail::Optional<HEXAPI_VerboseMode> to_verbosemode(const detail::MaybeString& str);
+  detail::Optional<HEXAPI_Nullptr> to_nullptr(const detail::MaybeString& str);
 
   MaybeUIntRange ahb_, axi2_;
-  std::optional<uint32_t> debug_port_;
+  detail::Optional<uint32_t> debug_port_;
 
   using OptionHandler = bool (SimulatorRPCChannel::*)(string_list&);
   static std::map<std::string, OptionHandler> opt_map_;
@@ -538,14 +556,15 @@ HEX_8u_t SimulatorRPCChannel::PassVirtAddrCallback(void* handle, int threadno, H
   return RssV;
 }
 
-std::optional<HEXAPI_Cpu> SimulatorRPCChannel::GetCPU(const detail::MaybeString& cpu_str) {
-  if (!cpu_str) return std::nullopt;
-  return detail::StringSwitch<std::optional<HEXAPI_Cpu>>(*cpu_str)
+detail::Optional<HEXAPI_Cpu> SimulatorRPCChannel::GetCPU(const detail::MaybeString& cpu_str) {
+  auto none = detail::Optional<HEXAPI_Cpu>();
+  if (!cpu_str) return none;
+  return detail::StringSwitch<detail::Optional<HEXAPI_Cpu>>(*cpu_str)
       .Case("v65", HEX_CPU_V65)
       .Case("v66", HEX_CPU_V66)
       .Case("v68", HEX_CPU_V68)
       .Case("v69", HEX_CPU_V69)
-      .Default(std::nullopt);
+      .Default(none);
 }
 
 SimulatorRPCChannel::SimulatorRPCChannel(int stack_size, std::string args) {
@@ -1246,8 +1265,9 @@ bool SimulatorRPCChannel::should_parse_next(const string_list& rest) {
   return false;
 }
 
-std::optional<HEXAPI_Interval> SimulatorRPCChannel::to_interval(const detail::MaybeString& str) {
-  if (!str) return std::nullopt;
+detail::Optional<HEXAPI_Interval> SimulatorRPCChannel::to_interval(const detail::MaybeString& str) {
+  auto none = detail::Optional<HEXAPI_Interval>();
+  if (!str) return none;
 
   if (auto val = detail::to_int(*str)) {
     switch (*val) {
@@ -1260,18 +1280,19 @@ std::optional<HEXAPI_Interval> SimulatorRPCChannel::to_interval(const detail::Ma
     }
   }
 
-  return detail::StringSwitch<std::optional<HEXAPI_Interval>>(*str)
+  return detail::StringSwitch<detail::Optional<HEXAPI_Interval>>(*str)
       .Case("MILLISEC", HEX_MILLISEC)
       .Case("MICROSEC", HEX_MICROSEC)
       .Case("NANOSEC", HEX_NANOSEC)
       .Case("PICOSEC", HEX_PICOSEC)
       .Case("PCYCLE", HEX_PCYCLE)
-      .Default(std::nullopt);
+      .Default(none);
 }
 
-std::optional<HEXAPI_TimingMode> SimulatorRPCChannel::to_timingmode(
+detail::Optional<HEXAPI_TimingMode> SimulatorRPCChannel::to_timingmode(
     const detail::MaybeString& str) {
-  if (!str) return std::nullopt;
+  auto none = detail::Optional<HEXAPI_TimingMode>();
+  if (!str) return none;
 
   if (auto val = detail::to_int(*str)) {
     switch (*val) {
@@ -1283,17 +1304,18 @@ std::optional<HEXAPI_TimingMode> SimulatorRPCChannel::to_timingmode(
     }
   }
 
-  return detail::StringSwitch<std::optional<HEXAPI_TimingMode>>(*str)
+  return detail::StringSwitch<detail::Optional<HEXAPI_TimingMode>>(*str)
       .Case("NOTIMING", HEX_NOTIMING)
       .Case("TIMING_NODBC", HEX_TIMING_NODBC)
       .Case("TIMING", HEX_TIMING)
       .Case("TIMING_COHERENCY", HEX_TIMING_COHERENCY)
-      .Default(std::nullopt);
+      .Default(none);
 }
 
-std::optional<HEXAPI_VerboseMode> SimulatorRPCChannel::to_verbosemode(
+detail::Optional<HEXAPI_VerboseMode> SimulatorRPCChannel::to_verbosemode(
     const detail::MaybeString& str) {
-  if (!str) return std::nullopt;
+  auto none = detail::Optional<HEXAPI_VerboseMode>();
+  if (!str) return none;
 
   if (auto val = detail::to_int(*str)) {
     switch (*val) {
@@ -1306,17 +1328,18 @@ std::optional<HEXAPI_VerboseMode> SimulatorRPCChannel::to_verbosemode(
     }
   }
 
-  return detail::StringSwitch<std::optional<HEXAPI_VerboseMode>>(*str)
+  return detail::StringSwitch<detail::Optional<HEXAPI_VerboseMode>>(*str)
       .Case("SILENT", HEX_SILENT)
       .Case("QUIET", HEX_QUIET)
       .Case("NORMAL", HEX_NORMAL)
       .Case("VERBOSE", HEX_VERBOSE)
       .Case("REALLY_VERBOSE", HEX_REALLY_VERBOSE)
-      .Default(std::nullopt);
+      .Default(none);
 }
 
-std::optional<HEXAPI_Nullptr> SimulatorRPCChannel::to_nullptr(const detail::MaybeString& str) {
-  if (!str) return std::nullopt;
+detail::Optional<HEXAPI_Nullptr> SimulatorRPCChannel::to_nullptr(const detail::MaybeString& str) {
+  auto none = detail::Optional<HEXAPI_Nullptr>();
+  if (!str) return none;
 
   if (auto val = detail::to_int(*str)) {
     switch (*val) {
@@ -1328,12 +1351,12 @@ std::optional<HEXAPI_Nullptr> SimulatorRPCChannel::to_nullptr(const detail::Mayb
     }
   }
 
-  return detail::StringSwitch<std::optional<HEXAPI_Nullptr>>(*str)
+  return detail::StringSwitch<detail::Optional<HEXAPI_Nullptr>>(*str)
       .Case("IGNORE", HEX_NULLPTR_IGNORE)
       .Case("WARN", HEX_NULLPTR_WARN)
       .Case("FATAL", HEX_NULLPTR_FATAL)
       .Case("PCZERO", HEX_NULLPTR_PCZERO)
-      .Default(std::nullopt);
+      .Default(none);
 }
 
 TVM_REGISTER_GLOBAL("tvm.contrib.hexagon.create_hexagon_session")
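
A side note on the helper above: detail::Optional simply layers operator-> on top
of dmlc::optional so that call sites written against C++17's std::optional keep
compiling once the <optional> header is dropped. A minimal self-contained sketch of
the same idea, assuming only <dmlc/optional.h> from dmlc-core (the main() and its
values are invented for illustration, not part of the patch):

    // sketch.cc -- illustrative only; mirrors detail::Optional from the diff above
    #include <dmlc/optional.h>
    #include <iostream>
    #include <string>

    template <typename T>
    struct Optional : public dmlc::optional<T> {
      using dmlc::optional<T>::optional;
      using dmlc::optional<T>::operator=;
      Optional(const T& val) : dmlc::optional<T>(val) {}
      Optional() = default;
      // operator-> is the accessor std::optional has but dmlc::optional lacks.
      T* operator->() { return &this->operator*(); }
      const T* operator->() const { return &this->operator*(); }
    };

    int main() {
      Optional<std::string> cpu("v68");   // engaged
      Optional<std::string> none;         // disengaged
      if (cpu) std::cout << "cpu name length: " << cpu->size() << "\n";
      std::cout << (none.has_value() ? "set" : "unset") << "\n";
      return 0;
    }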


[tvm] 01/20: update configs

Posted by an...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

andrewzhaoluo pushed a commit to branch aluo/rebase-08312022-autotensorization-fq2i-changes
in repository https://gitbox.apache.org/repos/asf/tvm.git

commit 13088402eeaa1eed9d2ffadf3f1ff5c7ba123b44
Author: Andrew Luo <an...@gmail.com>
AuthorDate: Wed Aug 17 11:01:56 2022 -0700

    update configs
---
 python/tvm/meta_schedule/default_config.py | 111 +++++++++++++++++++++++++++--
 1 file changed, 106 insertions(+), 5 deletions(-)

diff --git a/python/tvm/meta_schedule/default_config.py b/python/tvm/meta_schedule/default_config.py
index 652f09261b..73ba0e4fa8 100644
--- a/python/tvm/meta_schedule/default_config.py
+++ b/python/tvm/meta_schedule/default_config.py
@@ -20,9 +20,11 @@ import logging
 from os import path as osp
 from typing import Callable, Dict, List, Optional, Union
 
+from tvm._ffi.registry import register_func
+from tvm.contrib import nvcc
 from tvm.ir import IRModule
 from tvm.target import Target
-from tvm.tir import PrimFunc
+from tvm.tir import PrimFunc, tensor_intrin
 
 from .builder import Builder, LocalBuilder
 from .cost_model import CostModel, XGBModel
@@ -43,6 +45,20 @@ FnPostproc = Callable[[], List[Postproc]]
 FnMutatorProb = Callable[[], Dict[Mutator, float]]
 
 
+def target_has_vnni(target):
+    return target in {
+        "cascadelake",
+        "icelake-client",
+        "icelake-server",
+        "rocketlake",
+        "tigerlake",
+        "cooperlake",
+        "sapphirerapids",
+        "alderlake",
+    }
+
+
+@register_func("tvm.meta_schedule.tune.parse_mod")  # for use in ApplyHistoryBest
 def mod(mod: Union[PrimFunc, IRModule]) -> IRModule:  # pylint: disable=redefined-outer-name
     """Normalize the input to an IRModule"""
     if isinstance(mod, PrimFunc):
@@ -174,9 +190,13 @@ def schedule_rules(  # pylint: disable=redefined-outer-name
         return sch_rules()
     if sch_rules is not None:
         raise TypeError(f"Expected `sch_rules` to be None or callable, but gets: {sch_rules}")
-    if target.kind.name in ["llvm", "hexagon"]:
+    if target.kind.name == "llvm":
+        if target_has_vnni(target.mcpu):
+            return _DefaultLLVMVNNI.schedule_rules()
         return _DefaultLLVM.schedule_rules()
     if target.kind.name in ["cuda", "rocm", "vulkan"]:
+        if target.kind.name == "cuda" and nvcc.have_tensorcore(target=target):
+            return _DefaultCUDATensorCore.schedule_rules()
         return _DefaultCUDA.schedule_rules()
     raise ValueError(f"Unsupported target: {target}")
 
@@ -190,9 +210,13 @@ def postproc(  # pylint: disable=redefined-outer-name
         return postproc()
     if postproc is not None:
         raise TypeError(f"Expected `postproc` to be None or callable, but gets: {postproc}")
-    if target.kind.name in ["llvm", "hexagon"]:
+    if target.kind.name == "llvm":
+        if target_has_vnni(target.mcpu):
+            return _DefaultLLVMVNNI.postprocs()
         return _DefaultLLVM.postprocs()
     if target.kind.name in ["cuda", "rocm", "vulkan"]:
+        if target.kind.name == "cuda" and nvcc.have_tensorcore(target=target):
+            return _DefaultCUDATensorCore.postprocs()
         return _DefaultCUDA.postprocs()
     raise ValueError(f"Unsupported target: {target}")
 
@@ -208,9 +232,13 @@ def mutator_probs(  # pylint: disable=redefined-outer-name
         raise TypeError(
             f"Expected `mutator_probs` to be None or callable, but gets: {mutator_probs}"
         )
-    if target.kind.name in ["llvm", "hexagon"]:
+    if target.kind.name == "llvm":
+        if target_has_vnni(target.mcpu):
+            return _DefaultLLVMVNNI.mutator_probs()
         return _DefaultLLVM.mutator_probs()
     if target.kind.name in ["cuda", "rocm", "vulkan"]:
+        if target.kind.name == "cuda" and nvcc.have_tensorcore(target=target):
+            return _DefaultCUDATensorCore.mutator_probs()
         return _DefaultCUDA.mutator_probs()
     raise ValueError(f"Unsupported target: {target}")
 
@@ -277,6 +305,77 @@ class _DefaultLLVM:
         }
 
 
+class _DefaultLLVMVNNI:
+    """Default tuning configuration for LLVM with VNNI."""
+
+    @staticmethod
+    def schedule_rules() -> List[ScheduleRule]:
+        from tvm.meta_schedule import schedule_rule as M
+
+        logger.info("Using schedule rule: LLVM VNNI")
+
+        return [
+            M.AutoInline(
+                into_producer=False,
+                into_consumer=True,
+                inline_const_tensor=True,
+                disallow_if_then_else=True,
+                require_injective=True,
+                require_ordered=True,
+                disallow_op=["tir.exp"],
+            ),
+            M.AddRFactor(max_jobs_per_core=16, max_innermost_factor=64),
+            M.MultiLevelTilingWithIntrin(
+                tensor_intrin.VNNI_DOT_16x4_INTRIN,
+                structure="SSRSRS",
+                tile_binds=None,
+                max_innermost_factor=64,
+                vector_load_lens=None,
+                reuse_read=None,
+                reuse_write=M.ReuseType(
+                    req="may",
+                    levels=[1, 2],
+                    scope="global",
+                ),
+            ),
+            M.MultiLevelTiling(
+                structure="SSRSRS",
+                tile_binds=None,
+                max_innermost_factor=64,
+                vector_load_lens=None,
+                reuse_read=None,
+                reuse_write=M.ReuseType(
+                    req="may",
+                    levels=[1, 2],
+                    scope="global",
+                ),
+            ),
+            M.ParallelizeVectorizeUnroll(
+                max_jobs_per_core=16,
+                max_vectorize_extent=64,
+                unroll_max_steps=[0, 16, 64, 512],
+                unroll_explicit=True,
+            ),
+            M.RandomComputeLocation(),
+        ]
+
+    @staticmethod
+    def postprocs() -> List[Postproc]:
+        from tvm.meta_schedule import postproc as M
+
+        return [
+            M.DisallowDynamicLoop(),
+            M.RewriteParallelVectorizeUnroll(),
+            M.RewriteReductionBlock(),
+            M.RewriteTensorize(vectorize_init_loop=True),
+            M.RewriteLayout(),
+        ]
+
+    @staticmethod
+    def mutator_probs() -> Dict[Mutator, float]:
+        return _DefaultLLVM.mutator_probs()
+
+
 class _DefaultCUDA:
     """Default tuning configuration for CUDA."""
 
@@ -355,10 +454,12 @@ class _DefaultCUDATensorCore:
         from tvm.meta_schedule import schedule_rule as M
         from tvm.tir.tensor_intrin.cuda import get_wmma_intrin_group
 
+        logger.info("Using schedule rule: CUDA tensorcore")
+
         return [
             M.MultiLevelTilingTensorCore(
                 intrin_groups=[
-                    get_wmma_intrin_group(
+                    tensor_intrin.get_wmma_intrin_group(
                         store_scope="shared",
                         in_dtype=in_dtype,
                         out_dtype=out_dtype,


[tvm] 04/20: optional cast

Posted by an...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

andrewzhaoluo pushed a commit to branch aluo/rebase-08312022-autotensorization-fq2i-changes
in repository https://gitbox.apache.org/repos/asf/tvm.git

commit ac675efd907abd6d7ee2167f08de40d121d1c1ac
Author: Andrew Zhao Luo <an...@gmail.com>
AuthorDate: Fri Sep 2 12:37:50 2022 -0700

    optional cast
---
 python/tvm/relay/op/contrib/dnnl.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/tvm/relay/op/contrib/dnnl.py b/python/tvm/relay/op/contrib/dnnl.py
index e27449ac43..67909b04b8 100644
--- a/python/tvm/relay/op/contrib/dnnl.py
+++ b/python/tvm/relay/op/contrib/dnnl.py
@@ -831,7 +831,7 @@ class LayerNormRewritePattern1(DFPatternCallback):
         self.beta = wildcard()
         mu = is_op("mean")(self.data)
         diff = is_op("subtract")(self.data, mu)
-        cdiff = is_op("cast")(diff)
+        cdiff = is_op("cast")(diff) | diff  # cast does not need to be here usually
         const_two = (
             is_expr(relay.const(2))
             | is_expr(relay.const(2.0))


[tvm] 08/20: fix new imports

Posted by an...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

andrewzhaoluo pushed a commit to branch aluo/rebase-08312022-autotensorization-fq2i-changes
in repository https://gitbox.apache.org/repos/asf/tvm.git

commit a9b1d124e1391413b91431020502db0f0546bd34
Author: Andrew Luo <an...@gmail.com>
AuthorDate: Wed Aug 17 21:05:08 2022 -0700

    fix new imports
---
 python/tvm/meta_schedule/default_config.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/tvm/meta_schedule/default_config.py b/python/tvm/meta_schedule/default_config.py
index a88a9be88d..c1e886e7f7 100644
--- a/python/tvm/meta_schedule/default_config.py
+++ b/python/tvm/meta_schedule/default_config.py
@@ -24,7 +24,7 @@ from tvm._ffi.registry import register_func
 from tvm.contrib import nvcc
 from tvm.ir import IRModule
 from tvm.target import Target
-from tvm.tir import PrimFunc, tensor_intrin
+from tvm.tir import PrimFunc
 
 from .builder import Builder, LocalBuilder
 from .cost_model import CostModel, XGBModel
@@ -460,7 +460,7 @@ class _DefaultCUDATensorCore:
         return [
             M.MultiLevelTilingTensorCore(
                 intrin_groups=[
-                    tensor_intrin.get_wmma_intrin_group(
+                    get_wmma_intrin_group(
                         store_scope="shared",
                         in_dtype=in_dtype,
                         out_dtype=out_dtype,


[tvm] 02/20: fix new imports

Posted by an...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

andrewzhaoluo pushed a commit to branch aluo/rebase-08312022-autotensorization-fq2i-changes
in repository https://gitbox.apache.org/repos/asf/tvm.git

commit 0a191e975a5e84592366b6ec9a030a0bf3754b6d
Author: Andrew Luo <an...@gmail.com>
AuthorDate: Wed Aug 17 21:05:08 2022 -0700

    fix new imports
---
 python/tvm/meta_schedule/default_config.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/python/tvm/meta_schedule/default_config.py b/python/tvm/meta_schedule/default_config.py
index 73ba0e4fa8..c1e886e7f7 100644
--- a/python/tvm/meta_schedule/default_config.py
+++ b/python/tvm/meta_schedule/default_config.py
@@ -24,7 +24,7 @@ from tvm._ffi.registry import register_func
 from tvm.contrib import nvcc
 from tvm.ir import IRModule
 from tvm.target import Target
-from tvm.tir import PrimFunc, tensor_intrin
+from tvm.tir import PrimFunc
 
 from .builder import Builder, LocalBuilder
 from .cost_model import CostModel, XGBModel
@@ -311,6 +311,7 @@ class _DefaultLLVMVNNI:
     @staticmethod
     def schedule_rules() -> List[ScheduleRule]:
         from tvm.meta_schedule import schedule_rule as M
+        from tvm.tir.tensor_intrin.x86 import VNNI_DOT_16x4_INTRIN
 
         logger.info("Using schedule rule: LLVM VNNI")
 
@@ -326,7 +327,7 @@ class _DefaultLLVMVNNI:
             ),
             M.AddRFactor(max_jobs_per_core=16, max_innermost_factor=64),
             M.MultiLevelTilingWithIntrin(
-                tensor_intrin.VNNI_DOT_16x4_INTRIN,
+                VNNI_DOT_16x4_INTRIN,
                 structure="SSRSRS",
                 tile_binds=None,
                 max_innermost_factor=64,
@@ -459,7 +460,7 @@ class _DefaultCUDATensorCore:
         return [
             M.MultiLevelTilingTensorCore(
                 intrin_groups=[
-                    tensor_intrin.get_wmma_intrin_group(
+                    get_wmma_intrin_group(
                         store_scope="shared",
                         in_dtype=in_dtype,
                         out_dtype=out_dtype,


[tvm] 18/20: final optional

Posted by an...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

andrewzhaoluo pushed a commit to branch aluo/rebase-08312022-autotensorization-fq2i-changes
in repository https://gitbox.apache.org/repos/asf/tvm.git

commit 18b8089564e3eda6caf42ede61a24a6d47efb841
Author: Andrew Zhao Luo <an...@gmail.com>
AuthorDate: Fri Sep 2 15:22:55 2022 -0700

    final optional
---
 src/relay/transforms/fold_explicit_padding.cc  | 13 ++---
 src/relay/transforms/pattern_utils.h           | 69 +++++++-------------------
 src/tir/transforms/common_subexpr_elim_tools.h |  5 +-
 src/tir/transforms/loop_partition.cc           | 51 +++++++------------
 4 files changed, 44 insertions(+), 94 deletions(-)

diff --git a/src/relay/transforms/fold_explicit_padding.cc b/src/relay/transforms/fold_explicit_padding.cc
index 794bcfd3d0..37385f80c1 100644
--- a/src/relay/transforms/fold_explicit_padding.cc
+++ b/src/relay/transforms/fold_explicit_padding.cc
@@ -22,6 +22,7 @@
  * \brief A pass for folding explicit pads into other ops.
  */
 
+#include <dmlc/optional.h>
 #include <tvm/relay/dataflow_matcher.h>
 #include <tvm/relay/expr.h>
 #include <tvm/relay/expr_functor.h>
@@ -31,10 +32,6 @@
 #include <tvm/tir/op.h>
 #include <tvm/topi/nn/pooling.h>
 
-#include <optional>
-#include <set>
-#include <string>
-
 #include "../op/tensor/transform.h"
 #include "pattern_utils.h"
 
@@ -183,10 +180,10 @@ class SimplifyExplicitPad {
     return attrs;
   }
 
-  static const std::optional<Array<PrimExpr>> get_padding(const PadAttrs* param,
-                                                          std::string data_layout) {
+  static const Optional<Array<PrimExpr>> get_padding(const PadAttrs* param,
+                                                     std::string data_layout) {
     // Gets spatial axes padding from the given PadAttrs `param`. If padding
-    // is non-zero on non-spatial axes, return std::nullopt.
+    // is non-zero on non-spatial axes, return NullOpt.
     ICHECK(param);
     ICHECK(data_layout.size() == param->pad_width.size())
         << "Data Layout and padding attributes should have the same extent";
@@ -199,7 +196,7 @@ class SimplifyExplicitPad {
       if (!image_dims.count(data_layout[i])) {
         for (size_t j = 0; j < param->pad_width[i].size(); ++j) {
           if (param->pad_width[i][j] != 0) {
-            return std::nullopt;
+            return NullOpt;
           }
         }
       }
diff --git a/src/relay/transforms/pattern_utils.h b/src/relay/transforms/pattern_utils.h
index ffe1cc2ca2..f71d84434d 100644
--- a/src/relay/transforms/pattern_utils.h
+++ b/src/relay/transforms/pattern_utils.h
@@ -27,6 +27,7 @@
 #define TVM_RELAY_TRANSFORMS_PATTERN_UTILS_H_
 
 #include <builtin_fp16.h>
+#include <dmlc/optional.h>
 #include <tvm/node/structural_equal.h>
 #include <tvm/relay/analysis.h>
 #include <tvm/relay/attrs/nn.h>
@@ -39,7 +40,6 @@
 #include <tvm/tir/data_layout.h>
 
 #include <limits>
-#include <optional>
 #include <string>
 #include <utility>
 #include <vector>
@@ -344,40 +344,6 @@ static inline Constant MakeConstantTensor(DataType dtype, std::vector<int64_t> s
   return Constant(arr);
 }
 
-/*!
- * \brief Create a Constant tensor of zeros.
- *
- * \param dtype The data type.
- * \param shape The shape of the output constant tensor.
- * \return A Constant.
- */
-static inline Constant MakeConstantZeros(DataType dtype, std::vector<int64_t> shape) {
-  runtime::NDArray arr = runtime::NDArray::Empty(shape, dtype, {kDLCPU, 0});
-  int64_t data_size = 1;
-  for (int64_t dim : shape) {
-    data_size *= dim;
-  }
-  TVM_DTYPE_DISPATCH(dtype, DType, {
-    for (int64_t i = 0; i < data_size; i++) {
-      if (dtype == DataType::Float(16)) {
-        // convert to float16
-        // storage is uint16_t
-        // Similar handling as that in MakeConstantScalar
-        *(static_cast<DType*>(arr->data) + i) =
-            __truncXfYf2__<float, uint32_t, 23, uint16_t, uint16_t, 10>(static_cast<float>(0));
-      } else if (dtype == DataType::BFloat(16)) {
-        // convert to bfloat16
-        // storage is uint16_t
-        *(static_cast<DType*>(arr->data) + i) =
-            __truncXfYf2__<float, uint32_t, 23, uint16_t, uint16_t, 7>(static_cast<float>(0));
-      } else {
-        *(static_cast<DType*>(arr->data) + i) = 0;
-      }
-    }
-  })
-  return Constant(arr);
-}
-
 /*!
  * \brief Check whether a shape is static and create corresponding Constant.
  Eventually this will be removed and replaced with CheckConstantShapeArrayInteger
@@ -439,47 +405,48 @@ inline bool IsEqualScalar(const Expr& a, const Expr& b) {
  * \param i element index
  * \return Converted scalar value, or None if conversion failed
  */
-static inline std::optional<long double> TryToScalar(const runtime::NDArray& array, size_t i = 0) {
+static inline dmlc::optional<long double> TryToScalar(const runtime::NDArray& array, size_t i = 0) {
   if (array->dtype.code == kDLInt) {
     if (array->dtype.bits == 8) {
-      return std::optional<long double>(reinterpret_cast<int8_t*>(array->data)[i]);
+      return dmlc::optional<long double>(reinterpret_cast<int8_t*>(array->data)[i]);
     } else if (array->dtype.bits == 16) {
-      return std::optional<long double>(reinterpret_cast<int16_t*>(array->data)[i]);
+      return dmlc::optional<long double>(reinterpret_cast<int16_t*>(array->data)[i]);
     } else if (array->dtype.bits == 32) {
-      return std::optional<long double>(reinterpret_cast<int32_t*>(array->data)[i]);
+      return dmlc::optional<long double>(reinterpret_cast<int32_t*>(array->data)[i]);
     } else if (array->dtype.bits == 64) {
-      return std::optional<long double>(reinterpret_cast<int64_t*>(array->data)[i]);
+      return dmlc::optional<long double>(reinterpret_cast<int64_t*>(array->data)[i]);
     }
   } else if (array->dtype.code == kDLUInt) {
     if (array->dtype.bits == 1) {  // bool
-      return std::optional<long double>(reinterpret_cast<uint8_t*>(array->data)[i]);
+      return dmlc::optional<long double>(reinterpret_cast<uint8_t*>(array->data)[i]);
     } else if (array->dtype.bits == 8) {
-      return std::optional<long double>(reinterpret_cast<uint8_t*>(array->data)[i]);
+      return dmlc::optional<long double>(reinterpret_cast<uint8_t*>(array->data)[i]);
     } else if (array->dtype.bits == 16) {
-      return std::optional<long double>(reinterpret_cast<uint16_t*>(array->data)[i]);
+      return dmlc::optional<long double>(reinterpret_cast<uint16_t*>(array->data)[i]);
     } else if (array->dtype.bits == 32) {
-      return std::optional<long double>(reinterpret_cast<uint32_t*>(array->data)[i]);
+      return dmlc::optional<long double>(reinterpret_cast<uint32_t*>(array->data)[i]);
     } else if (array->dtype.bits == 64) {
-      return std::optional<long double>(reinterpret_cast<uint64_t*>(array->data)[i]);
+      return dmlc::optional<long double>(reinterpret_cast<uint64_t*>(array->data)[i]);
     }
   } else if (array->dtype.code == kDLFloat) {
     if (array->dtype.bits == 16) {
-      return std::optional<long double>(
+      return dmlc::optional<long double>(
           __extendXfYf2__<uint16_t, uint16_t, 10, float, uint32_t, 23>(
               reinterpret_cast<uint16_t*>(array->data)[i]));
     }
     if (array->dtype.bits == 32) {
-      return std::optional<long double>(reinterpret_cast<float*>(array->data)[i]);
+      return dmlc::optional<long double>(reinterpret_cast<float*>(array->data)[i]);
     } else if (array->dtype.bits == 64) {
-      return std::optional<long double>(reinterpret_cast<double*>(array->data)[i]);
+      return dmlc::optional<long double>(reinterpret_cast<double*>(array->data)[i]);
     }
   } else if (array->dtype.code == kDLBfloat) {
     if (array->dtype.bits == 16) {
-      return std::optional<long double>(__extendXfYf2__<uint16_t, uint16_t, 7, float, uint32_t, 23>(
-          reinterpret_cast<uint16_t*>(array->data)[i]));
+      return dmlc::optional<long double>(
+          __extendXfYf2__<uint16_t, uint16_t, 7, float, uint32_t, 23>(
+              reinterpret_cast<uint16_t*>(array->data)[i]));
     }
   }
-  return std::nullopt;
+  return dmlc::optional<long double>();
 }
 
 /*!
diff --git a/src/tir/transforms/common_subexpr_elim_tools.h b/src/tir/transforms/common_subexpr_elim_tools.h
index 0871fd0091..fcd29fddc0 100644
--- a/src/tir/transforms/common_subexpr_elim_tools.h
+++ b/src/tir/transforms/common_subexpr_elim_tools.h
@@ -33,11 +33,12 @@
 #include <tvm/tir/stmt.h>
 #include <tvm/tir/stmt_functor.h>  // For the class StmtExprVisitor
 
-#include <optional>
 #include <unordered_map>  // For the hashtable datatype
 #include <utility>        // For pairs datatype
 #include <vector>
 
+#include "../../../3rdparty/dmlc-core/include/dmlc/optional.h"
+
 namespace tvm {
 namespace tir {
 
@@ -176,7 +177,7 @@ class UsesVarName : public StmtExprVisitor {
  */
 void PrintComputationTable(const ComputationTable& table);
 
-using MaybeValue = std::optional<PrimExpr>;
+using MaybeValue = dmlc::optional<PrimExpr>;
 
 bool EqualTerms(const PrimExpr& a, const PrimExpr& b);
 // Used for deciding the (decidable) equivalence relation
diff --git a/src/tir/transforms/loop_partition.cc b/src/tir/transforms/loop_partition.cc
index 6ecc6459b9..677506889e 100644
--- a/src/tir/transforms/loop_partition.cc
+++ b/src/tir/transforms/loop_partition.cc
@@ -29,7 +29,6 @@
 #include <tvm/tir/stmt_functor.h>
 #include <tvm/tir/transform.h>
 
-#include <optional>
 #include <unordered_map>
 #include <unordered_set>
 
@@ -554,39 +553,25 @@ Stmt LoopPartitioner::TryPartition(const Stmt& stmt, Var var, PrimExpr min, Prim
   if (finder.partitions.empty()) return Stmt();
 
   arith::IntervalSet for_interval(min, max);
-
-  auto [middle_interval, cond_set,
-        opt_cond_value] = [&]() -> std::tuple<IntSet, ExpressionSet, std::optional<bool>> {
-    {
-      // find an interval in which all conditions on var are true
-      auto [middle_interval, cond_set] =
-          GetIntervalAndCondset(finder.partitions, for_interval, true, has_partition_hint_);
-      if (!middle_interval.IsNothing()) {
-        return {middle_interval, cond_set, true};
-      }
-    }
-
-    {
-      // if such interval doesn't exist, find an interval in which all
-      // conditions on var are false
-      auto [middle_interval, cond_set] =
-          GetIntervalAndCondset(finder.partitions, for_interval, false, has_partition_hint_);
-
-      if (!middle_interval.IsNothing()) {
-        return {middle_interval, cond_set, false};
-      }
-    }
-
-    // we couldn't find an interval in which the conditions are
-    // provably true or false.  Therefore, we can't partition the loop
-    // based on those conds
-    return {{}, {}, std::nullopt};
-  }();
-
-  if (!opt_cond_value.has_value()) {
-    return Stmt();
+  bool cond_value;
+  IntSet middle_interval;
+  ExpressionSet cond_set;
+  // find an interval in which all conditions on var are true
+  std::tie(middle_interval, cond_set) =
+      GetIntervalAndCondset(finder.partitions, for_interval, true, has_partition_hint_);
+  if (middle_interval.IsNothing()) {
+    // if such interval doesn't exist, find an interval in which all
+    // conditions on var are false
+    std::tie(middle_interval, cond_set) =
+        GetIntervalAndCondset(finder.partitions, for_interval, false, has_partition_hint_);
+    if (middle_interval.IsNothing())
+      // we couldn't find an interval in which the conditions are provably true or false
+      // Therefore, we can't partition the loop based on those conds
+      return Stmt();
+    cond_value = false;
+  } else {
+    cond_value = true;
   }
-  bool cond_value = opt_cond_value.value();
 
   IntervalSet middle_interval_i = Downcast<IntervalSet>(middle_interval);
   // middle_interval is the subrange of the loop variable range for which a


[tvm] 20/20: div impl

Posted by an...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

andrewzhaoluo pushed a commit to branch aluo/rebase-08312022-autotensorization-fq2i-changes
in repository https://gitbox.apache.org/repos/asf/tvm.git

commit bf7d866541cc2425de526c8429f3bf086cb6fe3b
Author: Andrew Zhao Luo <an...@gmail.com>
AuthorDate: Fri Sep 16 15:47:02 2022 -0700

    div impl
---
 python/tvm/relay/qnn/op/qnn.py                     |  68 ++++++++++++
 .../transform/fake_quantization_to_integer.py      |  88 +++++++++++++++-
 src/relay/qnn/op/div.cc                            | 117 +++++++++++++++++++++
 3 files changed, 272 insertions(+), 1 deletion(-)

diff --git a/python/tvm/relay/qnn/op/qnn.py b/python/tvm/relay/qnn/op/qnn.py
index 1f38385107..6d1cabeb8d 100644
--- a/python/tvm/relay/qnn/op/qnn.py
+++ b/python/tvm/relay/qnn/op/qnn.py
@@ -788,6 +788,74 @@ def mul(
     )
 
 
+def div(
+    lhs,
+    rhs,
+    lhs_scale,
+    lhs_zero_point,
+    rhs_scale,
+    rhs_zero_point,
+    output_scale,
+    output_zero_point,
+    lhs_axis=-1,
+    rhs_axis=-1,
+):
+    """Quantized division with numpy-style broadcasting.
+
+    Parameters
+    ----------
+    lhs : relay.Expr
+        The left hand side quantized input data.
+
+    rhs : relay.Expr
+        The right hand side quantized input data.
+
+    lhs_scale: relay.Expr
+        The scale of the lhs quantized expr.
+
+    lhs_zero_point: relay.Expr
+       The zero point of lhs quantized expr.
+
+    rhs_scale: relay.Expr
+        The scale of the rhs quantized expr.
+
+    rhs_zero_point: relay.Expr
+       The zero point of rhs quantized expr.
+
+    output_scale: relay.Expr
+        The scale of the output quantized expr.
+
+    output_zero_point: relay.Expr
+       The zero point of output quantized expr.
+
+    lhs_axis: int
+        The channel axis for lhs quantization. Default value is -1 which corresponds
+        to the last axis.
+
+    rhs_axis: int
+        The channel axis for rhs quantization. Default value is -1 which corresponds
+        to the last axis.
+
+    Returns
+    -------
+    result : relay.Expr
+        The computed result.
+
+    """
+    return _make.div(
+        lhs,
+        rhs,
+        lhs_scale,
+        lhs_zero_point,
+        rhs_scale,
+        rhs_zero_point,
+        output_scale,
+        output_zero_point,
+        lhs_axis,
+        rhs_axis,
+    )
+
+
 def tanh(x, scale, zero_point, output_scale, output_zero_point):
     """Quantized tanh.
 
diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py
index bb874c131c..5b6845bd63 100644
--- a/python/tvm/relay/transform/fake_quantization_to_integer.py
+++ b/python/tvm/relay/transform/fake_quantization_to_integer.py
@@ -19,6 +19,7 @@ import numpy as np
 import tvm
 from tvm import relay
 from tvm.ir import TensorAffineType, TupleAffineType
+from tvm.relay.op.tensor import ones_like
 
 # import to register canonicalization funcs for fq2i
 # pylint: disable=unused-import
@@ -199,6 +200,60 @@ def broadcast_to(expr, type_map):
     return [out, t]
 
 
+@register_fake_quantization_to_integer("take")
+def take(expr, type_map):
+    """Rewrite a take op"""
+    arg1 = expr.args[0]
+    t = type_map[arg1]
+    arg2 = expr.args[1]
+    out = relay.op.take(
+        arg1,
+        arg2,
+        axis=expr.attrs.axis,
+        batch_dims=expr.attrs.batch_dims,
+        mode=expr.attrs.mode,
+    )
+    return [out, t]
+
+
+@register_fake_quantization_to_integer("power")
+def power(expr, type_map):
+    base = expr.args[0]
+    exponent = expr.args[1]
+
+    base_type = type_map[base]
+
+    if not isinstance(exponent, relay.Constant):
+        return [expr, type_map[expr]]
+
+    data = exponent.data.numpy()
+    if not len(data.shape) == 0:
+        return [expr, type_map[expr]]
+
+    data = data.item()
+    if data != 2:
+        return [expr, type_map[expr]]
+
+    out = relay.qnn.op.mul(
+        base,
+        base,
+        base_type.scale,
+        base_type.zero_point,
+        base_type.scale,
+        base_type.zero_point,
+        output_scale=base_type.scale * base_type.scale,
+        output_zero_point=base_type.zero_point,
+        lhs_axis=base_type.axis,
+        rhs_axis=base_type.axis,
+    )
+    return [
+        out,
+        TensorAffineType(
+            base_type.scale * base_type.scale, base_type.zero_point, base_type.dtype, base_type.axis
+        ),
+    ]
+
+
 @register_fake_quantization_to_integer("nn.bias_add")
 def bias_add(expr, type_map):
     """Rewrite a bias_add op"""
@@ -513,6 +568,37 @@ def register_binary_qnn(op_name, op):
 register_binary_qnn("add", lambda *args: relay.qnn.op.add(*args))
 register_binary_qnn("multiply", lambda *args: relay.qnn.op.mul(*args))
 register_binary_qnn("subtract", lambda *args: relay.qnn.op.subtract(*args))
+register_binary_qnn("divide", lambda *args: relay.qnn.op.div(*args))
+
+
+'''
+@register_fake_quantization_to_integer("divide")
+def divide(expr, type_map):
+    """Rewrite an adaptive avgpool op"""
+    numerator = expr.args[0]
+    denominator = expr.args[1]
+    numerator_t = type_map[numerator]
+    denominator_t = type_map[denominator]
+    new_scale = numerator_t.scale / (denominator_t.scale * (denominator - denominator_t.zero_point))
+    out = relay.divide(numerator, ones_like(denominator))
+    assert numerator_t.axis == denominator_t.axis, "Only support identical axis for now."
+    # print(out)
+
+    print("new out:")
+    str_new_out = str(relay.transform.InferType()(tvm.IRModule.from_expr(out)))
+    print("\n".join(str_new_out.split("\n")[-10:]))
+    print("old_out:")
+    str_old_out = str(relay.transform.InferType()(tvm.IRModule.from_expr(expr)))
+    print("\n".join(str_old_out.split("\n")[-10:]))
+    print()
+    breakpoint()
+    # print("yay!")
+    # This is to get broadcasting working to get same shape
+    return [
+        out,
+        TensorAffineType(new_scale, numerator_t.zero_point, numerator_t.dtype, numerator_t.axis),
+    ]
+'''
 
 
 def register_binary_identity(op_name, op):
@@ -578,4 +664,4 @@ register_unary_qnn("sigmoid", relay.qnn.op.sigmoid)
 register_unary_qnn("hardswish", relay.qnn.op.hardswish)
 register_unary_qnn("tanh", relay.qnn.op.tanh)
 register_unary_qnn("abs", relay.qnn.op.abs)
-register_unary_qnn("log", relay.qnn.op.log)
+register_unary_qnn("log", relay.qnn.op.log)
\ No newline at end of file
diff --git a/src/relay/qnn/op/div.cc b/src/relay/qnn/op/div.cc
new file mode 100644
index 0000000000..3c37ed41c4
--- /dev/null
+++ b/src/relay/qnn/op/div.cc
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/relay/qnn/op/mul.cc
+ * \brief QNN mul operator.
+ */
+#include <tvm/relay/analysis.h>
+#include <tvm/relay/op_attr_types.h>
+#include <tvm/relay/qnn/attrs.h>
+
+#include "../../transforms/pattern_utils.h"
+#include "../utils.h"
+#include "op_common.h"
+
+namespace tvm {
+namespace relay {
+namespace qnn {
+
+/*
+ * \brief Canonicalizes the QNN div op.
+ * \param attrs The QNN div attrs.
+ * \param new_args The new mutated args to the call node.
+ * \param arg_types The types of input and output.
+ * \return The sequence of Relay ops for mul op.
+ */
+Expr QnnDivCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
+                        const Array<tvm::relay::Type>& arg_types) {
+  Expr output;
+
+  // Get the attrs.
+  QnnBinaryOpArguments args(new_args);
+
+  // Get the input dtype and shape.
+  QnnBinaryOpTensorType input_type(arg_types, 0);
+
+  // data types
+  const auto int32_dtype = DataType::Int(32);
+  const auto float32_dtype = DataType::Float(32);
+
+  const auto* broadcast_attrs = attrs.as<BroadcastAttrs>();
+  ICHECK(broadcast_attrs != nullptr);
+
+  if (IsConstScalar(args.lhs_scale) && IsConstScalar(args.rhs_scale)) {
+    /* If both are constant:
+
+    n1/n2 = [s1(q1-z1)] / [s2(q2-z2)]
+    n1/n2 = [s1/s2][(q1-z1)/(q2-z2)]
+
+    As [(q1-z1)/(q2-z2)] is integer division, we lose perhaps significant precision.
+    To get around this we scale the numerator by C to ensure that
+
+    |C(q1-z1)| >> (q2 - z2) and the precision loss from the division is minimal:
+
+    n1/n2 = [s1/(s2 * C)][C(q1-z1)/(q2-z2)]
+    */
+
+    auto lhs_shifted = Cast(args.lhs, int32_dtype);
+    auto rhs_shifted = Cast(args.rhs, int32_dtype);
+
+    auto zero_scalar = MakeConstantScalar(int32_dtype, 0);
+    if (!IsEqualScalar(args.lhs_zero_point, zero_scalar)) {
+      lhs_shifted = Subtract(lhs_shifted, args.lhs_zero_point);
+    }
+
+    if (!IsEqualScalar(args.rhs_zero_point, zero_scalar)) {
+      rhs_shifted = Subtract(rhs_shifted, args.rhs_zero_point);
+    }
+
+    // multiply numerator to avoid precision loss, as accumulate in INT32 and
+    // may deal with UINT16, multiply by 2^15
+    int divide_scale_factor = 32768;
+    auto divide_scale_factor_constant = MakeConstantScalar(int32_dtype, divide_scale_factor);
+    output = Divide(Multiply(lhs_shifted, divide_scale_factor_constant), rhs_shifted);
+
+    // Get the adjusted new scale and zero points.
+    float lhs_scale_float = GetScalarFromConstant<float>(args.lhs_scale);
+    float rhs_scale_float = GetScalarFromConstant<float>(args.rhs_scale);
+    float new_scale_float = lhs_scale_float / (rhs_scale_float * divide_scale_factor);
+    auto new_input_scale = MakeConstantScalar(float32_dtype, new_scale_float);
+    auto new_input_zero_point = zero_scalar;
+
+    // Requantize to get Q_c
+    output = Requantize(output, input_type.shape, new_input_scale, new_input_zero_point,
+                        args.output_scale, args.output_zero_point, input_type.dtype);
+  } else {
+    LOG(FATAL) << "Non-constant scale_factor not supported yet.";
+  }
+
+  return output;
+}
+
+// QNN Multiplication operator.
+QNN_REGISTER_BINARY_OP("div")
+    .describe("Elementwise div with broadcasting for quantized tensors.")
+    .set_support_level(11)
+    .set_attr<FTVMLegalize>("FTVMQnnCanonicalize", QnnDivCanonicalize);
+
+}  // namespace qnn
+}  // namespace relay
+}  // namespace tvm
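
The scaling trick spelled out in the QnnDivCanonicalize comment above can be checked
with ordinary arithmetic. Below is a small stand-alone sketch (plain C++, no TVM
types; every scale, zero point and quantized value is invented for illustration):

    // Numeric check of the scheme n1/n2 = [s1/(s2*C)] * [C*(q1-z1)/(q2-z2)], C = 2^15.
    #include <cmath>
    #include <cstdint>
    #include <iostream>

    int main() {
      // Quantized inputs, n = s * (q - z).
      float s1 = 0.1f;  int32_t z1 = 0, q1 = 40;   // n1 = 4.0
      float s2 = 0.05f; int32_t z2 = 0, q2 = 40;   // n2 = 2.0
      float s_out = 0.1f; int32_t z_out = 0;       // requested output quantization

      // Scale the numerator by C before the integer division so that
      // |C*(q1 - z1)| >> (q2 - z2) and the truncation error stays small.
      const int32_t c = 32768;
      int32_t q_div = (c * (q1 - z1)) / (q2 - z2);  // integer division
      float s_div = s1 / (s2 * c);                  // scale attached to q_div

      // Requantize q_div from (s_div, 0) to (s_out, z_out).
      int32_t q_out = static_cast<int32_t>(std::lround(q_div * (s_div / s_out))) + z_out;

      std::cout << "float result:       " << (s1 * (q1 - z1)) / (s2 * (q2 - z2)) << "\n";  // 2
      std::cout << "dequantized output: " << s_out * (q_out - z_out) << "\n";              // 2
      return 0;
    }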


[tvm] 13/20: optional complete

Posted by an...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

andrewzhaoluo pushed a commit to branch aluo/rebase-08312022-autotensorization-fq2i-changes
in repository https://gitbox.apache.org/repos/asf/tvm.git

commit 6100211a536ea9644d2a153163a022b797cd1dab
Author: Andrew Zhao Luo <an...@gmail.com>
AuthorDate: Fri Sep 2 14:42:04 2022 -0700

    optional complete
---
 src/tir/ir/buffer_common.h                  | 16 +++++++++-------
 src/tir/ir/expr.cc                          |  8 ++++----
 src/tir/ir/stmt.cc                          |  8 ++++----
 src/tir/transforms/inject_ptx_async_copy.cc |  8 ++++----
 src/tir/transforms/storage_rewrite.cc       | 21 +++++++++------------
 5 files changed, 30 insertions(+), 31 deletions(-)

diff --git a/src/tir/ir/buffer_common.h b/src/tir/ir/buffer_common.h
index 5921c54d98..8dac41a02e 100644
--- a/src/tir/ir/buffer_common.h
+++ b/src/tir/ir/buffer_common.h
@@ -26,7 +26,7 @@
 #include <tvm/ir/type.h>
 #include <tvm/runtime/data_type.h>
 
-#include <optional>
+#include <utility>
 
 namespace tvm {
 namespace tir {
@@ -36,20 +36,22 @@ namespace tir {
  *
  * \param type The type to be checked.
  *
- * \return An std::optional<DataType> object. If the type is a pointer
- * to a primitive type, the object has a value which is the pointed-to
- * type. Otherwise the object is nullopt.
+ * \return A (bool, DataType) pair.  If the type is a pointer to a
+ * primitive, the boolean is true and the DataType is the pointed-to
+ * type.  Otherwise, the boolean is false and the DataType is
+ * default-constructed.  This can be replaced with std::optional with
+ * C++17 if/when C++17 is required.
  */
-inline std::optional<runtime::DataType> GetPointerType(const Type& type) {
+inline std::pair<bool, runtime::DataType> GetPointerType(const Type& type) {
   if (type.defined()) {
     if (auto* ptr_type = type.as<PointerTypeNode>()) {
       if (auto* prim_type = ptr_type->element_type.as<PrimTypeNode>()) {
-        return prim_type->dtype;
+        return {true, prim_type->dtype};
       }
     }
   }
 
-  return std::nullopt;
+  return {false, DataType()};
 }
 
 }  // namespace tir
diff --git a/src/tir/ir/expr.cc b/src/tir/ir/expr.cc
index 59db4ea410..f841f94b5a 100644
--- a/src/tir/ir/expr.cc
+++ b/src/tir/ir/expr.cc
@@ -648,7 +648,7 @@ Load::Load(DataType dtype, Var buffer_var, PrimExpr index, PrimExpr predicate, S
   // annotation tells us otherwise.
   int element_lanes = 1;
   auto pointer_type = tir::GetPointerType(buffer_var->type_annotation);
-  if (pointer_type.has_value()) {
+  if (pointer_type.first) {
     // Cannot check element type of array, as it may be different than
     // the loaded type in some cases.
     //
@@ -663,11 +663,11 @@ Load::Load(DataType dtype, Var buffer_var, PrimExpr index, PrimExpr predicate, S
     // See https://discuss.tvm.apache.org/t/pre-rfc-vectorized-tir-buffers/10615
     // for discussion.
 
-    // ICHECK(dtype.element_of() == pointer_type->element_of())
+    // ICHECK(dtype.element_of() == pointer_type.second.element_of())
     //     << "Type mismatch, cannot load type " << dtype << " from buffer " <<
     //     buffer_var->name_hint
-    //     << " of type " << pointer_type.value();
-    element_lanes = pointer_type->lanes();
+    //     << " of type " << pointer_type.second;
+    element_lanes = pointer_type.second.lanes();
   }
 
   // The C-based codegens assume that all loads occur on a array with
diff --git a/src/tir/ir/stmt.cc b/src/tir/ir/stmt.cc
index e21d014fe1..524204f3d3 100644
--- a/src/tir/ir/stmt.cc
+++ b/src/tir/ir/stmt.cc
@@ -271,7 +271,7 @@ Store::Store(Var buffer_var, PrimExpr value, PrimExpr index, PrimExpr predicate,
   // annotation tells us otherwise.
   int element_lanes = 1;
   auto pointer_type = tir::GetPointerType(buffer_var->type_annotation);
-  if (pointer_type.has_value()) {
+  if (pointer_type.first) {
     // Currently cannot check element type of array, see Load::Load
     // for details.
 
@@ -279,10 +279,10 @@ Store::Store(Var buffer_var, PrimExpr value, PrimExpr index, PrimExpr predicate,
     // See https://discuss.tvm.apache.org/t/pre-rfc-vectorized-tir-buffers/10615
     // for discussion.
 
-    // ICHECK_EQ(value.dtype().element_of(), pointer_type->element_of())
+    // ICHECK_EQ(value.dtype().element_of(), pointer_type.second.element_of())
     //     << "Type mismatch, cannot store type " << value.dtype() << " into buffer "
-    //     << buffer_var->name_hint << " of type " << pointer_type.value();
-    element_lanes = pointer_type->lanes();
+    //     << buffer_var->name_hint << " of type " << pointer_type.second;
+    element_lanes = pointer_type.second.lanes();
   }
 
   ICHECK((value.dtype().lanes() == element_lanes * index.dtype().lanes()) ||
diff --git a/src/tir/transforms/inject_ptx_async_copy.cc b/src/tir/transforms/inject_ptx_async_copy.cc
index 8ee0d054e5..c74ce9d3d2 100644
--- a/src/tir/transforms/inject_ptx_async_copy.cc
+++ b/src/tir/transforms/inject_ptx_async_copy.cc
@@ -60,21 +60,21 @@ class PTXAsyncCopyInjector : public StmtMutator {
           if (bytes == 4 || bytes == 8 || bytes == 16) {
             auto dst_elem_type = GetPointerType(store->buffer->data->type_annotation);
             auto src_elem_type = GetPointerType(load->buffer->data->type_annotation);
-            ICHECK(dst_elem_type.has_value() && src_elem_type.has_value())
+            ICHECK(dst_elem_type.first && src_elem_type.first)
                 << "Both store and load buffer should have a pointer type annotation.";
 
             int index_factor = 1;
-            if (dst_elem_type.value() != src_elem_type.value()) {
+            if (dst_elem_type != src_elem_type) {
               // The only case where src and dst have different dtypes is when the dst shared memory
               // is a byte buffer generated by merging dynamic shared memory.
               ICHECK(store->buffer.scope() == "shared.dyn");
-              ICHECK(dst_elem_type.value() == DataType::UInt(8));
+              ICHECK(dst_elem_type.second == DataType::UInt(8));
               // BufferStore/Load have the "pointer reinterpret" semantics according to their
               // "value" dtype. Their "indices" are supposed to be applied after such pointer cast,
               // for example: ((*float16)(byte_buffer))[buffer->indices] = fp16_value;
               // To replace BufferStore/Load with cp.async, we need to multiply the store index by
               // the byte size of the "value" dtype, to get the correct offset into the byte buffer.
-              index_factor = src_elem_type->bytes();
+              index_factor = src_elem_type.second.bytes();
             }
 
             if (indices_lanes == 1) {
diff --git a/src/tir/transforms/storage_rewrite.cc b/src/tir/transforms/storage_rewrite.cc
index 177017f9a2..d15bed56fd 100644
--- a/src/tir/transforms/storage_rewrite.cc
+++ b/src/tir/transforms/storage_rewrite.cc
@@ -899,7 +899,7 @@ class StoragePlanRewriter : public StmtExprMutator {
                          const StorageScope& scope, size_t const_nbits) {
     ICHECK(op != nullptr);
     // Re-use not successful, allocate a new buffer.
-    auto entry = std::make_unique<StorageEntry>();
+    std::unique_ptr<StorageEntry> entry(new StorageEntry());
     entry->attach_scope_ = attach_scope;
     entry->scope = scope;
     entry->elem_type = op->dtype.element_of();
@@ -1010,11 +1010,11 @@ class StoragePlanRewriter : public StmtExprMutator {
   // symbolic free list, for non constant items.
   std::list<StorageEntry*> sym_free_list_;
   // The allocation attach map
-  std::unordered_map<const Object*, std::vector<StorageEntry*>> attach_map_;
+  std::unordered_map<const Object*, std::vector<StorageEntry*> > attach_map_;
   // The allocation assign map
   std::unordered_map<const VarNode*, StorageEntry*> alloc_map_;
   // The allocations
-  std::vector<std::unique_ptr<StorageEntry>> alloc_vec_;
+  std::vector<std::unique_ptr<StorageEntry> > alloc_vec_;
   // The buffer objects being remapped
   std::unordered_map<const BufferNode*, Buffer> buffer_remap_;
   // analyzer
@@ -1125,8 +1125,8 @@ class VectorTypeAccessChecker : public StmtExprVisitor {
     // track the parameter itself.
     for (Var buffer_var : params) {
       auto pointer_type = GetPointerType(buffer_var->type_annotation);
-      if (pointer_type.has_value() && (buffer_map.count(buffer_var) == 0)) {
-        DataType dtype = pointer_type.value();
+      if (pointer_type.first && (buffer_map.count(buffer_var) == 0)) {
+        DataType dtype = pointer_type.second;
         PrimExpr extent = 0;
         OnArrayDeclaration(buffer_var, dtype, extent, BufferVarInfo::kPrimFuncBufferMap);
       }
@@ -1190,8 +1190,8 @@ class VectorTypeAccessChecker : public StmtExprVisitor {
   void HandleLetNode(Var let_var) {
     if (let_var->dtype.is_handle()) {
       auto pointer_type = GetPointerType(let_var->type_annotation);
-      if (pointer_type.has_value()) {
-        OnArrayDeclaration(let_var, pointer_type.value(), 0, BufferVarInfo::kLetNode);
+      if (pointer_type.first) {
+        OnArrayDeclaration(let_var, pointer_type.second, 0, BufferVarInfo::kLetNode);
       } else if (allow_untyped_pointers_) {
         OnArrayDeclaration(let_var, let_var->dtype, 0, BufferVarInfo::kLetNode);
       } else {
@@ -1463,13 +1463,10 @@ class VectorTypeRewriter : public StmtExprMutator {
 
   Stmt VisitStmt_(const LetStmtNode* op) final {
     auto it = rewrite_map_.find(op->var.get());
-    PrimExpr value = this->VisitExpr(op->value);
-    Stmt body = this->VisitStmt(op->body);
-    Var var = (it == rewrite_map_.end()) ? op->var : it->second.new_buffer_var;
-    if (var.same_as(op->var) && value.same_as(op->value) && body.same_as(op->body)) {
+    if (it == rewrite_map_.end()) {
       return GetRef<Stmt>(op);
     }
-    return LetStmt(var, value, body);
+    return LetStmt(it->second.new_buffer_var, op->value, op->body);
   }
 
   Buffer RemapBuffer(Buffer buf) {
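
The hunks above read pointer_type.first / pointer_type.second and src_elem_type.second, suggesting GetPointerType now returns a flag/value pair rather than std::optional<DataType>, so the file builds without C++17; the same commit also respells nested template closers as "> >" and builds unique_ptr with plain new rather than std::make_unique. A minimal standalone sketch of the flag/value pattern, with illustrative names that are not taken from the TVM sources:

    #include <utility>

    // Illustrative stand-in for tvm::DataType.
    struct FakeDataType {
      explicit FakeDataType(int bits) : bits_(bits) {}
      int bytes() const { return bits_ / 8; }
      int bits_;
    };

    // Pre-C++17 replacement for std::optional<FakeDataType>: .first plays the
    // role of has_value(), .second is only meaningful when .first is true.
    std::pair<bool, FakeDataType> GetPointerElemType(bool annotated) {
      if (!annotated) {
        return std::make_pair(false, FakeDataType(0));
      }
      return std::make_pair(true, FakeDataType(16));  // e.g. a float16 element
    }

    int main() {
      std::pair<bool, FakeDataType> elem = GetPointerElemType(true);
      if (elem.first) {                          // was: elem.has_value()
        int index_factor = elem.second.bytes();  // was: elem->bytes()
        (void)index_factor;
      }
      return 0;
    }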


[tvm] 19/20: undo c++ 17 feature again

Posted by an...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

andrewzhaoluo pushed a commit to branch aluo/rebase-08312022-autotensorization-fq2i-changes
in repository https://gitbox.apache.org/repos/asf/tvm.git

commit 031676c7d257c7d904909b0c848e6f1ef312da38
Author: Andrew Zhao Luo <an...@gmail.com>
AuthorDate: Fri Sep 2 15:31:37 2022 -0700

    undo c++ 17 feature again
---
 src/runtime/graph_executor/graph_executor.cc | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/runtime/graph_executor/graph_executor.cc b/src/runtime/graph_executor/graph_executor.cc
index fc7e82bed4..78e65f6f23 100644
--- a/src/runtime/graph_executor/graph_executor.cc
+++ b/src/runtime/graph_executor/graph_executor.cc
@@ -519,8 +519,8 @@ void GraphExecutor::SetupOpExecs() {
   }
 }
 
-std::pair<std::function<void()>, std::shared_ptr<GraphExecutor::OpArgs>> GraphExecutor::CreateTVMOp(
-    const TVMOpParam& param, const std::vector<DLTensor>& args) {
+std::pair<std::function<void()>, std::shared_ptr<GraphExecutor::OpArgs> >
+GraphExecutor::CreateTVMOp(const TVMOpParam& param, const std::vector<DLTensor>& args) {
   std::shared_ptr<GraphExecutor::OpArgs> arg_ptr = std::make_shared<GraphExecutor::OpArgs>();
   // setup address.
   arg_ptr->args = args;
@@ -674,7 +674,9 @@ PackedFunc GraphExecutor::GetFunction(const std::string& name,
     });
   } else if (name == "get_input_info") {
     return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
-      auto [shape_info, dtype_info] = this->GetInputInfo();
+      GraphExecutor::ShapeInfo shape_info;
+      GraphExecutor::DtypeInfo dtype_info;
+      std::tie(shape_info, dtype_info) = this->GetInputInfo();
       Map<String, ObjectRef> input_info;
       input_info.Set("shape", shape_info);
       input_info.Set("dtype", dtype_info);
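
The hunk above replaces a C++17 structured binding (auto [shape_info, dtype_info] = ...) with variables declared up front and unpacked through std::tie, which compiles as C++14. A minimal sketch of the same idiom, using placeholder map types rather than the real GraphExecutor::ShapeInfo / DtypeInfo:

    #include <map>
    #include <string>
    #include <tuple>
    #include <utility>
    #include <vector>

    using ShapeInfo = std::map<std::string, std::vector<int> >;
    using DtypeInfo = std::map<std::string, std::string>;

    std::pair<ShapeInfo, DtypeInfo> GetInputInfo() {
      ShapeInfo shapes;
      shapes["data"] = std::vector<int>{1, 3, 224, 224};
      DtypeInfo dtypes;
      dtypes["data"] = "float32";
      return std::make_pair(shapes, dtypes);
    }

    int main() {
      // C++17:          auto [shape_info, dtype_info] = GetInputInfo();
      // C++14 version:  declare the variables first, then unpack with std::tie.
      ShapeInfo shape_info;
      DtypeInfo dtype_info;
      std::tie(shape_info, dtype_info) = GetInputInfo();
      return 0;
    }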


[tvm] 15/20: vm.cc optional

Posted by an...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

andrewzhaoluo pushed a commit to branch aluo/rebase-08312022-autotensorization-fq2i-changes
in repository https://gitbox.apache.org/repos/asf/tvm.git

commit b2b0772cc47b69d9ebf7466b2d5225270e801938
Author: Andrew Zhao Luo <an...@gmail.com>
AuthorDate: Fri Sep 2 15:08:26 2022 -0700

    vm.cc optional
---
 src/runtime/vm/profiler/vm.cc | 2 +-
 src/runtime/vm/profiler/vm.h  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/runtime/vm/profiler/vm.cc b/src/runtime/vm/profiler/vm.cc
index db8a3f5dc2..0ace910b5c 100644
--- a/src/runtime/vm/profiler/vm.cc
+++ b/src/runtime/vm/profiler/vm.cc
@@ -73,7 +73,7 @@ PackedFunc VirtualMachineDebug::GetFunction(const std::string& name,
           invoke(arg_name);
           prof_.operator*().Stop();
           auto report = prof_.operator*().Report();
-          prof_ = std::nullopt;  // releases hardware counters
+          prof_ = dmlc::optional<profiling::Profiler>();  // releases hardware counters
           return report;
         });
   } else if (name == "profile_rpc") {
diff --git a/src/runtime/vm/profiler/vm.h b/src/runtime/vm/profiler/vm.h
index f0374c75a7..0c9e94c0dd 100644
--- a/src/runtime/vm/profiler/vm.h
+++ b/src/runtime/vm/profiler/vm.h
@@ -25,11 +25,11 @@
 #ifndef TVM_RUNTIME_VM_PROFILER_VM_H_
 #define TVM_RUNTIME_VM_PROFILER_VM_H_
 
+#include <dmlc/optional.h>
 #include <tvm/runtime/profiling.h>
 #include <tvm/runtime/vm/vm.h>
 
 #include <memory>
-#include <optional>
 #include <string>
 #include <unordered_map>
 #include <vector>
@@ -55,7 +55,7 @@ class VirtualMachineDebug : public VirtualMachine {
   void OpStopHook() final;
 
   std::unordered_map<Index, std::string> packed_index_map_;
-  std::optional<profiling::Profiler> prof_;
+  dmlc::optional<profiling::Profiler> prof_;
 };
 
 }  // namespace vm
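
Two things change above: the C++17 std::optional gives way to dmlc::optional, and the profiler is released by assigning a freshly default-constructed optional instead of std::nullopt. A rough sketch of that release idiom, with a toy resource type standing in for profiling::Profiler:

    #include <dmlc/optional.h>

    #include <iostream>
    #include <string>

    // Toy stand-in for a resource that must be freed promptly (the real code
    // holds hardware performance counters while engaged).
    class FakeProfiler {
     public:
      explicit FakeProfiler(const std::string& name) : name_(name) {}
      void Start() { std::cout << name_ << ": start\n"; }
      void Stop() { std::cout << name_ << ": stop\n"; }

     private:
      std::string name_;
    };

    int main() {
      dmlc::optional<FakeProfiler> prof;  // disengaged: nothing held

      prof = FakeProfiler("run0");        // engage for one profiled run
      prof.operator*().Start();
      prof.operator*().Stop();

      // Assigning a default-constructed optional destroys the held value;
      // this is the dmlc spelling of `prof = std::nullopt;`.
      prof = dmlc::optional<FakeProfiler>();
      return 0;
    }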


[tvm] 12/20: Merge branch 'aluo/rebase-08312022-autotensorization' of gitlab.com:octoml/tvm into aluo/rebase-08312022-autotensorization

Posted by an...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

andrewzhaoluo pushed a commit to branch aluo/rebase-08312022-autotensorization-fq2i-changes
in repository https://gitbox.apache.org/repos/asf/tvm.git

commit 90986c86ec6f70e0f3d7d5783a6fc81a1988ce8b
Merge: ac675efd90 46e9243f67
Author: Andrew Zhao Luo <an...@gmail.com>
AuthorDate: Fri Sep 2 14:06:42 2022 -0700

    Merge branch 'aluo/rebase-08312022-autotensorization' of gitlab.com:octoml/tvm into aluo/rebase-08312022-autotensorization

 CONTRIBUTORS.md                                    |   1 +
 Jenkinsfile                                        |  23 +-
 ci/jenkins/Prepare.groovy.j2                       |  21 +-
 ci/scripts/check_pr.py                             | 150 ++++
 ci/scripts/git_skip_ci.py                          |   2 +-
 ci/scripts/github_tvmbot.py                        |  22 +-
 include/tvm/meta_schedule/database.h               |  16 +
 include/tvm/runtime/container/string.h             |  35 +-
 include/tvm/runtime/vm/executable.h                |  13 +
 include/tvm/tir/stmt.h                             |   1 +
 python/tvm/meta_schedule/builder/__init__.py       |   2 +-
 python/tvm/meta_schedule/builder/builder.py        |  17 +
 python/tvm/meta_schedule/database/__init__.py      |   4 +-
 python/tvm/meta_schedule/database/database.py      |  41 +-
 python/tvm/meta_schedule/database/json_database.py |  31 +-
 .../database/ordered_union_database.py             | 112 +++
 .../tvm/meta_schedule/database/union_database.py   | 112 +++
 python/tvm/meta_schedule/runner/__init__.py        |  12 +-
 python/tvm/meta_schedule/runner/runner.py          |  22 +-
 .../tvm/meta_schedule/search_strategy/__init__.py  |   2 +-
 .../search_strategy/search_strategy.py             |  29 +
 .../tvm/meta_schedule/space_generator/__init__.py  |   2 +-
 .../space_generator/space_generator.py             |  28 +
 .../tvm/meta_schedule/task_scheduler/__init__.py   |   4 +-
 .../meta_schedule/task_scheduler/task_scheduler.py |  20 +
 python/tvm/meta_schedule/testing/relay_workload.py |   4 +-
 python/tvm/relay/analysis/analysis.py              |  38 +
 python/tvm/relay/frontend/pytorch.py               |  32 +-
 python/tvm/relay/op/strategy/adreno.py             | 142 ++--
 python/tvm/runtime/vm.py                           |  10 +
 python/tvm/tir/schedule/schedule.py                |  42 +-
 python/tvm/topi/adreno/conv2d_alter_op.py          |  48 +-
 python/tvm/topi/adreno/conv2d_nchw.py              | 117 ++-
 python/tvm/topi/adreno/conv2d_nchw_winograd.py     |  45 +-
 python/tvm/topi/adreno/conv2d_nhwc.py              | 111 ++-
 python/tvm/topi/adreno/conv2d_nhwc_winograd.py     |  45 +-
 python/tvm/topi/adreno/conv2d_winograd_common.py   |  19 +-
 python/tvm/topi/adreno/depthwise_conv2d_nchw.py    |  42 +-
 python/tvm/topi/adreno/depthwise_conv2d_nhwc.py    |  38 +-
 python/tvm/topi/hexagon/__init__.py                |   1 +
 python/tvm/topi/hexagon/injective.py               |   7 +-
 python/tvm/topi/hexagon/slice_ops/argmax.py        |   7 +
 python/tvm/topi/hexagon/tensor_intrin.py           |  71 ++
 src/meta_schedule/database/json_database.cc        |  22 -
 .../database/ordered_union_database.cc             |  86 +++
 src/meta_schedule/database/union_database.cc       |  88 +++
 src/meta_schedule/schedule_rule/auto_inline.cc     |   5 +-
 src/meta_schedule/utils.h                          |  22 +
 src/relay/analysis/extract_intermediate_expr.cc    |  88 +++
 src/relay/backend/contrib/ethosn/ethosn_api.cc     |   7 +-
 src/runtime/vm/executable.cc                       |  24 +-
 src/tir/transforms/ir_utils.cc                     |   9 +
 tests/lint/pylint.sh                               |   1 +
 tests/python/ci/sample_prs/pr10786-badci.json      | 130 ----
 .../ci/sample_prs/pr10786-changes-requested.json   | 131 ----
 tests/python/ci/sample_prs/pr10786-co-authors.json | 129 ----
 .../ci/sample_prs/pr10786-invalid-author.json      | 130 ----
 tests/python/ci/sample_prs/pr10786-merges.json     | 129 ----
 .../python/ci/sample_prs/pr10786-missing-job.json  | 129 ----
 .../python/ci/sample_prs/pr10786-nottriggered.json | 129 ----
 tests/python/ci/sample_prs/pr10786-oldreview.json  | 129 ----
 .../{pr10786-ignore-jobs.json => pr10786.json}     |   5 +-
 .../sample_prs/pr11244-unauthorized-comment.json   | 103 ---
 tests/python/ci/sample_prs/pr11267-no-review.json  | 144 ----
 tests/python/ci/sample_prs/pr11442-rerun-ci.json   | 183 -----
 tests/python/ci/test_ci.py                         | 803 +++++++++------------
 tests/python/ci/test_tvmbot.py                     | 400 +++++-----
 tests/python/ci/test_utils.py                      |  33 +-
 tests/python/contrib/test_ethosn/infrastructure.py |  53 +-
 .../python/contrib/test_ethosn/test_concatenate.py |  51 +-
 .../test_ethosn/test_constant_duplication.py       |  10 +-
 tests/python/contrib/test_ethosn/test_conv2d.py    |  18 +-
 .../test_ethosn/test_convert_equivalents.py        |   1 +
 .../contrib/test_ethosn/test_depth_to_space.py     |   4 +
 .../contrib/test_ethosn/test_fullyconnected.py     |  25 +-
 .../python/contrib/test_ethosn/test_leaky_relu.py  |   2 +
 tests/python/contrib/test_ethosn/test_mean.py      |   2 +
 tests/python/contrib/test_ethosn/test_multiply.py  |   3 +
 tests/python/contrib/test_ethosn/test_networks.py  |  23 +-
 .../contrib/test_ethosn/test_partition_params.py   |  24 +-
 tests/python/contrib/test_ethosn/test_pooling.py   |   8 +-
 tests/python/contrib/test_ethosn/test_relu.py      |   4 +
 .../python/contrib/test_ethosn/test_requantize.py  |   5 +
 tests/python/contrib/test_ethosn/test_reshape.py   |   6 +-
 tests/python/contrib/test_ethosn/test_resize.py    |   4 +
 tests/python/contrib/test_ethosn/test_sigmoid.py   |  11 +-
 tests/python/contrib/test_ethosn/test_split.py     |   9 +-
 tests/python/contrib/test_ethosn/test_tanh.py      |   4 +
 .../python/contrib/test_ethosn/test_topologies.py  |  33 +-
 .../test_hexagon/test_fixed_point_multiply.py      | 140 ++++
 .../contrib/test_hexagon/topi/test_argmax_slice.py |  14 +-
 .../test_hexagon/topi/test_max_pool2d_slice.py     |   3 -
 tests/python/driver/tvmc/test_frontends.py         |   4 +
 .../test_analysis_extract_intermediate_expr.py     | 130 ++++
 tests/python/relay/test_conv2d_nchw_texture.py     |   4 +-
 tests/python/relay/test_conv2d_nhwc_texture.py     |   2 +-
 tests/python/relay/test_vm.py                      |  80 ++
 tests/python/unittest/test_link_params.py          |   9 +-
 .../python/unittest/test_meta_schedule_database.py |  37 +
 ...test_meta_schedule_schedule_rule_auto_inline.py |  28 +
 .../unittest/test_tir_schedule_cache_read_write.py |   8 +-
 .../unittest/test_tir_transform_unroll_loop.py     |   9 +-
 102 files changed, 2661 insertions(+), 2707 deletions(-)


[tvm] 03/20: dnnl pattern matching

Posted by an...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

andrewzhaoluo pushed a commit to branch aluo/rebase-08312022-autotensorization-fq2i-changes
in repository https://gitbox.apache.org/repos/asf/tvm.git

commit ce7fcbdae3b28698fc37513cb3e3d65bb3c120b0
Author: Andrew Zhao Luo <an...@gmail.com>
AuthorDate: Thu Sep 1 21:46:53 2022 -0700

    dnnl pattern matching
---
 python/tvm/relay/op/contrib/dnnl.py | 64 +++++++++++++++++++++++++++----------
 1 file changed, 47 insertions(+), 17 deletions(-)

diff --git a/python/tvm/relay/op/contrib/dnnl.py b/python/tvm/relay/op/contrib/dnnl.py
index f7752e41b0..e27449ac43 100644
--- a/python/tvm/relay/op/contrib/dnnl.py
+++ b/python/tvm/relay/op/contrib/dnnl.py
@@ -36,22 +36,18 @@ import logging
 from functools import reduce
 
 import tvm.ir
-from tvm.ir import Op
 from tvm import relay
+from tvm.ir import Op
+from tvm.relay import expr as _expr
 from tvm.relay import transform
-from tvm.relay.expr import GlobalVar
-from tvm.relay.expr_functor import ExprMutator, ExprVisitor
-from tvm.relay.expr import const
-
 from tvm.relay.analysis import analysis as _analysis
-from tvm.relay import expr as _expr
+from tvm.relay.expr import Call, GlobalVar, TupleGetItem, const
+from tvm.relay.expr_functor import ExprMutator, ExprVisitor
 
-from tvm.relay.expr import Call, TupleGetItem
 from ... import _ffi_api
-from ...dataflow_pattern import wildcard, is_op, is_constant, is_expr, rewrite, DFPatternCallback
+from ...dataflow_pattern import DFPatternCallback, is_constant, is_expr, is_op, rewrite, wildcard
 from .register import register_pattern_table
 
-
 logger = logging.getLogger("DNNL")
 supported_post_elts = ["nn.relu", "tanh", "sigmoid", "clip", "gelu", "swish", "mish", None]
 
@@ -809,7 +805,7 @@ def prune_dnnl_subgraphs(mod):
     return new_mod
 
 
-class LayerNormRewrite(DFPatternCallback):
+class LayerNormRewritePattern1(DFPatternCallback):
     """
     A callback to rewrite the following operators into a single layer normalization operator.
 
@@ -826,7 +822,42 @@ class LayerNormRewrite(DFPatternCallback):
             /* ty=Tensor[(1, 3136, 64), float32] */;
     10   %13 = add(%12, meta[relay.Constant][3] /* ty=Tensor[(64), float32] */)
             /* ty=Tensor[(1, 3136, 64), float32] */;
+    """
+
+    def __init__(self):
+        super(LayerNormRewritePattern1, self).__init__()
+        self.data = wildcard()
+        self.gamma = wildcard()
+        self.beta = wildcard()
+        mu = is_op("mean")(self.data)
+        diff = is_op("subtract")(self.data, mu)
+        cdiff = is_op("cast")(diff)
+        const_two = (
+            is_expr(relay.const(2))
+            | is_expr(relay.const(2.0))
+            | is_expr(relay.const(2.0, dtype="float16"))
+        )
+        p1 = is_op("power")(cdiff, const_two)
+        mp1 = is_op("mean")(p1)
+        eps = is_constant()  # TODO: check epsilon is something reasonable
+        added_eps = is_op("add")(mp1, eps)
+        deno = is_op("sqrt")(added_eps)
+        div_out = is_op("divide")(diff, deno)
+        div_out2 = diff * is_op("rsqrt")(added_eps)
+        weighted = is_op("multiply")(div_out | div_out2, self.gamma)
+        added_bias = is_op("add")(weighted, self.beta)
+        self.pattern = added_bias
 
+    def callback(self, pre, post, node_map):
+        data = node_map[self.data][0]
+        gamma = node_map[self.gamma][0]
+        beta = node_map[self.beta][0]
+        return relay.op.nn.layer_norm(data=data, gamma=gamma, beta=beta)
+
+
+class LayerNormRewritePattern2(DFPatternCallback):
+    """
+    A callback to rewrite the following operators into a single layer normalization operator.
     Pattern #2:
     1   %0 = mean(%input, axis=[-1], keepdims=True);
     2   %1 = variance(%input, %0, axis=[-1], keepdims=True);
@@ -842,19 +873,16 @@ class LayerNormRewrite(DFPatternCallback):
     """
 
     def __init__(self):
-        super(LayerNormRewrite, self).__init__()
+        super(LayerNormRewritePattern2, self).__init__()
         self.data = wildcard()
         self.gamma = wildcard()
         self.beta = wildcard()
         mu = is_op("mean")(self.data)
-        diff = is_op("subtract")(self.data, mu)
-        cdiff = diff | is_op("cast")(diff)
-        const_two = is_expr(relay.const(2)) | is_expr(relay.const(2.0))
-        p1 = is_op("power")(cdiff, const_two)
-        mp1 = is_op("mean")(p1) | is_op("variance")(self.data, mu)
+        mp1 = is_op("variance")(self.data, mu)
         eps = is_expr(relay.const(1e-5)) | is_expr(relay.const(1e-6))
         added_eps = is_op("add")(mp1, eps)
         deno = is_op("sqrt")(added_eps)
+        diff = is_op("subtract")(self.data, mu)
         div_out = is_op("divide")(diff, deno)
         div_out2 = diff * is_op("rsqrt")(added_eps)
         weighted = is_op("multiply")(div_out | div_out2, self.gamma)
@@ -872,7 +900,9 @@ def rewrite_layer_norm(mod):
     """Rewrite the input graph to replace multiple operators with a TVM native layer normalization
     operator so that we can offload them to dnnl layer normalization byoc part.
     """
-    mod["main"] = rewrite(LayerNormRewrite(), mod["main"])
+    mod["main"] = rewrite(LayerNormRewritePattern1(), mod["main"])
+    mod["main"] = rewrite(LayerNormRewritePattern2(), mod["main"])
+
     return mod
 
 


[tvm] 07/20: update configs

Posted by an...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

andrewzhaoluo pushed a commit to branch aluo/rebase-08312022-autotensorization-fq2i-changes
in repository https://gitbox.apache.org/repos/asf/tvm.git

commit 1bae92e12fd595558fe4abe7b913000378b62d9e
Author: Andrew Luo <an...@gmail.com>
AuthorDate: Wed Aug 17 11:01:56 2022 -0700

    update configs
---
 python/tvm/meta_schedule/default_config.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/tvm/meta_schedule/default_config.py b/python/tvm/meta_schedule/default_config.py
index c1e886e7f7..a88a9be88d 100644
--- a/python/tvm/meta_schedule/default_config.py
+++ b/python/tvm/meta_schedule/default_config.py
@@ -24,7 +24,7 @@ from tvm._ffi.registry import register_func
 from tvm.contrib import nvcc
 from tvm.ir import IRModule
 from tvm.target import Target
-from tvm.tir import PrimFunc
+from tvm.tir import PrimFunc, tensor_intrin
 
 from .builder import Builder, LocalBuilder
 from .cost_model import CostModel, XGBModel
@@ -460,7 +460,7 @@ class _DefaultCUDATensorCore:
         return [
             M.MultiLevelTilingTensorCore(
                 intrin_groups=[
-                    get_wmma_intrin_group(
+                    tensor_intrin.get_wmma_intrin_group(
                         store_scope="shared",
                         in_dtype=in_dtype,
                         out_dtype=out_dtype,


[tvm] 06/20: fix new imports

Posted by an...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

andrewzhaoluo pushed a commit to branch aluo/rebase-08312022-autotensorization-fq2i-changes
in repository https://gitbox.apache.org/repos/asf/tvm.git

commit 7ee33cbdc260bb670955c73cebc3efa61f6ad0ff
Author: Andrew Luo <an...@gmail.com>
AuthorDate: Wed Aug 17 21:05:08 2022 -0700

    fix new imports
---
 python/tvm/meta_schedule/default_config.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/python/tvm/meta_schedule/default_config.py b/python/tvm/meta_schedule/default_config.py
index 73ba0e4fa8..c1e886e7f7 100644
--- a/python/tvm/meta_schedule/default_config.py
+++ b/python/tvm/meta_schedule/default_config.py
@@ -24,7 +24,7 @@ from tvm._ffi.registry import register_func
 from tvm.contrib import nvcc
 from tvm.ir import IRModule
 from tvm.target import Target
-from tvm.tir import PrimFunc, tensor_intrin
+from tvm.tir import PrimFunc
 
 from .builder import Builder, LocalBuilder
 from .cost_model import CostModel, XGBModel
@@ -311,6 +311,7 @@ class _DefaultLLVMVNNI:
     @staticmethod
     def schedule_rules() -> List[ScheduleRule]:
         from tvm.meta_schedule import schedule_rule as M
+        from tvm.tir.tensor_intrin.x86 import VNNI_DOT_16x4_INTRIN
 
         logger.info("Using schedule rule: LLVM VNNI")
 
@@ -326,7 +327,7 @@ class _DefaultLLVMVNNI:
             ),
             M.AddRFactor(max_jobs_per_core=16, max_innermost_factor=64),
             M.MultiLevelTilingWithIntrin(
-                tensor_intrin.VNNI_DOT_16x4_INTRIN,
+                VNNI_DOT_16x4_INTRIN,
                 structure="SSRSRS",
                 tile_binds=None,
                 max_innermost_factor=64,
@@ -459,7 +460,7 @@ class _DefaultCUDATensorCore:
         return [
             M.MultiLevelTilingTensorCore(
                 intrin_groups=[
-                    tensor_intrin.get_wmma_intrin_group(
+                    get_wmma_intrin_group(
                         store_scope="shared",
                         in_dtype=in_dtype,
                         out_dtype=out_dtype,


[tvm] 14/20: llvm instance optional

Posted by an...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

andrewzhaoluo pushed a commit to branch aluo/rebase-08312022-autotensorization-fq2i-changes
in repository https://gitbox.apache.org/repos/asf/tvm.git

commit 3b0984e1ba6b042fe6d79665f138ec858b40efd2
Author: Andrew Zhao Luo <an...@gmail.com>
AuthorDate: Fri Sep 2 14:59:23 2022 -0700

    llvm instance optional
---
 src/target/llvm/llvm_instance.cc | 430 ++-------------------------------------
 src/target/llvm/llvm_instance.h  | 194 ++++--------------
 src/target/llvm/llvm_module.cc   |   8 +-
 3 files changed, 56 insertions(+), 576 deletions(-)

diff --git a/src/target/llvm/llvm_instance.cc b/src/target/llvm/llvm_instance.cc
index 19ff480452..772e71b287 100644
--- a/src/target/llvm/llvm_instance.cc
+++ b/src/target/llvm/llvm_instance.cc
@@ -39,7 +39,6 @@
 #include <llvm/Support/TargetRegistry.h>
 #endif
 #include <llvm/Support/CodeGen.h>
-#include <llvm/Support/CommandLine.h>
 #include <llvm/Support/ErrorOr.h>
 #include <llvm/Support/Host.h>
 #include <llvm/Support/MemoryBuffer.h>
@@ -57,14 +56,9 @@
 #include <tvm/target/target.h>
 
 #include <atomic>
-#include <cctype>
-#include <memory>
-#include <optional>
-#include <ostream>
 #include <sstream>
 #include <string>
 #include <system_error>
-#include <utility>
 
 namespace tvm {
 namespace codegen {
@@ -142,27 +136,10 @@ std::unique_ptr<llvm::Module> LLVMInstance::ParseBuffer(const llvm::MemoryBuffer
   return module;
 }
 
-// LLVMTargetInfo
-
-std::ostream& operator<<(std::ostream& os, const LLVMTargetInfo::Option& opt) {
-  os << '-' << opt.name;
-  switch (opt.type) {
-    case LLVMTargetInfo::Option::OptType::Bool:
-      return os << ":bool=" << (opt.value.b ? "true" : "false");
-    case LLVMTargetInfo::Option::OptType::Int:
-      return os << ":int=" << opt.value.i;
-    case LLVMTargetInfo::Option::OptType::UInt:
-      return os << ":uint=" << opt.value.u;
-    case LLVMTargetInfo::Option::OptType::String:
-      return os << ":string=" << opt.value.s;
-    default:
-      os << ":?(" << static_cast<int>(opt.type) << ")";
-      break;
-  }
-  return os;
-}
+// LLVMTarget
 
-LLVMTargetInfo::LLVMTargetInfo(LLVMInstance& instance, const Target& target) {
+LLVMTarget::LLVMTarget(LLVMInstance& instance, const Target& target)
+    : instance_(instance), ctx_(instance.GetContext()) {
   triple_ = target->GetAttr<String>("mtriple").value_or("default");
 
   if (triple_.empty() || triple_ == "default") {
@@ -176,26 +153,6 @@ LLVMTargetInfo::LLVMTargetInfo(LLVMInstance& instance, const Target& target) {
     }
   }
 
-  if (const Optional<Array<String>>& v = target->GetAttr<Array<String>>("cl-opt")) {
-    llvm::StringMap<llvm::cl::Option*>& options = llvm::cl::getRegisteredOptions();
-    bool parse_error = false;
-    for (const String& s : v.value()) {
-      Option opt = ParseOptionString(s);
-      if (opt.type == Option::OptType::Invalid) {
-        parse_error = true;
-        continue;
-      }
-      if (options.count(opt.name)) {
-        llvm_options_.push_back(opt);
-      } else {
-        // Flag an error, but don't abort. LLVM flags may change, and this would
-        // give the code a chance to run even if the option no longer applies.
-        LOG(ERROR) << "\"" << opt.name << "\" is not an LLVM option, option ignored";
-      }
-    }
-    ICHECK(!parse_error) << "there were errors parsing command-line options";
-  }
-
   llvm::FloatABI::ABIType float_abi = llvm::FloatABI::Default;
   if (const Optional<String>& v = target->GetAttr<String>("mfloat-abi")) {
     String value = v.value();
@@ -281,12 +238,17 @@ LLVMTargetInfo::LLVMTargetInfo(LLVMInstance& instance, const Target& target) {
   }
 }
 
-LLVMTargetInfo::LLVMTargetInfo(LLVMInstance& scope, const std::string& target_str)
-    : LLVMTargetInfo(scope, Target(target_str)) {}
+LLVMTarget::LLVMTarget(LLVMInstance& scope, const std::string& target_str)
+    : LLVMTarget(scope, Target(target_str)) {}
 
-LLVMTargetInfo::~LLVMTargetInfo() = default;
+LLVMTarget::~LLVMTarget() = default;
+
+llvm::LLVMContext* LLVMTarget::GetContext() const {
+  ICHECK(!ctx_.expired()) << "LLVM scope has been deleted";
+  return ctx_.lock().get();
+}
 
-llvm::TargetMachine* LLVMTargetInfo::GetOrCreateTargetMachine(bool allow_missing) {
+llvm::TargetMachine* LLVMTarget::GetOrCreateTargetMachine(bool allow_missing) {
   if (target_machine_) return target_machine_.get();
 
   std::string error;
@@ -302,11 +264,11 @@ llvm::TargetMachine* LLVMTargetInfo::GetOrCreateTargetMachine(bool allow_missing
   return target_machine_.get();
 }
 
-std::string LLVMTargetInfo::GetTargetFeatureString() const {  //
+std::string LLVMTarget::GetTargetFeatureString() const {  //
   return Join(",", attrs_);
 }
 
-std::string LLVMTargetInfo::str() const {
+std::string LLVMTarget::str() const {
   std::ostringstream os;
   os << "llvm";
   if (!triple_.empty()) {
@@ -378,324 +340,9 @@ std::string LLVMTargetInfo::str() const {
     }
   }
 
-  if (size_t num = llvm_options_.size(); num > 0) {
-    os << " -cl-opt=";
-    std::vector<std::string> opts;
-    for (const Option& opt : llvm_options_) {
-      std::stringstream os;
-      os << opt;
-      opts.emplace_back(os.str());
-    }
-    auto* quote = num > 1 ? "'" : "";
-    os << quote << Join(",", opts) << quote;
-  }
-
   return os.str();
 }
 
-LLVMTargetInfo::Option LLVMTargetInfo::ParseOptionString(const std::string& str) {
-  Option opt;
-  opt.type = Option::OptType::Invalid;
-
-  // Option string: "-"+ <option_name> ":" <type> "=" <value>
-  //
-  // Note: "-"+ means 1 or more dashes, but only "-" are "--" valid.
-
-  // The first step is to do "lexing" of the option string, i.e. to break
-  // it up into parts (like "tokens") according to the syntax above. These
-  // parts will be non-overlapping substrings of the option string, and
-  // concatenated together, they will be equal to the option string.
-  // The literal elements are parts on their own.
-  //
-  // Note that the option string may be malformed, so any of the literal
-  // elements in the syntax may be missing.
-
-  std::vector<std::string> parts;
-
-  auto find_first_of = [](const std::string& str, const std::string& chars, auto start = 0) {
-    auto pos = str.find_first_of(chars, start);
-    return pos != std::string::npos ? pos : str.size();
-  };
-  auto find_first_not_of = [](const std::string& str, const std::string& chars, auto start = 0) {
-    auto pos = str.find_first_not_of(chars, start);
-    return pos != std::string::npos ? pos : str.size();
-  };
-
-  // "-"+
-  std::string::size_type pos_start = 0, pos_end = str.size();
-  std::string::size_type pos_at = find_first_not_of(str, "-", pos_start);
-  if (pos_at > 0) {
-    parts.push_back(str.substr(pos_start, pos_at));
-  }
-  // <option_name>, always present, may be empty string
-  pos_start = pos_at;
-  pos_at = find_first_of(str, ":=", pos_start);
-  parts.push_back(str.substr(pos_start, pos_at - pos_start));
-
-  // ":" or "=", if any
-  pos_start = pos_at;
-  char c = pos_start < pos_end ? str[pos_start] : 0;
-  if (c != 0) {
-    parts.emplace_back(1, c);
-    pos_start++;
-  }
-  // If the character found in the previous step wasn't '=', look for '='.
-  if (c == ':') {
-    // <type>
-    pos_at = find_first_of(str, "=", pos_start);
-    if (pos_at > pos_start) {  // if non-empty
-      parts.push_back(str.substr(pos_start, pos_at - pos_start));
-    }
-
-    // "="
-    if (pos_at < pos_end) {
-      parts.emplace_back(1, str[pos_at]);
-      pos_start = pos_at + 1;
-    }
-  }
-  if (pos_start < pos_end) {
-    // <value>
-    parts.push_back(str.substr(pos_start));
-  }
-
-  // After breaking up the option string, examine and validate the individual
-  // parts.
-
-  int part_this = 0, part_end = parts.size();
-
-  const std::string error_header = "while parsing option \"" + str + "\": ";
-
-  // Check for "-" or "--".
-  if (part_this < part_end) {
-    auto& p = parts[part_this++];
-    if ((p.size() != 1 && p.size() != 2) || p.find_first_not_of('-') != std::string::npos) {
-      LOG(ERROR) << error_header << "option must start with \"-\" or \"--\"";
-      return opt;
-    }
-  }
-
-  // Validate option name.
-  if (part_this < part_end) {
-    auto& p = parts[part_this++];
-    if (p.empty()) {
-      LOG(ERROR) << error_header << "option name must not be empty";
-      return opt;
-    }
-    opt.name = std::move(p);
-  }
-
-  // Check type, if present.
-  Option::OptType type = Option::OptType::Invalid;
-  if (part_this < part_end) {
-    auto& p0 = parts[part_this];
-    if (p0 == ":") {
-      part_this++;  // Only advance if we saw ":".
-      if (part_this < part_end) {
-        auto& p1 = parts[part_this];
-        ICHECK(!p1.empty()) << "tokenizing error";  // This shouldn't happen.
-        if (p1 != "=") {
-          part_this++;
-          if (p1 == "bool") {
-            type = Option::OptType::Bool;
-          } else if (p1 == "int") {
-            type = Option::OptType::Int;
-          } else if (p1 == "uint") {
-            type = Option::OptType::UInt;
-          } else if (p1 == "string") {
-            type = Option::OptType::String;
-          }
-        }
-      }
-      // If there was ":", there must be a type.
-      if (type == Option::OptType::Invalid) {
-        LOG(ERROR) << error_header << "invalid type";
-        return opt;
-      }
-    }
-  }
-
-  // Check value, if present.
-  std::optional<std::string> value;
-  if (part_this < part_end) {
-    auto& p0 = parts[part_this];
-    if (p0 == "=") {
-      part_this++;
-      if (part_this < part_end) {
-        value = std::move(parts[part_this]);
-      } else {
-        value = "";
-      }
-    } else {
-      // If there are still any parts left to be processed, there must be "=".
-      LOG(ERROR) << error_header << "expecting \"=\"";
-      return opt;
-    }
-  }
-
-  // NOLINTNEXTLINE(runtime/int)
-  auto to_integer = [](const std::string& s) -> std::optional<long long> {
-    // std::stoll takes "long long"
-    long long number;  // NOLINT(runtime/int)
-    size_t pos;
-    try {
-      number = std::stoll(s, &pos);
-    } catch (...) {
-      return std::nullopt;
-    }
-    if (pos == s.size()) {
-      return number;
-    } else {
-      return std::nullopt;
-    }
-  };
-
-  auto to_boolean = [&to_integer](const std::string& s) -> std::optional<bool> {
-    // Return 0 or 1, if string corresponds to a valid boolean value,
-    // otherwise return 2.
-    auto ti = to_integer(s);
-    if (ti.has_value() && (ti.value() == 0 || ti.value() == 1)) {
-      return static_cast<bool>(ti.value());
-    }
-
-    std::string lower;
-    std::transform(s.begin(), s.end(), std::back_inserter(lower),
-                   [](unsigned char c) { return std::tolower(c); });
-    if (lower == "true") {
-      return true;
-    } else if (lower == "false") {
-      return false;
-    }
-    return std::nullopt;
-  };
-
-  if (value.has_value()) {
-    if (type == Option::OptType::Int || type == Option::OptType::UInt) {
-      auto v = to_integer(value.value());
-      if (!v.has_value()) {
-        LOG(ERROR) << error_header << "invalid integer value \"" << value.value() << "\"";
-        return opt;
-      }
-      if (type == Option::OptType::Int) {
-        opt.value.i = static_cast<int>(v.value());
-        if (opt.value.i != v.value()) {
-          LOG(WARNING) << error_header << "value exceeds int range, assuming " << opt.value.i;
-        }
-      } else {
-        // NOLINTNEXTLINE(runtime/int)
-        opt.value.u = static_cast<unsigned>(static_cast<unsigned long long>(v.value()));
-        if (opt.value.u != static_cast<unsigned long long>(v.value())) {  // NOLINT(runtime/int)
-          LOG(WARNING) << error_header << "value exceeds int range, assuming " << opt.value.u;
-        }
-      }
-    } else if (type == Option::OptType::String) {
-      opt.value.s = std::move(value.value());
-    } else {
-      // "type" is either Bool (given explicitly) or Invalid (type not present in string)
-      auto v = to_boolean(value.value());
-      if (!v.has_value()) {
-        LOG(ERROR) << error_header << "invalid boolean value \"" << value.value() << "\"";
-        return opt;
-      }
-      opt.value.b = v.value();
-      type = Option::OptType::Bool;
-    }
-  } else {
-    // Value was not present in string. Assume "true" if "type" is Bool or Invalid
-    if (type == Option::OptType::Bool || type == Option::OptType::Invalid) {
-      opt.value.b = true;
-      type = Option::OptType::Bool;
-    } else {
-      LOG(ERROR) << error_header << "must have a value";
-      return opt;
-    }
-  }
-
-  ICHECK(type != Option::OptType::Invalid);
-  opt.type = type;
-  return opt;
-}
-
-bool LLVMTargetInfo::MatchesGlobalState() const {
-  for (const Option& opt : GetCommandLineOptions()) {
-    Option current_opt = opt;
-    GetOptionValue(&current_opt);
-    ICHECK(current_opt.type != Option::OptType::Invalid);
-    switch (current_opt.type) {
-      case Option::OptType::Bool:
-        if (current_opt.value.b != opt.value.b) return false;
-        continue;
-      case Option::OptType::Int:
-        if (current_opt.value.i != opt.value.i) return false;
-        continue;
-      case Option::OptType::UInt:
-        if (current_opt.value.u != opt.value.u) return false;
-        continue;
-      case Option::OptType::String:
-        if (current_opt.value.s != opt.value.s) return false;
-        continue;
-      default:;  // NOLINT(whitespace/semicolon)
-    }
-  }
-  return true;
-}
-
-void LLVMTargetInfo::GetOptionValue(LLVMTargetInfo::Option* opt) const {
-  llvm::StringMap<llvm::cl::Option*>& options = llvm::cl::getRegisteredOptions();
-  llvm::cl::Option* base_op = options[opt->name];
-
-  if (opt->type == Option::OptType::Bool) {
-    auto* bool_op = static_cast<llvm::cl::opt<bool>*>(base_op);
-    opt->value.b = bool_op->getValue();
-  } else if (opt->type == Option::OptType::Int) {
-    auto* int_op = static_cast<llvm::cl::opt<int>*>(base_op);
-    opt->value.i = int_op->getValue();
-  } else if (opt->type == Option::OptType::UInt) {
-    auto* uint_op = static_cast<llvm::cl::opt<unsigned>*>(base_op);
-    opt->value.u = uint_op->getValue();
-  } else if (opt->type == Option::OptType::String) {
-    auto* str_op = static_cast<llvm::cl::opt<std::string>*>(base_op);
-    opt->value.s = str_op->getValue();
-  } else {
-    opt->type = Option::OptType::Invalid;
-  }
-}
-
-// LLVMTarget
-
-bool LLVMTarget::modified_llvm_state_ = false;
-
-LLVMTarget::LLVMTarget(LLVMInstance& instance, const LLVMTargetInfo& target_info)
-    : LLVMTargetInfo(target_info), instance_(instance), ctx_(instance.GetContext()) {
-  // Populate the list of saved options with the current values.
-  for (const Option& opt : GetCommandLineOptions()) {
-    GetOptionValue(&saved_llvm_options_.emplace_back(opt));
-  }
-
-  if (modified_llvm_state_) {
-    ICHECK(!ApplyLLVMOptions(true));
-  } else {
-    modified_llvm_state_ = ApplyLLVMOptions(true);
-  }
-}
-
-LLVMTarget::LLVMTarget(LLVMInstance& instance, const Target& target)
-    : LLVMTarget(instance, LLVMTargetInfo(instance, target)) {}
-
-LLVMTarget::LLVMTarget(LLVMInstance& scope, const std::string& target_str)
-    : LLVMTarget(scope, Target(target_str)) {}
-
-LLVMTarget::~LLVMTarget() {
-  // Revert all applied LLVM options.
-  if (ApplyLLVMOptions(false)) {
-    modified_llvm_state_ = false;
-  }
-}
-
-llvm::LLVMContext* LLVMTarget::GetContext() const {
-  ICHECK(!ctx_.expired()) << "LLVM scope has been deleted";
-  return ctx_.lock().get();
-}
-
 std::string LLVMTarget::GetTargetMetadata(const llvm::Module& module) {
   if (llvm::Metadata* tvm_target = module.getModuleFlag("tvm_target")) {
     auto* mdstr = llvm::cast<llvm::MDString>(tvm_target);
@@ -712,55 +359,6 @@ void LLVMTarget::SetTargetMetadata(llvm::Module* module) const {
                         llvm::MDString::get(*GetContext(), str()));
 }
 
-bool LLVMTarget::ApplyLLVMOptions(bool apply_otherwise_revert, bool dry_run) {
-  llvm::StringMap<llvm::cl::Option*>& options = llvm::cl::getRegisteredOptions();
-  bool changed = false;
-
-#define HANDLE_OPTION_VALUE(option, new_val, saved_val)                  \
-  do {                                                                   \
-    auto current = (option)->getValue();                                 \
-    auto replacement = apply_otherwise_revert ? (new_val) : (saved_val); \
-    if (current != replacement) {                                        \
-      changed = true;                                                    \
-      if (!dry_run) {                                                    \
-        (option)->setValue(replacement);                                 \
-      }                                                                  \
-    }                                                                    \
-  } while (false);
-
-  const auto& new_options = GetCommandLineOptions();
-  for (size_t i = 0, e = saved_llvm_options_.size(); i != e; ++i) {
-    const Option& new_opt = new_options[i];
-    const Option& saved_opt = saved_llvm_options_[i];
-
-    llvm::cl::Option* base_op = options[new_opt.name];
-
-    if (new_opt.type == Option::OptType::Bool) {
-      auto* bool_op = static_cast<llvm::cl::opt<bool>*>(base_op);
-      HANDLE_OPTION_VALUE(bool_op, new_opt.value.b, saved_opt.value.b);
-    } else if (new_opt.type == Option::OptType::Int) {
-      auto* int_op = static_cast<llvm::cl::opt<int>*>(base_op);
-      HANDLE_OPTION_VALUE(int_op, new_opt.value.i, saved_opt.value.i);
-    } else if (new_opt.type == Option::OptType::UInt) {
-      auto* uint_op = static_cast<llvm::cl::opt<unsigned>*>(base_op);
-      HANDLE_OPTION_VALUE(uint_op, new_opt.value.u, saved_opt.value.u);
-    } else if (new_opt.type == Option::OptType::String) {
-      auto* str_op = static_cast<llvm::cl::opt<std::string>*>(base_op);
-      HANDLE_OPTION_VALUE(str_op, new_opt.value.s, saved_opt.value.s);
-    } else {
-      LOG(FATAL) << "unexpected type in option " << new_opt;
-    }
-
-    if (dry_run && changed) {
-      return true;
-    }
-  }
-
-#undef HANDLE_OPTION_VALUE
-
-  return changed;
-}
-
 }  // namespace codegen
 }  // namespace tvm
 
diff --git a/src/target/llvm/llvm_instance.h b/src/target/llvm/llvm_instance.h
index 217db63aad..afb6e58deb 100644
--- a/src/target/llvm/llvm_instance.h
+++ b/src/target/llvm/llvm_instance.h
@@ -38,7 +38,6 @@
 #include <tvm/runtime/container/string.h>
 #include <tvm/target/target.h>
 
-#include <algorithm>
 #include <memory>
 #include <string>
 #include <utility>
@@ -58,9 +57,8 @@ class LLVMTarget;
 
 /*!
  * \class LLVMInstance
- * \brief LLVMInstance is a class that (conceptually) starts and stops LLVM.
- *        All uses of LLVM should take place within a lifetime of an object
- *        of this class.
+ * \brief LLVMInstance is a class that (conceptually) starts and stops LLVM. All
+ * uses of LLVM should take place within a lifetime of an object of this class.
  *
  * E.g.
  * ```{.cpp}
@@ -130,48 +128,60 @@ class LLVMInstance {
 };
 
 /*!
- * \class LLVMTargetInfo
- * \brief Summary of information for this TVM target relevant to LLVM code
- *        generation.
+ * \class LLVMTarget
+ * \brief Information used by LLVM for code generation for particular target
  *
  * This class contains all information that LLVM needs for code generation for
- * a particular target. The purpose of this class is only to provide information
- * in an easily-accessible form (for example for querying the target properties).
+ * a particular target. Since Target in TVM will soon contain command line
+ * flags for LLVM, objects of this class will handle saving and restoring
+ * global LLVM state that may be affected by these flags. This way, code
+ * generation for each LLVM-based target in TVM will start with the same LLVM
+ * global state.
  *
  * Note that objects of this class must be created within the lifetime of an
  * LLVMInstance object.
  */
-class LLVMTargetInfo {
+class LLVMTarget {
  public:
   /*!
-   * \brief Constructs LLVMTargetInfo from `Target`
+   * \brief Constructs LLVMTarget from `Target`
    * \param scope LLVMInstance object
    * \param target TVM Target object for target "llvm"
    */
-  LLVMTargetInfo(LLVMInstance& scope, const Target& target);  // NOLINT(runtime/references)
+  LLVMTarget(LLVMInstance& scope, const Target& target);  // NOLINT(runtime/references)
   /*!
-   * \brief Constructs LLVMTargetInfo from target string
+   * \brief Constructs LLVMTarget from target string
    * \param scope LLVMInstance object
    * \param target TVM target string for target "llvm"
    */
-  // NOLINTNEXTLINE(runtime/references)
-  LLVMTargetInfo(LLVMInstance& scope, const std::string& target_str);
+  LLVMTarget(LLVMInstance& scope, const std::string& target_str);  // NOLINT(runtime/references)
   /*!
-   * \brief Destroys LLVMTargetInfo object
+   * \brief Destroys LLVMTarget object
    */
-  ~LLVMTargetInfo();
+  ~LLVMTarget();
 
   /*!
-   * \brief Returns string representation (as TVM target) of the LLVMTargetInfo
+   * \brief Returns string representation (as TVM target) of the LLVMTarget
    * \return Target string
    *
-   * Note: If the LLVMTargetInfo object was created from a string `s`, the string
+   * Note: If the LLVMTarget object was created from a string `s`, the string
    * returned here may not be exactly equal to `s`. For example, if the CPU
    * was "default", the returned string will have CPU set to the detected host
    * CPU.
    */
   std::string str() const;
 
+  /*!
+   * \brief Get the LLVMInstance object from which the LLVMTarget object was
+   *        created
+   * \return The enclosing LLVMInstance object
+   */
+  const LLVMInstance& GetInstance() const { return instance_; }
+  /*!
+   * \brief Get the current LLVM context
+   * \return the current LLVM context
+   */
+  llvm::LLVMContext* GetContext() const;
   /*!
    * \brief Return LLVM's `TargetMachine`, or nullptr
    * \param allow_missing do not abort if the target machine cannot be created,
@@ -218,125 +228,6 @@ class LLVMTargetInfo {
    */
   llvm::CodeGenOpt::Level GetOptLevel() const { return opt_level_; }
 
-  /*!
-   * \class Option
-   * \brief Internal representation of command-line option
-   */
-  struct Option {
-    enum class OptType {
-      Invalid = 0,  //!< placeholder, indicates parsing error
-      Bool,         //!< enum value corresponding to type string "bool"
-      Int,          //!< enum value corresponding to type string "int"
-      UInt,         //!< enum value corresponding to type string "uint"
-      String,       //!< enum value corresponding to type string "string"
-    };
-    std::string name;  //!< option name
-    OptType type;      //!< type of the option value
-    struct {
-      union {
-        bool b;          //!< bool option value
-        int i;           //!< int option value
-        unsigned u = 0;  //!< unsigned option value
-      };
-      std::string s;  //!< string option value
-    } value;          //!< option value specified in the option string
-  };
-
-  /*!
-   * \brief Get LLVM command line options
-   * \return the list of LLVM command line options specified for this target
-   */
-  const std::vector<Option>& GetCommandLineOptions() const { return llvm_options_; }
-
-  /*!
-   * \brief Parse a string from the `cl-opt` target attribute
-   * \param str the option string
-   * \return parsed `Option` object, if parsing failed the type member will be
-   *         set to `Option::OptType::Invalid`
-   */
-  static Option ParseOptionString(const std::string& str);
-
-  /*!
-   * \brief Checks if the settings in this object that describe global state
-   *        match the current global state
-   * \return true or false correspondingly
-   * \note The global state can be modified by command line options. This
-   *       function checks if the specified options differ from their current
-   *       values.
-   */
-  bool MatchesGlobalState() const;
-
- protected:
-  /*!
-   * \brief Get the current value of given LLVM option
-   * \param opt Option with "type" and "name" set
-   * Fills in the "value" field in the provided Option argument, or sets the
-   * "type" to Invalid if the option value cannot be obtained.
-   */
-  void GetOptionValue(Option* opt) const;
-
- private:
-  std::string triple_;
-  std::string cpu_;
-  std::vector<std::string> attrs_;
-  std::vector<Option> llvm_options_;
-  llvm::TargetOptions target_options_;
-  llvm::FastMathFlags fast_math_flags_;
-  llvm::CodeGenOpt::Level opt_level_;
-  llvm::Reloc::Model reloc_model_ = llvm::Reloc::PIC_;
-  llvm::CodeModel::Model code_model_ = llvm::CodeModel::Small;
-  std::shared_ptr<llvm::TargetMachine> target_machine_;
-};
-
-/*!
- * \class LLVMTarget
- * \brief Information used by LLVM for code generation for particular target
- *
- * In addition to all information that LLVM needs for code generation for
- * a particular target, objects of this class handle saving and restoring
- * global LLVM state that may be affected by these flags. This way, code
- * generation for each LLVM-based target in TVM will start with the same LLVM
- * global state.
- *
- * Note that objects of this class must be created within the lifetime of an
- * LLVMInstance object.
- */
-class LLVMTarget : public LLVMTargetInfo {
- public:
-  /*!
-   * \brief Constructs LLVMTarget from `Target`
-   * \param scope LLVMInstance object
-   * \param target_info Target info object for target "llvm"
-   */
-  LLVMTarget(LLVMInstance& scope, const LLVMTargetInfo& target_info);  // NOLINT(runtime/references)
-  /*!
-   * \brief Constructs LLVMTarget from `Target`
-   * \param scope LLVMInstance object
-   * \param target TVM Target object for target "llvm"
-   */
-  LLVMTarget(LLVMInstance& scope, const Target& target);  // NOLINT(runtime/references)
-  /*!
-   * \brief Constructs LLVMTarget from target string
-   * \param scope LLVMInstance object
-   * \param target TVM target string for target "llvm"
-   */
-  LLVMTarget(LLVMInstance& scope, const std::string& target_str);  // NOLINT(runtime/references)
-  /*!
-   * \brief Destroys LLVMTarget object
-   */
-  ~LLVMTarget();
-
-  /*!
-   * \brief Get the LLVMInstance object from which the LLVMTarget object was
-   *        created
-   * \return The enclosing LLVMInstance object
-   */
-  const LLVMInstance& GetInstance() const { return instance_; }
-  /*!
-   * \brief Get the current LLVM context
-   * \return the current LLVM context
-   */
-  llvm::LLVMContext* GetContext() const;
   /*!
    * \brief Extract the target string from given `llvm::Module`
    * \param module LLVM module with the TVM target string embedded as metadata
@@ -354,27 +245,18 @@ class LLVMTarget : public LLVMTargetInfo {
   void ExitWithScope() {}
 
  private:
-  std::vector<Option> saved_llvm_options_;
-
-  /*!
-   * \brief Apply or revert command-line LLVM options
-   * \param apply_otherwise_revert if true, apply the options (saving previous
-   *        values, if false, then restore the saved values
-   * \param dry_run if true, do not make any changes (or save anything)
-   * \return true is changes were made (or would have been made in a dry run),
-   *         false otherwise
-   */
-  bool ApplyLLVMOptions(bool apply_otherwise_revert, bool dry_run = false);
-
   const LLVMInstance& instance_;
   std::weak_ptr<llvm::LLVMContext> ctx_;
 
-  /*!
-   * \brief Global singleton flag indicating whether LLVM's global state has
-   *        been modified or not (via command-line flags). There can only be
-   *        a single such modification in effect at any given time.
-   */
-  static bool modified_llvm_state_;
+  std::string triple_;
+  std::string cpu_;
+  std::vector<std::string> attrs_;
+  llvm::TargetOptions target_options_;
+  llvm::FastMathFlags fast_math_flags_;
+  llvm::CodeGenOpt::Level opt_level_;
+  llvm::Reloc::Model reloc_model_ = llvm::Reloc::PIC_;
+  llvm::CodeModel::Model code_model_ = llvm::CodeModel::Small;
+  std::shared_ptr<llvm::TargetMachine> target_machine_;
 };
 
 }  // namespace codegen
diff --git a/src/target/llvm/llvm_module.cc b/src/target/llvm/llvm_module.cc
index 8749925781..9aed66fffc 100644
--- a/src/target/llvm/llvm_module.cc
+++ b/src/target/llvm/llvm_module.cc
@@ -390,8 +390,8 @@ void LLVMModuleNode::LazyInitJIT() {
 }
 
 bool LLVMModuleNode::IsCompatibleWithHost(const llvm::TargetMachine* tm) const {
-  LLVMTargetInfo host_target(*llvm_instance_, "llvm");
-  auto tm_host = host_target.GetOrCreateTargetMachine();
+  With<LLVMTarget> host_target(*llvm_instance_, "llvm");  // FIXME(kparzysz-quic): nesting
+  auto tm_host = host_target->GetOrCreateTargetMachine();
   if (tm_host->getTargetTriple().getArch() != tm->getTargetTriple().getArch()) {
     LOG(INFO) << "Architecture mismatch: module=" << tm->getTargetTriple().str()
               << " host=" << tm_host->getTargetTriple().str();
@@ -496,7 +496,7 @@ runtime::Module CreateLLVMCppMetadataModule(runtime::metadata::Metadata metadata
   auto llvm_instance = std::make_unique<LLVMInstance>();
   With<LLVMTarget> llvm_target(*llvm_instance, target);
   bool system_lib = runtime->GetAttr<Bool>("system-lib").value_or(Bool(false));
-  auto cg = std::make_unique<CodeGenCPU>();
+  std::unique_ptr<CodeGenCPU> cg{new CodeGenCPU()};
 
   cg->Init("TVMMetadataMod", llvm_target.get(), system_lib, system_lib,
            /*target_c_runtime=*/false);
@@ -544,7 +544,7 @@ runtime::Module CreateLLVMCrtMetadataModule(const Array<runtime::Module>& module
   ICHECK(system_lib && target_c_runtime)
       << "For LLVM C-runtime metadata module, must include --system-lib and --runtime=c; "
       << "got target: " << target->str();
-  auto cg = std::make_unique<CodeGenCPU>();
+  std::unique_ptr<CodeGenCPU> cg{new CodeGenCPU()};
   cg->Init("TVMMetadataMod", llvm_target.operator->(), system_lib, system_lib, target_c_runtime);
 
   cg->DefineFunctionRegistry(func_names);
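
Besides dropping the cl-opt parsing machinery and folding LLVMTargetInfo back into LLVMTarget, the hunks above keep GetContext() guarding against a destroyed LLVMInstance through a weak_ptr. A small self-contained sketch of that ownership shape, with invented names in place of the LLVM/TVM classes:

    #include <cassert>
    #include <memory>

    struct FakeContext {
      int id;
    };

    // The instance owns the context; targets only observe it.
    class FakeInstance {
     public:
      FakeInstance() : ctx_(std::make_shared<FakeContext>()) {}
      std::shared_ptr<FakeContext> GetContext() const { return ctx_; }

     private:
      std::shared_ptr<FakeContext> ctx_;
    };

    class FakeTarget {
     public:
      explicit FakeTarget(const FakeInstance& instance) : ctx_(instance.GetContext()) {}

      FakeContext* GetContext() const {
        // Mirrors: ICHECK(!ctx_.expired()) << "LLVM scope has been deleted";
        assert(!ctx_.expired() && "scope has been deleted");
        return ctx_.lock().get();
      }

     private:
      std::weak_ptr<FakeContext> ctx_;
    };

    int main() {
      FakeInstance instance;
      FakeTarget target(instance);
      FakeContext* ctx = target.GetContext();  // valid while `instance` lives
      (void)ctx;
      return 0;
    }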


[tvm] 16/20: ad simplify optional

Posted by an...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

andrewzhaoluo pushed a commit to branch aluo/rebase-08312022-autotensorization-fq2i-changes
in repository https://gitbox.apache.org/repos/asf/tvm.git

commit c591a2e9e9715a07891740f4f7ec89ca25fde427
Author: Andrew Zhao Luo <an...@gmail.com>
AuthorDate: Fri Sep 2 15:12:20 2022 -0700

    ad simplify optional
---
 src/te/autodiff/ad_simplify.cc | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/src/te/autodiff/ad_simplify.cc b/src/te/autodiff/ad_simplify.cc
index 26047e879e..240adf14b3 100644
--- a/src/te/autodiff/ad_simplify.cc
+++ b/src/te/autodiff/ad_simplify.cc
@@ -44,6 +44,7 @@
  * Due to TVM's restriction, we also lift the reduction to the top of the compute stage.
  *
  */
+#include <dmlc/optional.h>
 #include <tvm/arith/analyzer.h>
 #include <tvm/arith/int_solver.h>
 #include <tvm/runtime/registry.h>
@@ -53,7 +54,6 @@
 
 #include <iterator>
 #include <memory>
-#include <optional>
 #include <utility>
 
 #include "ad_utils.h"
@@ -629,9 +629,9 @@ class EliminateDivModMutator : public ExprMutator {
   }
 
  private:
-  std::optional<std::pair<Var, Var>> AddNewVarPair(const PrimExpr& e, const PrimExpr& mut,
-                                                   int64_t val, DivMode mode) {
-    using tresult = std::optional<std::pair<Var, Var>>;
+  dmlc::optional<std::pair<Var, Var>> AddNewVarPair(const PrimExpr& e, const PrimExpr& mut,
+                                                    int64_t val, DivMode mode) {
+    using tresult = dmlc::optional<std::pair<Var, Var>>;
 
     // Try to find the variables using the mutated expressions
     if (!e.same_as(mut)) {
@@ -1183,19 +1183,21 @@ PrimExpr RemoveJacobianAndLiftNonzeroCondImpl(const PrimExpr& expr_orig, const A
         return RemoveJacobianAndLiftNonzeroCondImpl(new_red, axis, vranges);
       }
 
+      PrimExpr new_outer_cond, new_reduce_cond;
       Array<PrimExpr> new_source = red->source;
 
       // Partially lift conditions from the reduce condition
-      auto [new_outer_cond, new_reduce_cond] =
+      std::tie(new_outer_cond, new_reduce_cond) =
           LiftConditionsThroughReduction(red->condition, red->axis, axis);
 
       // If it's not sum then we haven't yet lifted nonzeroness cond from the source
       if (!is_sum) {
+        PrimExpr outer_nz_cond, nz_cond, nz_source;
         auto nz = NonzeronessCondition(red->source[red->value_index]);
         // Append conditions from the reduction
-        PrimExpr nz_source = nz.value;
-        auto [outer_nz_cond, nz_cond] =
-            LiftConditionsThroughReduction(new_reduce_cond && nz.cond, red->axis, axis);
+        nz_cond = new_reduce_cond && nz.cond;
+        nz_source = nz.value;
+        std::tie(outer_nz_cond, nz_cond) = LiftConditionsThroughReduction(nz_cond, red->axis, axis);
         new_outer_cond = new_outer_cond && outer_nz_cond;
         new_source.Set(red->value_index, Select(nz_cond, nz_source, make_zero(nz_source.dtype())));
       }
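
Here the return type dmlc::optional<std::pair<Var, Var>> takes over from std::optional, and the structured bindings further down are unpacked with std::tie into PrimExprs declared beforehand. A short sketch of the optional-of-pair return half, with plain std::string stand-ins for tir::Var:

    #include <dmlc/optional.h>

    #include <cstdint>
    #include <string>
    #include <utility>

    using VarPair = std::pair<std::string, std::string>;

    // Returns the new (div, mod) variable names, or an empty optional when no
    // replacement should be introduced; the shape mirrors AddNewVarPair above.
    dmlc::optional<VarPair> MakeDivModVars(int64_t divisor) {
      using tresult = dmlc::optional<VarPair>;
      if (divisor <= 0) {
        return tresult();  // "no value", formerly `return std::nullopt;`
      }
      return tresult(std::make_pair("div_" + std::to_string(divisor),
                                    "mod_" + std::to_string(divisor)));
    }

    int main() {
      dmlc::optional<VarPair> vars = MakeDivModVars(4);
      if (vars) {                  // explicit operator bool, akin to has_value()
        const VarPair& p = *vars;  // dereference just like std::optional
        (void)p;
      }
      return 0;
    }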


[tvm] 10/20: optional cast

Posted by an...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

andrewzhaoluo pushed a commit to branch aluo/rebase-08312022-autotensorization-fq2i-changes
in repository https://gitbox.apache.org/repos/asf/tvm.git

commit a45cd01be21579d0eee846b6187bdce59e2408a0
Author: Andrew Zhao Luo <an...@gmail.com>
AuthorDate: Fri Sep 2 12:37:50 2022 -0700

    optional cast
---
 python/tvm/relay/op/contrib/dnnl.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/tvm/relay/op/contrib/dnnl.py b/python/tvm/relay/op/contrib/dnnl.py
index e27449ac43..67909b04b8 100644
--- a/python/tvm/relay/op/contrib/dnnl.py
+++ b/python/tvm/relay/op/contrib/dnnl.py
@@ -831,7 +831,7 @@ class LayerNormRewritePattern1(DFPatternCallback):
         self.beta = wildcard()
         mu = is_op("mean")(self.data)
         diff = is_op("subtract")(self.data, mu)
-        cdiff = is_op("cast")(diff)
+        cdiff = is_op("cast")(diff) | diff  # cast does not need to be here usually
         const_two = (
             is_expr(relay.const(2))
             | is_expr(relay.const(2.0))


[tvm] 09/20: dnnl pattern matching

Posted by an...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

andrewzhaoluo pushed a commit to branch aluo/rebase-08312022-autotensorization-fq2i-changes
in repository https://gitbox.apache.org/repos/asf/tvm.git

commit 946815850b8f7b13a02fddb46e5cc7b7be01aa58
Author: Andrew Zhao Luo <an...@gmail.com>
AuthorDate: Thu Sep 1 21:46:53 2022 -0700

    dnnl pattern matching
---
 python/tvm/relay/op/contrib/dnnl.py | 64 +++++++++++++++++++++++++++----------
 1 file changed, 47 insertions(+), 17 deletions(-)

diff --git a/python/tvm/relay/op/contrib/dnnl.py b/python/tvm/relay/op/contrib/dnnl.py
index f7752e41b0..e27449ac43 100644
--- a/python/tvm/relay/op/contrib/dnnl.py
+++ b/python/tvm/relay/op/contrib/dnnl.py
@@ -36,22 +36,18 @@ import logging
 from functools import reduce
 
 import tvm.ir
-from tvm.ir import Op
 from tvm import relay
+from tvm.ir import Op
+from tvm.relay import expr as _expr
 from tvm.relay import transform
-from tvm.relay.expr import GlobalVar
-from tvm.relay.expr_functor import ExprMutator, ExprVisitor
-from tvm.relay.expr import const
-
 from tvm.relay.analysis import analysis as _analysis
-from tvm.relay import expr as _expr
+from tvm.relay.expr import Call, GlobalVar, TupleGetItem, const
+from tvm.relay.expr_functor import ExprMutator, ExprVisitor
 
-from tvm.relay.expr import Call, TupleGetItem
 from ... import _ffi_api
-from ...dataflow_pattern import wildcard, is_op, is_constant, is_expr, rewrite, DFPatternCallback
+from ...dataflow_pattern import DFPatternCallback, is_constant, is_expr, is_op, rewrite, wildcard
 from .register import register_pattern_table
 
-
 logger = logging.getLogger("DNNL")
 supported_post_elts = ["nn.relu", "tanh", "sigmoid", "clip", "gelu", "swish", "mish", None]
 
@@ -809,7 +805,7 @@ def prune_dnnl_subgraphs(mod):
     return new_mod
 
 
-class LayerNormRewrite(DFPatternCallback):
+class LayerNormRewritePattern1(DFPatternCallback):
     """
     A callback to rewrite the following operators into a single layer normalization operator.
 
@@ -826,7 +822,42 @@ class LayerNormRewrite(DFPatternCallback):
             /* ty=Tensor[(1, 3136, 64), float32] */;
     10   %13 = add(%12, meta[relay.Constant][3] /* ty=Tensor[(64), float32] */)
             /* ty=Tensor[(1, 3136, 64), float32] */;
+    """
+
+    def __init__(self):
+        super(LayerNormRewritePattern1, self).__init__()
+        self.data = wildcard()
+        self.gamma = wildcard()
+        self.beta = wildcard()
+        mu = is_op("mean")(self.data)
+        diff = is_op("subtract")(self.data, mu)
+        cdiff = is_op("cast")(diff)
+        const_two = (
+            is_expr(relay.const(2))
+            | is_expr(relay.const(2.0))
+            | is_expr(relay.const(2.0, dtype="float16"))
+        )
+        p1 = is_op("power")(cdiff, const_two)
+        mp1 = is_op("mean")(p1)
+        eps = is_constant()  # TODO: check epsilon is something reasonable
+        added_eps = is_op("add")(mp1, eps)
+        deno = is_op("sqrt")(added_eps)
+        div_out = is_op("divide")(diff, deno)
+        div_out2 = diff * is_op("rsqrt")(added_eps)
+        weighted = is_op("multiply")(div_out | div_out2, self.gamma)
+        added_bias = is_op("add")(weighted, self.beta)
+        self.pattern = added_bias
 
+    def callback(self, pre, post, node_map):
+        data = node_map[self.data][0]
+        gamma = node_map[self.gamma][0]
+        beta = node_map[self.beta][0]
+        return relay.op.nn.layer_norm(data=data, gamma=gamma, beta=beta)
+
+
+class LayerNormRewritePattern2(DFPatternCallback):
+    """
+    A callback to rewrite the following operators into a single layer normalization operator.
     Pattern #2:
     1   %0 = mean(%input, axis=[-1], keepdims=True);
     2   %1 = variance(%input, %0, axis=[-1], keepdims=True);
@@ -842,19 +873,16 @@ class LayerNormRewrite(DFPatternCallback):
     """
 
     def __init__(self):
-        super(LayerNormRewrite, self).__init__()
+        super(LayerNormRewritePattern2, self).__init__()
         self.data = wildcard()
         self.gamma = wildcard()
         self.beta = wildcard()
         mu = is_op("mean")(self.data)
-        diff = is_op("subtract")(self.data, mu)
-        cdiff = diff | is_op("cast")(diff)
-        const_two = is_expr(relay.const(2)) | is_expr(relay.const(2.0))
-        p1 = is_op("power")(cdiff, const_two)
-        mp1 = is_op("mean")(p1) | is_op("variance")(self.data, mu)
+        mp1 = is_op("variance")(self.data, mu)
         eps = is_expr(relay.const(1e-5)) | is_expr(relay.const(1e-6))
         added_eps = is_op("add")(mp1, eps)
         deno = is_op("sqrt")(added_eps)
+        diff = is_op("subtract")(self.data, mu)
         div_out = is_op("divide")(diff, deno)
         div_out2 = diff * is_op("rsqrt")(added_eps)
         weighted = is_op("multiply")(div_out | div_out2, self.gamma)
@@ -872,7 +900,9 @@ def rewrite_layer_norm(mod):
     """Rewrite the input graph to replace multiple operators with a TVM native layer normalization
     operator so that we can offload them to dnnl layer normalization byoc part.
     """
-    mod["main"] = rewrite(LayerNormRewrite(), mod["main"])
+    mod["main"] = rewrite(LayerNormRewritePattern1(), mod["main"])
+    mod["main"] = rewrite(LayerNormRewritePattern2(), mod["main"])
+
     return mod
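
The split callbacks above are driven by the standard DFPatternCallback/rewrite machinery; rewrite_layer_norm now simply runs the two layer-norm callbacks one after the other. As a minimal standalone sketch of that mechanism (an illustrative toy pattern, not the layer-norm one): match x + x and rewrite it to x * 2.

    # Sketch of DFPatternCallback + rewrite, the mechanism used above.
    from tvm import relay
    from tvm.relay.dataflow_pattern import DFPatternCallback, is_op, rewrite, wildcard

    class DoubleToMul(DFPatternCallback):
        """Match x + x and replace it with x * 2 (toy example)."""

        def __init__(self):
            super().__init__()
            self.x = wildcard()
            self.pattern = is_op("add")(self.x, self.x)

        def callback(self, pre, post, node_map):
            x = node_map[self.x][0]
            return x * relay.const(2.0)

    x = relay.var("x", shape=(4,), dtype="float32")
    print(rewrite(DoubleToMul(), x + x))  # roughly: multiply(%x, 2f)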
 
 


[tvm] 11/20: old string without stringview

Posted by an...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

andrewzhaoluo pushed a commit to branch aluo/rebase-08312022-autotensorization-fq2i-changes
in repository https://gitbox.apache.org/repos/asf/tvm.git

commit 46e9243f676222822bbb1ef3e794171de77d6d54
Author: Andrew Zhao Luo <an...@gmail.com>
AuthorDate: Fri Sep 2 14:03:58 2022 -0700

    old string without stringview
---
 include/tvm/runtime/container/string.h | 35 +++++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

diff --git a/include/tvm/runtime/container/string.h b/include/tvm/runtime/container/string.h
index 5ecd89e9f5..28b0358014 100644
--- a/include/tvm/runtime/container/string.h
+++ b/include/tvm/runtime/container/string.h
@@ -36,9 +36,36 @@
 #include <initializer_list>
 #include <memory>
 #include <string>
+#include <unordered_map>
+#include <utility>
+// We use c++14 std::experimental::string_view for optimizing hash computation
+// only right now, its usage is limited in this file. Any broader usage of
+// std::experiment in our core codebase is discouraged and needs community
+// discussion for each use case. Reference for feature test macros of
+// string_view:
+// https://isocpp.org/std/standing-documents/sd-6-sg10-feature-test-recommendations
+// https://en.cppreference.com/w/User:D41D8CD98F/feature_testing_macros
+#if defined(__cpp_lib_experimental_string_view) && __cpp_lib_experimental_string_view >= 201411
+#define TVM_USE_CXX14_STRING_VIEW_HASH 1
+#else
+#define TVM_USE_CXX14_STRING_VIEW_HASH 0
+#endif
+
+// Tested with clang version 9.0.1 and c++17. It will detect string_view support
+// correctly.
+#if defined(__cpp_lib_string_view) && __cpp_lib_string_view >= 201606
+#define TVM_USE_CXX17_STRING_VIEW_HASH 1
+#else
+#define TVM_USE_CXX17_STRING_VIEW_HASH 0
+#endif
+
+#if TVM_USE_CXX17_STRING_VIEW_HASH
 #include <string_view>
+#elif TVM_USE_CXX14_STRING_VIEW_HASH
+#include <experimental/string_view>
+#endif
+
 #include <type_traits>
-#include <unordered_map>
 #include <utility>
 #include <vector>
 
@@ -250,7 +277,13 @@ class String : public ObjectRef {
   static size_t HashBytes(const char* data, size_t size) {
     // This function falls back to string copy with c++11 compiler and is
     // recommended to be compiled with c++14
+#if TVM_USE_CXX17_STRING_VIEW_HASH
     return std::hash<std::string_view>()(std::string_view(data, size));
+#elif TVM_USE_CXX14_STRING_VIEW_HASH
+    return std::hash<std::experimental::string_view>()(std::experimental::string_view(data, size));
+#else
+    return std::hash<std::string>()(std::string(data, size));
+#endif
   }
 
   TVM_DEFINE_NOTNULLABLE_OBJECT_REF_METHODS(String, ObjectRef, StringObj);


[tvm] 05/20: update configs

Posted by an...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

andrewzhaoluo pushed a commit to branch aluo/rebase-08312022-autotensorization-fq2i-changes
in repository https://gitbox.apache.org/repos/asf/tvm.git

commit 52e39697f31974f60e215e7ce90e1c4905ac9ee8
Author: Andrew Luo <an...@gmail.com>
AuthorDate: Wed Aug 17 11:01:56 2022 -0700

    update configs
---
 python/tvm/meta_schedule/default_config.py | 111 +++++++++++++++++++++++++++--
 1 file changed, 106 insertions(+), 5 deletions(-)
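
Before the diff itself, a rough standalone sketch of the dispatch it introduces: the VNNI-specific defaults are keyed off the target's -mcpu string. The helper below mirrors target_has_vnni from the diff, with the parameter renamed to mcpu for clarity, and the CPU names are the ones listed in the change:

    # Sketch of the mcpu-based check the diff below relies on.
    from tvm.target import Target

    def target_has_vnni(mcpu):
        # Same CPU list as the helper added in the diff; the argument is the
        # target's -mcpu string rather than the Target object itself.
        return mcpu in {
            "cascadelake",
            "icelake-client",
            "icelake-server",
            "rocketlake",
            "tigerlake",
            "cooperlake",
            "sapphirerapids",
            "alderlake",
        }

    print(target_has_vnni(Target("llvm -mcpu=cascadelake").mcpu))  # True  -> VNNI defaults
    print(target_has_vnni(Target("llvm -mcpu=skylake").mcpu))      # False -> plain LLVM defaults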

diff --git a/python/tvm/meta_schedule/default_config.py b/python/tvm/meta_schedule/default_config.py
index 652f09261b..73ba0e4fa8 100644
--- a/python/tvm/meta_schedule/default_config.py
+++ b/python/tvm/meta_schedule/default_config.py
@@ -20,9 +20,11 @@ import logging
 from os import path as osp
 from typing import Callable, Dict, List, Optional, Union
 
+from tvm._ffi.registry import register_func
+from tvm.contrib import nvcc
 from tvm.ir import IRModule
 from tvm.target import Target
-from tvm.tir import PrimFunc
+from tvm.tir import PrimFunc, tensor_intrin
 
 from .builder import Builder, LocalBuilder
 from .cost_model import CostModel, XGBModel
@@ -43,6 +45,20 @@ FnPostproc = Callable[[], List[Postproc]]
 FnMutatorProb = Callable[[], Dict[Mutator, float]]
 
 
+def target_has_vnni(target):
+    return target in {
+        "cascadelake",
+        "icelake-client",
+        "icelake-server",
+        "rocketlake",
+        "tigerlake",
+        "cooperlake",
+        "sapphirerapids",
+        "alderlake",
+    }
+
+
+@register_func("tvm.meta_schedule.tune.parse_mod")  # for use in ApplyHistoryBest
 def mod(mod: Union[PrimFunc, IRModule]) -> IRModule:  # pylint: disable=redefined-outer-name
     """Normalize the input to an IRModule"""
     if isinstance(mod, PrimFunc):
@@ -174,9 +190,13 @@ def schedule_rules(  # pylint: disable=redefined-outer-name
         return sch_rules()
     if sch_rules is not None:
         raise TypeError(f"Expected `sch_rules` to be None or callable, but gets: {sch_rules}")
-    if target.kind.name in ["llvm", "hexagon"]:
+    if target.kind.name == "llvm":
+        if target_has_vnni(target.mcpu):
+            return _DefaultLLVMVNNI.schedule_rules()
         return _DefaultLLVM.schedule_rules()
     if target.kind.name in ["cuda", "rocm", "vulkan"]:
+        if target.kind.name == "cuda" and nvcc.have_tensorcore(target=target):
+            return _DefaultCUDATensorCore.schedule_rules()
         return _DefaultCUDA.schedule_rules()
     raise ValueError(f"Unsupported target: {target}")
 
@@ -190,9 +210,13 @@ def postproc(  # pylint: disable=redefined-outer-name
         return postproc()
     if postproc is not None:
         raise TypeError(f"Expected `postproc` to be None or callable, but gets: {postproc}")
-    if target.kind.name in ["llvm", "hexagon"]:
+    if target.kind.name == "llvm":
+        if target_has_vnni(target.mcpu):
+            return _DefaultLLVMVNNI.postprocs()
         return _DefaultLLVM.postprocs()
     if target.kind.name in ["cuda", "rocm", "vulkan"]:
+        if target.kind.name == "cuda" and nvcc.have_tensorcore(target=target):
+            return _DefaultCUDATensorCore.postprocs()
         return _DefaultCUDA.postprocs()
     raise ValueError(f"Unsupported target: {target}")
 
@@ -208,9 +232,13 @@ def mutator_probs(  # pylint: disable=redefined-outer-name
         raise TypeError(
             f"Expected `mutator_probs` to be None or callable, but gets: {mutator_probs}"
         )
-    if target.kind.name in ["llvm", "hexagon"]:
+    if target.kind.name == "llvm":
+        if target_has_vnni(target.mcpu):
+            return _DefaultLLVMVNNI.mutator_probs()
         return _DefaultLLVM.mutator_probs()
     if target.kind.name in ["cuda", "rocm", "vulkan"]:
+        if target.kind.name == "cuda" and nvcc.have_tensorcore(target=target):
+            return _DefaultCUDATensorCore.mutator_probs()
         return _DefaultCUDA.mutator_probs()
     raise ValueError(f"Unsupported target: {target}")
 
@@ -277,6 +305,77 @@ class _DefaultLLVM:
         }
 
 
+class _DefaultLLVMVNNI:
+    """Default tuning configuration for LLVM with VNNI."""
+
+    @staticmethod
+    def schedule_rules() -> List[ScheduleRule]:
+        from tvm.meta_schedule import schedule_rule as M
+
+        logger.info("Using schedule rule: LLVM VNNI")
+
+        return [
+            M.AutoInline(
+                into_producer=False,
+                into_consumer=True,
+                inline_const_tensor=True,
+                disallow_if_then_else=True,
+                require_injective=True,
+                require_ordered=True,
+                disallow_op=["tir.exp"],
+            ),
+            M.AddRFactor(max_jobs_per_core=16, max_innermost_factor=64),
+            M.MultiLevelTilingWithIntrin(
+                tensor_intrin.VNNI_DOT_16x4_INTRIN,
+                structure="SSRSRS",
+                tile_binds=None,
+                max_innermost_factor=64,
+                vector_load_lens=None,
+                reuse_read=None,
+                reuse_write=M.ReuseType(
+                    req="may",
+                    levels=[1, 2],
+                    scope="global",
+                ),
+            ),
+            M.MultiLevelTiling(
+                structure="SSRSRS",
+                tile_binds=None,
+                max_innermost_factor=64,
+                vector_load_lens=None,
+                reuse_read=None,
+                reuse_write=M.ReuseType(
+                    req="may",
+                    levels=[1, 2],
+                    scope="global",
+                ),
+            ),
+            M.ParallelizeVectorizeUnroll(
+                max_jobs_per_core=16,
+                max_vectorize_extent=64,
+                unroll_max_steps=[0, 16, 64, 512],
+                unroll_explicit=True,
+            ),
+            M.RandomComputeLocation(),
+        ]
+
+    @staticmethod
+    def postprocs() -> List[Postproc]:
+        from tvm.meta_schedule import postproc as M
+
+        return [
+            M.DisallowDynamicLoop(),
+            M.RewriteParallelVectorizeUnroll(),
+            M.RewriteReductionBlock(),
+            M.RewriteTensorize(vectorize_init_loop=True),
+            M.RewriteLayout(),
+        ]
+
+    @staticmethod
+    def mutator_probs() -> Dict[Mutator, float]:
+        return _DefaultLLVM.mutator_probs()
+
+
 class _DefaultCUDA:
     """Default tuning configuration for CUDA."""
 
@@ -355,10 +454,12 @@ class _DefaultCUDATensorCore:
         from tvm.meta_schedule import schedule_rule as M
         from tvm.tir.tensor_intrin.cuda import get_wmma_intrin_group
 
+        logger.info("Using schedule rule: CUDA tensorcore")
+
         return [
             M.MultiLevelTilingTensorCore(
                 intrin_groups=[
-                    get_wmma_intrin_group(
+                    tensor_intrin.get_wmma_intrin_group(
                         store_scope="shared",
                         in_dtype=in_dtype,
                         out_dtype=out_dtype,