You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by ar...@apache.org on 2021/09/30 23:28:42 UTC

[tvm] branch main updated: Issue8717 x86 dws conv2d schedule (#9092)

This is an automated email from the ASF dual-hosted git repository.

areusch pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git


The following commit(s) were added to refs/heads/main by this push:
     new 12330ca  Issue8717 x86 dws conv2d schedule (#9092)
12330ca is described below

commit 12330ca2e678e2e8842929a2b90b63bf67e61108
Author: Sergei Smirnov <89...@users.noreply.github.com>
AuthorDate: Thu Sep 30 16:28:11 2021 -0700

    Issue8717 x86 dws conv2d schedule (#9092)
    
    * [microTVM] Update support for ARMv7m intrinsic
    
     - Improved implementation of gemm function for conv2d
     - Removed %4 restriction for channels
     - Added test case to verify SMLAD intrinsic speed acceleration
    
    Signed-off-by: Sergey Smirnov <Se...@grovety.com>
    
    * [microTVM] Update support for ARMv7m intrinsic
    
     - Improved implementation of gemm function for conv2d
     - Removed %4 restriction for channels
     - Added test case to verify SMLAD intrinsic speed acceleration
    
    Signed-off-by: Sergey Smirnov <Se...@grovety.com>
    
    * Issue 8717 Add schedule for depthwise_conv2d_nhwc
    
    * Implemented discussed changes.
    
    * Removed unnecessary test files.
    
    * Formatting fixed.
    
    * Formatting fixed2.
    
    * Formatting fixed3.
    
    * Formatting fixed4.
    
    * Formatting fixed5.
    
    * Fixed test time result checking.
    
    * Check rebuild.
    
    * Formatting fixed.
    
    * Formatting fixed.
    
    * Add default DepthwiseConv2D schedule in NHWC layout for arm cpu
    
    * Fixed micro model library test. Checking size reduced to 16 bytes from 2466816.
    
    * Revert "Merge branch 'update-arm-simd-intrinsic' of https://github.com/sergey-grovety/tvm into issue8717-x86-DwsConv2d-schedule"
    
    This reverts commit e927567058403bcc9e4fdc3d24828b3dcd6a661b, reversing
    changes made to 0ccb5a01495d02f521eea2af9efa6a3153c4f72b.
    
    * Revert "fix test_export_model_library_format_workspace"
    
    This reverts commit 32ede712ada81242f435693403a78d98adf9afeb.
    fix format
    move schedule_depthwise_conv2d_nhwc to generic conv2d, add test for schedule_depthwise_conv2d_nhwc
    fix test_export_model_library_format_workspace
    use x86 depthwise_conv2d_nhwc schedule for arm_cpu
    Add x86 schedule for depthwise_conv2d_nhwc
    
    # Conflicts:
    #	python/tvm/relay/op/strategy/arm_cpu.py
    
    * move schedule_depthwise_conv2d_nhwc to generic conv2d, add test for schedule_depthwise_conv2d_nhwc
    fix format
    Revert "fix test_export_model_library_format_workspace"
    added a missing comma
    
    * Revert wrong merge changes
    
    * empty commit to force pipeline restart
    
    * Add condition to use compute_at for generic schedule_depthwise_conv2d_nhwc
    
    Co-authored-by: Sergey Smirnov <Se...@mir.dev>
    Co-authored-by: Alex-grovety <Al...@mir.dev>
---
 python/tvm/relay/op/strategy/arm_cpu.py            | 19 +++++++++----
 python/tvm/topi/generic/conv2d.py                  | 31 +++++++++++++++++++++-
 .../topi/python/test_topi_depthwise_conv2d.py      |  6 ++++-
 3 files changed, 49 insertions(+), 7 deletions(-)

diff --git a/python/tvm/relay/op/strategy/arm_cpu.py b/python/tvm/relay/op/strategy/arm_cpu.py
index 005eae6..2d331d0 100644
--- a/python/tvm/relay/op/strategy/arm_cpu.py
+++ b/python/tvm/relay/op/strategy/arm_cpu.py
@@ -21,6 +21,7 @@ import logging
 
 from tvm import topi
 from ....target import arm_isa
+from ....topi.generic import conv2d as conv2d_generic
 from .generic import *
 from .. import op as _op
 
@@ -197,11 +198,19 @@ def conv2d_strategy_arm_cpu(attrs, inputs, out_type, target):
                 )
         elif layout == "NHWC":
             assert kernel_layout == "HWOI"
-            strategy.add_implementation(
-                wrap_compute_conv2d(topi.arm_cpu.compute_depthwise_conv2d_nhwc),
-                wrap_topi_schedule(topi.arm_cpu.schedule_depthwise_conv2d_nhwc),
-                name="depthwise_conv2d_nhwc.arm_cpu",
-            )
+            is_aarch64 = topi.arm_cpu.arm_utils.is_aarch64_arm()
+            if is_aarch64 or "+neon" in target.mattr:
+                strategy.add_implementation(
+                    wrap_compute_conv2d(topi.arm_cpu.compute_depthwise_conv2d_nhwc),
+                    wrap_topi_schedule(topi.arm_cpu.schedule_depthwise_conv2d_nhwc),
+                    name="depthwise_conv2d_nhwc.arm_cpu",
+                )
+            else:
+                strategy.add_implementation(
+                    wrap_compute_conv2d(topi.nn.depthwise_conv2d_nhwc),
+                    wrap_topi_schedule(conv2d_generic.schedule_depthwise_conv2d_nhwc),
+                    name="depthwise_conv2d_nhwc.generic",
+                )
         else:
             raise RuntimeError("Unsupported depthwise_conv2d layout {} for arm cpu".format(layout))
     else:  # group_conv2d
diff --git a/python/tvm/topi/generic/conv2d.py b/python/tvm/topi/generic/conv2d.py
index 4daa84c..3772fdb 100644
--- a/python/tvm/topi/generic/conv2d.py
+++ b/python/tvm/topi/generic/conv2d.py
@@ -20,7 +20,7 @@
 from tvm import te
 from tvm import autotvm
 from tvm.autotvm.task.space import SplitEntity, OtherOptionEntity
-from ..utils import get_const_tuple
+from ..utils import get_const_tuple, traverse_inline
 
 
 def fallback_schedule_cpu_common_int8(cfg, wkl, int32_lanes, num_int8_elements):
@@ -361,3 +361,32 @@ def schedule_conv_NCHWc_cpu_1x1_int8(
             raise ValueError("Unsupported output ndim: %s" % out_ndim)
 
     return s
+
+
+def schedule_depthwise_conv2d_nhwc(outs):
+    """Create schedule for depthwise conv2d in NHWC layout.
+    Parameters
+    ----------
+    outs : list[te.tensor.Tensor]
+            The output tensors.
+    Returns
+    -------
+    s : tvm.te.schedule.Schedule
+        The computation schedule for depthwise conv2d.
+    """
+    outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+    s = te.create_schedule([x.op for x in outs])
+
+    def _callback(op):
+        """Traverse operators from computation graph"""
+        if "depthwise_conv2d_nhwc" in op.tag:
+            out = outs[0]
+            depthwise_conv2d_out = op.output(0)
+            data_pad = depthwise_conv2d_out.op.input_tensors[0]
+            s[data_pad].compute_inline()
+            if depthwise_conv2d_out != out:
+                s[depthwise_conv2d_out].compute_at(s[out], s[out].op.axis[3])
+            s[out].fuse(*s[out].op.axis)
+
+    traverse_inline(s, outs[0].op, _callback)
+    return s
diff --git a/tests/python/topi/python/test_topi_depthwise_conv2d.py b/tests/python/topi/python/test_topi_depthwise_conv2d.py
index 27601cd..24c2321 100644
--- a/tests/python/topi/python/test_topi_depthwise_conv2d.py
+++ b/tests/python/topi/python/test_topi_depthwise_conv2d.py
@@ -30,6 +30,7 @@ from tvm.topi.nn.utils import get_pad_tuple
 from tvm.contrib.pickle_memoize import memoize
 from tvm.topi.nn.depthwise_conv2d import _get_workload
 from tvm.topi.x86.depthwise_conv2d import _fallback_schedule
+from tvm.topi.generic import conv2d as conv2d_generic
 
 
 _depthwise_conv2d_implement = {
@@ -53,7 +54,10 @@ _depthwise_conv2d_implement = {
         ],
     },
     "NHWC": {
-        "generic": [(topi.nn.depthwise_conv2d_nhwc, topi.generic.schedule_depthwise_conv2d_nhwc)],
+        "generic": [
+            (topi.nn.depthwise_conv2d_nhwc, topi.generic.schedule_depthwise_conv2d_nhwc),
+            (topi.nn.depthwise_conv2d_nhwc, conv2d_generic.schedule_depthwise_conv2d_nhwc),
+        ],
         "arm_cpu": [
             (
                 topi.arm_cpu.compute_depthwise_conv2d_nhwc,