You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by ar...@apache.org on 2021/09/30 23:28:42 UTC
[tvm] branch main updated: Issue8717 x86 dws conv2d schedule (#9092)
This is an automated email from the ASF dual-hosted git repository.
areusch pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new 12330ca Issue8717 x86 dws conv2d schedule (#9092)
12330ca is described below
commit 12330ca2e678e2e8842929a2b90b63bf67e61108
Author: Sergei Smirnov <89...@users.noreply.github.com>
AuthorDate: Thu Sep 30 16:28:11 2021 -0700
Issue8717 x86 dws conv2d schedule (#9092)
* [microTVM] Update support for ARMv7m intrinsic
- Improved implementation of gemm function for conv2d
- Removed %4 restriction for channels
- Added test case to verify SMLAD intrinsic speed acceleration
Signed-off-by: Sergey Smirnov <Se...@grovety.com>
* [microTVM] Update support for ARMv7m intrinsic
- Improved implementation of gemm function for conv2d
- Removed %4 restriction for channels
- Added test case to verify SMLAD intrinsic speed acceleration
Signed-off-by: Sergey Smirnov <Se...@grovety.com>
* Issue 8717 Add schedule for depthwise_conv2d_nhwc
* Implemented discussed changes.
* Removed unnecessary test files.
* Formatting fixed.
* Formatting fixed2.
* Formatting fixed3.
* Formatting fixed4.
* Formatting fixed5.
* Fixed test time result checking.
* Check rebuild.
* Formatting fixed.
* Formatting fixed.
* Add default DepthwiseConv2D schedule in NHWC layout for arm cpu
* Fixed micro model library test. Checking size reduced to 16 bytes from 2466816.
* Revert "Merge branch 'update-arm-simd-intrinsic' of https://github.com/sergey-grovety/tvm into issue8717-x86-DwsConv2d-schedule"
This reverts commit e927567058403bcc9e4fdc3d24828b3dcd6a661b, reversing
changes made to 0ccb5a01495d02f521eea2af9efa6a3153c4f72b.
* Revert "fix test_export_model_library_format_workspace"
This reverts commit 32ede712ada81242f435693403a78d98adf9afeb.
fix format
move schedule_depthwise_conv2d_nhwc to generic conv2d, add test for schedule_depthwise_conv2d_nhwc
fix test_export_model_library_format_workspace
use x86 depthwise_conv2d_nhwc schedule for arm_cpu
Add x86 schedule for depthwise_conv2d_nhwc
# Conflicts:
# python/tvm/relay/op/strategy/arm_cpu.py
* move schedule_depthwise_conv2d_nhwc to generic conv2d, add test for schedule_depthwise_conv2d_nhwc
fix format
Revert "fix test_export_model_library_format_workspace"
added a missing comma
* Revert wrong merge changes
* empty commit to force pipeline restart
* Add condition to use compute_at for generic schedule_depthwise_conv2d_nhwc
Co-authored-by: Sergey Smirnov <Se...@mir.dev>
Co-authored-by: Alex-grovety <Al...@mir.dev>
---
python/tvm/relay/op/strategy/arm_cpu.py | 19 +++++++++----
python/tvm/topi/generic/conv2d.py | 31 +++++++++++++++++++++-
.../topi/python/test_topi_depthwise_conv2d.py | 6 ++++-
3 files changed, 49 insertions(+), 7 deletions(-)
diff --git a/python/tvm/relay/op/strategy/arm_cpu.py b/python/tvm/relay/op/strategy/arm_cpu.py
index 005eae6..2d331d0 100644
--- a/python/tvm/relay/op/strategy/arm_cpu.py
+++ b/python/tvm/relay/op/strategy/arm_cpu.py
@@ -21,6 +21,7 @@ import logging
from tvm import topi
from ....target import arm_isa
+from ....topi.generic import conv2d as conv2d_generic
from .generic import *
from .. import op as _op
@@ -197,11 +198,19 @@ def conv2d_strategy_arm_cpu(attrs, inputs, out_type, target):
)
elif layout == "NHWC":
assert kernel_layout == "HWOI"
- strategy.add_implementation(
- wrap_compute_conv2d(topi.arm_cpu.compute_depthwise_conv2d_nhwc),
- wrap_topi_schedule(topi.arm_cpu.schedule_depthwise_conv2d_nhwc),
- name="depthwise_conv2d_nhwc.arm_cpu",
- )
+ is_aarch64 = topi.arm_cpu.arm_utils.is_aarch64_arm()
+ if is_aarch64 or "+neon" in target.mattr:
+ strategy.add_implementation(
+ wrap_compute_conv2d(topi.arm_cpu.compute_depthwise_conv2d_nhwc),
+ wrap_topi_schedule(topi.arm_cpu.schedule_depthwise_conv2d_nhwc),
+ name="depthwise_conv2d_nhwc.arm_cpu",
+ )
+ else:
+ strategy.add_implementation(
+ wrap_compute_conv2d(topi.nn.depthwise_conv2d_nhwc),
+ wrap_topi_schedule(conv2d_generic.schedule_depthwise_conv2d_nhwc),
+ name="depthwise_conv2d_nhwc.generic",
+ )
else:
raise RuntimeError("Unsupported depthwise_conv2d layout {} for arm cpu".format(layout))
else: # group_conv2d
diff --git a/python/tvm/topi/generic/conv2d.py b/python/tvm/topi/generic/conv2d.py
index 4daa84c..3772fdb 100644
--- a/python/tvm/topi/generic/conv2d.py
+++ b/python/tvm/topi/generic/conv2d.py
@@ -20,7 +20,7 @@
from tvm import te
from tvm import autotvm
from tvm.autotvm.task.space import SplitEntity, OtherOptionEntity
-from ..utils import get_const_tuple
+from ..utils import get_const_tuple, traverse_inline
def fallback_schedule_cpu_common_int8(cfg, wkl, int32_lanes, num_int8_elements):
@@ -361,3 +361,32 @@ def schedule_conv_NCHWc_cpu_1x1_int8(
raise ValueError("Unsupported output ndim: %s" % out_ndim)
return s
+
+
+def schedule_depthwise_conv2d_nhwc(outs):
+ """Create schedule for depthwise conv2d in NHWC layout.
+ Parameters
+ ----------
+ outs : list[te.tensor.Tensor]
+ The output tensors.
+ Returns
+ -------
+ s : tvm.te.schedule.Schedule
+ The computation schedule for depthwise conv2d.
+ """
+ outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs
+ s = te.create_schedule([x.op for x in outs])
+
+ def _callback(op):
+ """Traverse operators from computation graph"""
+ if "depthwise_conv2d_nhwc" in op.tag:
+ out = outs[0]
+ depthwise_conv2d_out = op.output(0)
+ data_pad = depthwise_conv2d_out.op.input_tensors[0]
+ s[data_pad].compute_inline()
+ if depthwise_conv2d_out != out:
+ s[depthwise_conv2d_out].compute_at(s[out], s[out].op.axis[3])
+ s[out].fuse(*s[out].op.axis)
+
+ traverse_inline(s, outs[0].op, _callback)
+ return s
diff --git a/tests/python/topi/python/test_topi_depthwise_conv2d.py b/tests/python/topi/python/test_topi_depthwise_conv2d.py
index 27601cd..24c2321 100644
--- a/tests/python/topi/python/test_topi_depthwise_conv2d.py
+++ b/tests/python/topi/python/test_topi_depthwise_conv2d.py
@@ -30,6 +30,7 @@ from tvm.topi.nn.utils import get_pad_tuple
from tvm.contrib.pickle_memoize import memoize
from tvm.topi.nn.depthwise_conv2d import _get_workload
from tvm.topi.x86.depthwise_conv2d import _fallback_schedule
+from tvm.topi.generic import conv2d as conv2d_generic
_depthwise_conv2d_implement = {
@@ -53,7 +54,10 @@ _depthwise_conv2d_implement = {
],
},
"NHWC": {
- "generic": [(topi.nn.depthwise_conv2d_nhwc, topi.generic.schedule_depthwise_conv2d_nhwc)],
+ "generic": [
+ (topi.nn.depthwise_conv2d_nhwc, topi.generic.schedule_depthwise_conv2d_nhwc),
+ (topi.nn.depthwise_conv2d_nhwc, conv2d_generic.schedule_depthwise_conv2d_nhwc),
+ ],
"arm_cpu": [
(
topi.arm_cpu.compute_depthwise_conv2d_nhwc,