You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by ec...@apache.org on 2023/05/03 20:18:48 UTC
[tvm] branch main updated: [TOPI] Vectorize depthwise conv2d output operator (#14519)
This is an automated email from the ASF dual-hosted git repository.
echuraev pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new cc8cce50fa [TOPI] Vectorize depthwise conv2d output operator (#14519)
cc8cce50fa is described below
commit cc8cce50fabb5a92e5830bb81b5fa96fb613a698
Author: Jack Frankland <30...@users.noreply.github.com>
AuthorDate: Wed May 3 21:18:41 2023 +0100
[TOPI] Vectorize depthwise conv2d output operator (#14519)
Depthwise Conv2D operations may consist of a convolution followed by an
output operator, e.g. ReLU. This commit will:
* Apply vectorization across the inner channel loop when there is an
output operator.
* Remove some unused variables in `schedule_depthwise_conv2d_nhwc`.
* Limit the loop splitting to 8 elements in the inner loop.
---
python/tvm/topi/arm_cpu/depthwise_conv2d.py | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/python/tvm/topi/arm_cpu/depthwise_conv2d.py b/python/tvm/topi/arm_cpu/depthwise_conv2d.py
index b6c15a30c0..59660e6bb9 100644
--- a/python/tvm/topi/arm_cpu/depthwise_conv2d.py
+++ b/python/tvm/topi/arm_cpu/depthwise_conv2d.py
@@ -292,13 +292,13 @@ def schedule_depthwise_conv2d_nhwc(cfg, outs):
out = outs[0]
##### space definition begin #####
- n, h, w, c = s[out].op.axis
+ _, h, w, c = s[out].op.axis
# Split the number of input/output channels
- cfg.define_split("tile_c", c, num_outputs=2)
+ cfg.define_split("tile_c", c, num_outputs=2, filter=lambda entry: entry.size[1] <= 8)
# Split the height of the convolution
- _, hi = cfg.define_split("tile_h", h, num_outputs=2)
+ cfg.define_split("tile_h", h, num_outputs=2)
# Split the width of the convolution
- _, wi = cfg.define_split("tile_w", w, num_outputs=2)
+ cfg.define_split("tile_w", w, num_outputs=2)
# Additional out (e.g., requantization, bias addition, etc..)
# 0: locate the output on the second last axis of the main computation
# 1: locate the output closest to the main computation
@@ -394,7 +394,8 @@ def schedule_depthwise_conv2d_nhwc(cfg, outs):
ci_outer, ci_inner = s[out].split(ci, 4)
s[out].vectorize(ci_inner)
s[out].unroll(ci_outer)
-
+ else:
+ s[out].vectorize(ci)
fused_n_ho = s[out].fuse(n, ho)
return hi, wi, fused_n_ho