You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by ec...@apache.org on 2023/05/03 20:18:48 UTC

[tvm] branch main updated: [TOPI] Vectorize depthwise conv2d output operator (#14519)

This is an automated email from the ASF dual-hosted git repository.

echuraev pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git


The following commit(s) were added to refs/heads/main by this push:
     new cc8cce50fa [TOPI] Vectorize depthwise conv2d output operator (#14519)
cc8cce50fa is described below

commit cc8cce50fabb5a92e5830bb81b5fa96fb613a698
Author: Jack Frankland <30...@users.noreply.github.com>
AuthorDate: Wed May 3 21:18:41 2023 +0100

    [TOPI] Vectorize depthwise conv2d output operator (#14519)
    
    Depthwise Conv2D operations may consist of a convolution followed by an
    output operator, e.g. ReLU. This commit will:
    * Apply vectorization across the inner channel loop when there is an
      output operator.
    * Remove some unused variables in `schedule_depthwise_conv2d_nhwc`.
    * Limit the loop splitting to 8 elements in the inner loop.
---
 python/tvm/topi/arm_cpu/depthwise_conv2d.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/python/tvm/topi/arm_cpu/depthwise_conv2d.py b/python/tvm/topi/arm_cpu/depthwise_conv2d.py
index b6c15a30c0..59660e6bb9 100644
--- a/python/tvm/topi/arm_cpu/depthwise_conv2d.py
+++ b/python/tvm/topi/arm_cpu/depthwise_conv2d.py
@@ -292,13 +292,13 @@ def schedule_depthwise_conv2d_nhwc(cfg, outs):
     out = outs[0]
 
     ##### space definition begin #####
-    n, h, w, c = s[out].op.axis
+    _, h, w, c = s[out].op.axis
     # Split the number of input/output channels
-    cfg.define_split("tile_c", c, num_outputs=2)
+    cfg.define_split("tile_c", c, num_outputs=2, filter=lambda entry: entry.size[1] <= 8)
     # Split the height of the convolution
-    _, hi = cfg.define_split("tile_h", h, num_outputs=2)
+    cfg.define_split("tile_h", h, num_outputs=2)
     # Split the width of the convolution
-    _, wi = cfg.define_split("tile_w", w, num_outputs=2)
+    cfg.define_split("tile_w", w, num_outputs=2)
     # Additional out (e.g., requantization, bias addition, etc..)
     # 0: locate the output on the second last axis of the main compuation
     # 1: locate the output closest to the main computation
@@ -394,7 +394,8 @@ def schedule_depthwise_conv2d_nhwc(cfg, outs):
             ci_outer, ci_inner = s[out].split(ci, 4)
             s[out].vectorize(ci_inner)
             s[out].unroll(ci_outer)
-
+        else:
+            s[out].vectorize(ci)
         fused_n_ho = s[out].fuse(n, ho)
         return hi, wi, fused_n_ho