Posted to commits@tvm.apache.org by ke...@apache.org on 2020/07/02 05:59:33 UTC

[incubator-tvm] branch master updated: [TOPI] Fix x86 conv2d template when tuning with unpacked layout (#5938)

This is an automated email from the ASF dual-hosted git repository.

kevinthesun pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-tvm.git


The following commit(s) were added to refs/heads/master by this push:
     new 512ed39  [TOPI] Fix x86 conv2d template when tuning with unpacked layout (#5938)
512ed39 is described below

commit 512ed3930a61daf38e80e1f71e51f0d1f139fb8e
Author: Lianmin Zheng <li...@gmail.com>
AuthorDate: Wed Jul 1 22:59:21 2020 -0700

    [TOPI] Fix x86 conv2d template when tuning with unpacked layout (#5938)
    
    * fix x86 conv2d and conv2d_transpose template
    
    * address comments
---
 topi/python/topi/x86/conv2d_avx_1x1.py    |  2 +-
 topi/python/topi/x86/conv2d_avx_common.py |  2 +-
 topi/python/topi/x86/conv2d_transpose.py  | 14 ++++++++------
 3 files changed, 10 insertions(+), 8 deletions(-)
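
For context, a hedged sketch (not code from this patch) of the situation
the commit title describes: the x86 conv2d templates compute and schedule
in the packed NCHW[x]c layout, but during tuning a task can hand them
plain, unpacked NCHW/OIHW placeholders. In that case the intermediate
stages the schedules expect (the "kernel_vec" layout transform, padding,
dilation) simply do not exist:

    from tvm import te

    # Unpacked inputs, as a tuning task may provide them.
    data = te.placeholder((1, 64, 56, 56), name="data")      # NCHW
    kernel = te.placeholder((64, 64, 1, 1), name="kernel")   # OIHW
    # Bare placeholders: no upstream compute stages to schedule or inline.

The two changes below make the schedules tolerate this: conv2d_avx_1x1.py
and conv2d_avx_common.py stop relying on a variable that was only bound
inside a conditional branch, and conv2d_transpose.py guards its
compute_inline calls.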

diff --git a/topi/python/topi/x86/conv2d_avx_1x1.py b/topi/python/topi/x86/conv2d_avx_1x1.py
index 978c4b9..c6ed832 100644
--- a/topi/python/topi/x86/conv2d_avx_1x1.py
+++ b/topi/python/topi/x86/conv2d_avx_1x1.py
@@ -73,6 +73,7 @@ def _schedule_conv_NCHWc(s, cfg, data_vec, kernel_vec, conv_out, last):
         s[data_vec].parallel(parallel_axis)
         data_vec = data_vec.op.input_tensors[0]
 
+    oc_bn = cfg["tile_oc"].size[-1]
     if isinstance(kernel_vec.op, tvm.te.ComputeOp) and \
             kernel_vec.name == 'kernel_vec':
         # data and kernel are not pre-computed, schedule layout transform here.
@@ -84,7 +85,6 @@ def _schedule_conv_NCHWc(s, cfg, data_vec, kernel_vec, conv_out, last):
 
         oc_chunk, ic_chunk, oh, ow, ic_block, oc_block = s[kernel_vec].op.axis
         s[kernel_vec].reorder(oc_chunk, oh, ic_chunk, ow, ic_block, oc_block)
-        oc_bn = cfg["tile_oc"].size[-1]
         if oc_bn > 1:
             s[kernel_vec].vectorize(oc_block)
         parallel_axis = s[kernel_vec].fuse(oc_chunk, oh)
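
The hunk above hoists the oc_bn lookup out of the kernel_vec branch. The
likely reasoning, hedged since the rest of the function is outside this
hunk: when tuning with an unpacked layout the kernel tensor is not the
"kernel_vec" layout-transform stage, the branch is skipped, and any later
use of oc_bn in the function would fail. A minimal standalone sketch of
the pattern, with hypothetical names:

    def schedule_before(cfg, kernel_is_vec):
        if kernel_is_vec:
            oc_bn = cfg["tile_oc"]   # bound only on this path
            ...                      # vectorize the packed kernel axis
        return oc_bn                 # UnboundLocalError if branch skipped

    def schedule_after(cfg, kernel_is_vec):
        oc_bn = cfg["tile_oc"]       # bound unconditionally, as in the patch
        if kernel_is_vec:
            ...                      # vectorize the packed kernel axis
        return oc_bn                 # defined on every path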
diff --git a/topi/python/topi/x86/conv2d_avx_common.py b/topi/python/topi/x86/conv2d_avx_common.py
index a88d168..aea954f 100644
--- a/topi/python/topi/x86/conv2d_avx_common.py
+++ b/topi/python/topi/x86/conv2d_avx_common.py
@@ -95,6 +95,7 @@ def _schedule_conv_NCHWc(s, cfg, data_vec, kernel_vec, conv_out, last):
         s[data_vec].parallel(parallel_axis)
         data_vec = data_vec.op.input_tensors[0]
 
+    oc_bn = cfg["tile_oc"].size[-1]
     if isinstance(kernel_vec.op, tvm.te.ComputeOp) and \
             kernel_vec.name == 'kernel_vec':
         # data and kernel are not pre-computed, schedule layout transform here.
@@ -106,7 +107,6 @@ def _schedule_conv_NCHWc(s, cfg, data_vec, kernel_vec, conv_out, last):
 
         oc_chunk, ic_chunk, oh, ow, ic_block, oc_block = s[kernel_vec].op.axis
         s[kernel_vec].reorder(oc_chunk, oh, ic_chunk, ow, ic_block, oc_block)
-        oc_bn = cfg["tile_oc"].size[-1]
         if oc_bn > 1:
             s[kernel_vec].vectorize(oc_block)
         parallel_axis = s[kernel_vec].fuse(oc_chunk, oh)
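
conv2d_avx_common.py receives the identical hoist. As a side note on what
oc_bn controls: the x86 templates split output channels into
(oc_chunk, oc_block) with oc_block == oc_bn, which is why oc_block is only
vectorized when oc_bn > 1. An illustrative NumPy reshape (not the TVM
packing code, which also splits input channels and reorders axes):

    import numpy as np

    num_oc, oc_bn = 64, 16                      # assumed example values
    kernel_oihw = np.zeros((num_oc, 64, 3, 3))  # OIHW kernel, O = 64
    # Split O into 4 chunks of 16 vector lanes each.
    packed = kernel_oihw.reshape(num_oc // oc_bn, oc_bn, 64, 3, 3)
    print(packed.shape)                         # (4, 16, 64, 3, 3)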
diff --git a/topi/python/topi/x86/conv2d_transpose.py b/topi/python/topi/x86/conv2d_transpose.py
index d490b28..7ec2817 100644
--- a/topi/python/topi/x86/conv2d_transpose.py
+++ b/topi/python/topi/x86/conv2d_transpose.py
@@ -40,14 +40,16 @@ def schedule_conv2d_transpose_nchw(outs):
             conv_out = op.input_tensors[0]
             # retrieve data
             data_vec = conv_out.op.input_tensors[0]
-            data_pad = data_vec.op.input_tensors[0]
-            data_dilate = data_pad.op.input_tensors[0]
-            s[data_dilate].compute_inline()
-            s[data_pad].compute_inline()
+            if isinstance(data_vec, te.ComputeOp):
+                data_pad = data_vec.op.input_tensors[0]
+                data_dilate = data_pad.op.input_tensors[0]
+                s[data_dilate].compute_inline()
+                s[data_pad].compute_inline()
             # retrieve kernel
             kernel_vec = conv_out.op.input_tensors[1]
-            kernel_transform = kernel_vec.op.input_tensors[0]
-            s[kernel_transform].compute_inline()
+            if isinstance(kernel_vec, te.ComputeOp):
+                kernel_transform = kernel_vec.op.input_tensors[0]
+                s[kernel_transform].compute_inline()
 
     traverse_inline(s, outs[0].op, _callback)
     return s
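
The conv2d_transpose.py change wraps the compute_inline calls in isinstance
guards so the dilation/padding and kernel-transform stages are only inlined
when they actually exist. (Note the committed guards test the tensors
themselves against te.ComputeOp; a stage's type is normally inspected via
tensor.op, which is what the sketch below does.) A minimal sketch, using
only the public te API, of the failure the guards prevent: when the tensor
reaching the callback is a bare placeholder there is no upstream stage, and
walking .op.input_tensors[0] fails.

    from tvm import te

    data = te.placeholder((1, 3, 8, 8), name="data")
    print(data.op.input_tensors)    # [] for a placeholder
    # data.op.input_tensors[0]      # would raise IndexError

    padded = te.compute((1, 3, 8, 8), lambda *i: data(*i) + 1, name="padded")
    print(padded.op.input_tensors)  # [Tensor(shape=[1, 3, 8, 8], ...)]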