Posted to commits@tvm.apache.org by an...@apache.org on 2022/04/20 16:47:45 UTC

[tvm] branch main updated: [QNN] Support input scale and zp of 1-element vector in qnn.conv2d_transpose (#10952)

This is an automated email from the ASF dual-hosted git repository.

andrewzhaoluo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git


The following commit(s) were added to refs/heads/main by this push:
     new 58b7a5a268 [QNN] Support input scale and zp of 1-element vector in qnn.conv2d_transpose (#10952)
58b7a5a268 is described below

commit 58b7a5a268435c34eca36f6c0394d9548b850f98
Author: Sevin F. Varoglu <sf...@octoml.ai>
AuthorDate: Wed Apr 20 09:47:37 2022 -0700

    [QNN] Support input scale and zp of 1-element vector in qnn.conv2d_transpose (#10952)
    
    * Support input scale and zp of 1-element vector in qnn.conv2d_transpose
    
    * Lint
---
 python/tvm/relay/qnn/op/legalizations.py          | 30 ++++++++++++++++++-----
 src/relay/qnn/op/convolution_transpose.cc         | 14 +++++++++--
 tests/python/relay/test_op_qnn_conv2_transpose.py | 25 +++++++++++++++++++
 3 files changed, 61 insertions(+), 8 deletions(-)
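
For context, the patch relaxes qnn.conv2d_transpose so that the input scale and
input zero point may be passed either as scalars or as 1-element vectors. Below
is a minimal sketch of the newly accepted form; the shapes and attributes mirror
the test added at the end of this diff, and it assumes a TVM build that includes
this commit:

    import numpy as np
    import tvm
    from tvm import relay

    data = relay.var("data", shape=(2, 1, 2, 4), dtype="uint8")
    kernel = relay.var("kernel", shape=(1, 3, 2, 2), dtype="uint8")

    # Input scale/zero point as 1-element vectors rather than rank-0 scalars.
    op = relay.qnn.op.conv2d_transpose(
        data,
        kernel,
        input_zero_point=relay.const(np.array([0], dtype="int32")),
        kernel_zero_point=relay.const(0, "int32"),
        input_scale=relay.const(np.array([1.0], dtype="float32")),
        kernel_scale=relay.const(1.0, "float32"),
        kernel_size=(2, 2),
        channels=3,  # matches the 'O' dimension of the IOHW kernel shape
        data_layout="NCHW",
        kernel_layout="IOHW",
        out_dtype="int32",
    )
    func = relay.Function(relay.analysis.free_vars(op), op)
    mod = relay.transform.InferType()(tvm.IRModule.from_expr(func))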

diff --git a/python/tvm/relay/qnn/op/legalizations.py b/python/tvm/relay/qnn/op/legalizations.py
index e669e14032..6a17a14eb6 100644
--- a/python/tvm/relay/qnn/op/legalizations.py
+++ b/python/tvm/relay/qnn/op/legalizations.py
@@ -92,12 +92,30 @@ def qnn_conv2d_transpose_legalize(attrs, inputs, types):
     # Collect the input exprs.
     data, kernel, input_zero_point, kernel_zero_point, _, _ = inputs
 
-    shift_data = relay.subtract(
-        relay.cast(data, dtype="int16"), relay.cast(input_zero_point, "int16")
-    )
-    shift_kernel = relay.subtract(
-        relay.cast(kernel, dtype="int16"), relay.cast(kernel_zero_point, "int16")
-    )
+    # If input zero point is a scalar, we can directly subtract it.
+    if len(types[2].shape) == 0:
+        shift_data = relay.subtract(
+            relay.cast(data, dtype="int16"), relay.cast(input_zero_point, "int16")
+        )
+    # Otherwise it needs to be broadcast.
+    else:
+        shift_data = relay.nn.bias_add(
+            relay.cast(data, dtype="int16"),
+            -relay.cast(input_zero_point, dtype="int16"),
+        )
+
+    # If kernel zero point is a scalar, we can directly subtract it.
+    if len(types[3].shape) == 0:
+        shift_kernel = relay.subtract(
+            relay.cast(kernel, dtype="int16"), relay.cast(kernel_zero_point, "int16")
+        )
+    # Otherwise it needs to be broadcast.
+    else:
+        shift_kernel = relay.nn.bias_add(
+            relay.cast(kernel, dtype="int16"),
+            -relay.cast(kernel_zero_point, dtype="int16"),
+        )
+
     return relay.nn.conv2d_transpose(shift_data, shift_kernel, **attrs)
 
 
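In the legalization above, a vector zero point is subtracted by negating it and
feeding it to relay.nn.bias_add, which aligns a 1-D bias with the channel axis
of the data. For a 1-element vector this is numerically the same as a plain
elementwise subtract, as this illustrative numpy-only sketch shows (numpy is
used here just to model the broadcasting; it is not the TVM code path):

    import numpy as np

    data = np.arange(2 * 3 * 2 * 2, dtype=np.int16).reshape(2, 3, 2, 2)  # NCHW
    zp = np.array([7], dtype=np.int16)  # 1-element zero point vector

    # bias_add(data, -zp) aligns the vector with the channel axis (axis 1)...
    via_bias_add = data + (-zp).reshape(1, -1, 1, 1)
    # ...which, for a single element, matches plain elementwise subtraction.
    via_subtract = data - zp
    assert np.array_equal(via_bias_add, via_subtract)
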
diff --git a/src/relay/qnn/op/convolution_transpose.cc b/src/relay/qnn/op/convolution_transpose.cc
index 9710d1fd7a..6163e1c204 100644
--- a/src/relay/qnn/op/convolution_transpose.cc
+++ b/src/relay/qnn/op/convolution_transpose.cc
@@ -107,12 +107,22 @@ bool QnnConv2DTransposeRel(const Array<Type>& types, int num_inputs, const Attrs
       return false;
     }
   }
-  ICHECK(IsScalarType(types[2], DataType::Int(32)));  // input_zero_point
 
   const auto* weight_zp_type = types[3].as<TensorTypeNode>();
   ICHECK(weight_zp_type->dtype == DataType::Int(32));  // weight_zero_point
 
-  ICHECK(IsScalarType(types[4], DataType::Float(32)));  // input_scale
+  bool input_zp_is_scalar = (types[2].as<TensorTypeNode>())->shape.size() == 0 ||
+                            get_const_int((types[2].as<TensorTypeNode>())->Size()) == 1;
+  bool input_scale_is_scalar = (types[4].as<TensorTypeNode>())->shape.size() == 0 ||
+                               get_const_int((types[4].as<TensorTypeNode>())->Size()) == 1;
+
+  ICHECK(input_scale_is_scalar && input_zp_is_scalar)
+      << "Zero point or scale should be scalar or a vector with one element.";
+
+  // Assign types for input scale and zero point.
+  AssignType(types[2], DataType::Int(32), Integer(1), reporter);    // input_zero_point
+  AssignType(types[4], DataType::Float(32), Integer(1), reporter);  // input_scale
+
   // Kernel scale can be a vector of length output_channels or a scalar.
   if (param->groups == 1) {
     size_t axis = param->kernel_layout.find('O');
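
The C++ change replaces the strict IsScalarType check on the input zero point
and scale with a predicate that also admits tensors holding exactly one
element, then uses AssignType to fix the unified type for downstream passes.
In rough Python terms the relaxed predicate looks like the sketch below
(is_scalar_like is a hypothetical name for illustration, not the actual
helper):

    import numpy as np

    def is_scalar_like(shape):
        # Rank-0 tensors and tensors with exactly one element both qualify.
        return len(shape) == 0 or int(np.prod(shape)) == 1

    assert is_scalar_like(())        # rank-0 scalar, accepted before and after
    assert is_scalar_like((1,))      # 1-element vector, newly accepted
    assert not is_scalar_like((3,))  # per-channel input qparams still rejected
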
diff --git a/tests/python/relay/test_op_qnn_conv2_transpose.py b/tests/python/relay/test_op_qnn_conv2_transpose.py
index 9ce080b608..ec273eb2f7 100644
--- a/tests/python/relay/test_op_qnn_conv2_transpose.py
+++ b/tests/python/relay/test_op_qnn_conv2_transpose.py
@@ -647,6 +647,31 @@ def test_broadcast_layout():
         libs = relay.build(mod, "llvm -mcpu=skylake-avx512")
 
 
+def test_non_scalar_input_scale_zp():
+    data_shape = (2, 1, 2, 4)
+    data_dtype = "uint8"
+    kernel_shape = (1, 3, 2, 2)
+    kernel_dtype = "uint8"
+    ref_func, qnn_func = get_funcs(
+        data_shape=data_shape,
+        data_dtype=data_dtype,
+        kernel_shape=kernel_shape,
+        kernel_dtype=kernel_dtype,
+        input_zero_point=[0],
+        kernel_zero_point=0,
+        input_scale=[1.0],
+        kernel_scale=1.0,
+        kernel_size=(2, 2),
+        padding=(0, 0),
+        strides=(1, 1),
+        dilation=(1, 1),
+        data_layout="NCHW",
+        kernel_layout="IOHW",
+        out_dtype="int32",
+    )
+    verify(ref_func, qnn_func, data_shape, data_dtype, kernel_shape, kernel_dtype)
+
+
 def test_per_channel_kernel_scale():
     data_shape = (2, 1, 2, 4)
     data_dtype = "uint8"
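
The new test above exercises exactly this case, passing input_zero_point=[0]
and input_scale=[1.0] as 1-element lists while keeping the kernel parameters
scalar. It can be run on its own with, e.g.,
pytest tests/python/relay/test_op_qnn_conv2_transpose.py::test_non_scalar_input_scale_zp
(assuming a local TVM build, since verify compiles and executes both the
reference and the QNN function).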