You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by an...@apache.org on 2022/04/20 16:47:45 UTC
[tvm] branch main updated: [QNN] Support input scale and zp of 1-element vector in qnn.conv2d_transpose (#10952)
This is an automated email from the ASF dual-hosted git repository.
andrewzhaoluo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new 58b7a5a268 [QNN] Support input scale and zp of 1-element vector in qnn.conv2d_transpose (#10952)
58b7a5a268 is described below
commit 58b7a5a268435c34eca36f6c0394d9548b850f98
Author: Sevin F. Varoglu <sf...@octoml.ai>
AuthorDate: Wed Apr 20 09:47:37 2022 -0700
[QNN] Support input scale and zp of 1-element vector in qnn.conv2d_transpose (#10952)
* Support input scale and zp of 1-element vector in qnn.conv2d_transpose
* Lint
---
python/tvm/relay/qnn/op/legalizations.py | 30 ++++++++++++++++++-----
src/relay/qnn/op/convolution_transpose.cc | 14 +++++++++--
tests/python/relay/test_op_qnn_conv2_transpose.py | 25 +++++++++++++++++++
3 files changed, 61 insertions(+), 8 deletions(-)
diff --git a/python/tvm/relay/qnn/op/legalizations.py b/python/tvm/relay/qnn/op/legalizations.py
index e669e14032..6a17a14eb6 100644
--- a/python/tvm/relay/qnn/op/legalizations.py
+++ b/python/tvm/relay/qnn/op/legalizations.py
@@ -92,12 +92,30 @@ def qnn_conv2d_transpose_legalize(attrs, inputs, types):
# Collect the input exprs.
data, kernel, input_zero_point, kernel_zero_point, _, _ = inputs
- shift_data = relay.subtract(
- relay.cast(data, dtype="int16"), relay.cast(input_zero_point, "int16")
- )
- shift_kernel = relay.subtract(
- relay.cast(kernel, dtype="int16"), relay.cast(kernel_zero_point, "int16")
- )
+ # If input zero point is a scalar, we can directly subtract it.
+ if len(types[2].shape) == 0:
+ shift_data = relay.subtract(
+ relay.cast(data, dtype="int16"), relay.cast(input_zero_point, "int16")
+ )
+ # Otherwise it needs to be broadcast.
+ else:
+ shift_data = relay.nn.bias_add(
+ relay.cast(data, dtype="int16"),
+ -relay.cast(input_zero_point, dtype="int16"),
+ )
+
+ # If kernel zero point is a scalar, we can directly subtract it.
+ if len(types[3].shape) == 0:
+ shift_kernel = relay.subtract(
+ relay.cast(kernel, dtype="int16"), relay.cast(kernel_zero_point, "int16")
+ )
+ # Otherwise it needs to be broadcast.
+ else:
+ shift_kernel = relay.nn.bias_add(
+ relay.cast(kernel, dtype="int16"),
+ -relay.cast(kernel_zero_point, dtype="int16"),
+ )
+
return relay.nn.conv2d_transpose(shift_data, shift_kernel, **attrs)
diff --git a/src/relay/qnn/op/convolution_transpose.cc b/src/relay/qnn/op/convolution_transpose.cc
index 9710d1fd7a..6163e1c204 100644
--- a/src/relay/qnn/op/convolution_transpose.cc
+++ b/src/relay/qnn/op/convolution_transpose.cc
@@ -107,12 +107,22 @@ bool QnnConv2DTransposeRel(const Array<Type>& types, int num_inputs, const Attrs
return false;
}
}
- ICHECK(IsScalarType(types[2], DataType::Int(32))); // input_zero_point
const auto* weight_zp_type = types[3].as<TensorTypeNode>();
ICHECK(weight_zp_type->dtype == DataType::Int(32)); // weight_zero_point
- ICHECK(IsScalarType(types[4], DataType::Float(32))); // input_scale
+ bool input_zp_is_scalar = (types[2].as<TensorTypeNode>())->shape.size() == 0 ||
+ get_const_int((types[2].as<TensorTypeNode>())->Size()) == 1;
+ bool input_scale_is_scalar = (types[4].as<TensorTypeNode>())->shape.size() == 0 ||
+ get_const_int((types[4].as<TensorTypeNode>())->Size()) == 1;
+
+ ICHECK(input_scale_is_scalar && input_zp_is_scalar)
+ << "Zero point or scale should be scalar or a vector with one element.";
+
+ // Assign types for input scale and zero point.
+ AssignType(types[2], DataType::Int(32), Integer(1), reporter); // input_zero_point
+ AssignType(types[4], DataType::Float(32), Integer(1), reporter); // input_scale
+
// Kernel scale can be a vector of length output_channels or a scalar.
if (param->groups == 1) {
size_t axis = param->kernel_layout.find('O');
diff --git a/tests/python/relay/test_op_qnn_conv2_transpose.py b/tests/python/relay/test_op_qnn_conv2_transpose.py
index 9ce080b608..ec273eb2f7 100644
--- a/tests/python/relay/test_op_qnn_conv2_transpose.py
+++ b/tests/python/relay/test_op_qnn_conv2_transpose.py
@@ -647,6 +647,31 @@ def test_broadcast_layout():
libs = relay.build(mod, "llvm -mcpu=skylake-avx512")
+def test_non_scalar_input_scale_zp():
+ data_shape = (2, 1, 2, 4)
+ data_dtype = "uint8"
+ kernel_shape = (1, 3, 2, 2)
+ kernel_dtype = "uint8"
+ ref_func, qnn_func = get_funcs(
+ data_shape=data_shape,
+ data_dtype=data_dtype,
+ kernel_shape=kernel_shape,
+ kernel_dtype=kernel_dtype,
+ input_zero_point=[0],
+ kernel_zero_point=0,
+ input_scale=[1.0],
+ kernel_scale=1.0,
+ kernel_size=(2, 2),
+ padding=(0, 0),
+ strides=(1, 1),
+ dilation=(1, 1),
+ data_layout="NCHW",
+ kernel_layout="IOHW",
+ out_dtype="int32",
+ )
+ verify(ref_func, qnn_func, data_shape, data_dtype, kernel_shape, kernel_dtype)
+
+
def test_per_channel_kernel_scale():
data_shape = (2, 1, 2, 4)
data_dtype = "uint8"