Posted to commits@tvm.apache.org by GitBox <gi...@apache.org> on 2022/11/22 06:49:47 UTC

[GitHub] [tvm] ibsidorenko commented on a diff in pull request #12398: [QNN][Hexagon] Disable QNN canonicalization pass

ibsidorenko commented on code in PR #12398:
URL: https://github.com/apache/tvm/pull/12398#discussion_r1028931805


##########
python/tvm/topi/hexagon/qnn/nn.py:
##########
@@ -0,0 +1,667 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Hexagon QNN operators"""
+# pylint: disable=invalid-name
+
+import tvm
+from tvm import te, topi
+from ...utils import get_const_tuple
+from ...nn.utils import get_pad_tuple
+from ...nn.pad import pad
+from ... import tag, nn
+from ...x86.concat import concatenate
+
+
+def clip_cast(val, dtype):
+    """Clamp a value to the representable range of dtype and cast to it."""
+    const_min = tvm.tir.min_value(dtype)
+    const_max = tvm.tir.max_value(dtype)
+    return te.max(te.min(val, const_max), const_min).astype(dtype)
+
+
+def get_qnn_param(param, indices, axis):
+    # Handle both scalar (per-tensor) and 1-D (per-channel) quantization parameters.
+    if len(param.shape) == 0:
+        return param
+
+    param_idx = tvm.tir.indexmod(indices[axis], topi.shape(param)[0])
+    return param[param_idx]
+
+
+def default_schedule(outs):
+    """Simple default schedule for QNN ops.
+
+    Parameters
+    ----------
+    outs: Array of Tensor
+        The computation graph description of a QNN operator in the format
+        of an array of tensors.
+
+    Returns
+    -------
+    sch: Schedule
+        The computation schedule for the op.
+    """
+    outs = [outs] if isinstance(outs, tvm.te.tensor.Tensor) else outs
+    s = tvm.te.create_schedule([x.op for x in outs])
+    tvm.te.schedule.AutoInlineInjective(s)
+    return s
+
+
+def qnn_quantize(data, output_scale, output_zero_point, axis, out_dtype):
+    """Compute for qnn.quantize
+
+    Q_output = clamp((round(input_tensor/output_scale) + output_zero_point),
+                     out_dtype::min,
+                     out_dtype::max)
+    """
+
+    assert len(output_scale.shape) == 0 or len(output_scale.shape) == 1
+    assert len(output_zero_point.shape) == 0 or len(output_zero_point.shape) == 1
+
+    def _compute(*indices):
+        value = data(*indices)
+        scale = get_qnn_param(output_scale, indices, axis)
+        zp = get_qnn_param(output_zero_point, indices, axis)
+
+        val = te.add(te.round(te.div(value, scale)), zp)
+        return clip_cast(val, out_dtype)
+
+    return te.compute(data.shape, _compute, tag=tag.ELEMWISE)
+
+
+def schedule_qnn_quantize(outs):
+    """Schedule for qnn.quantize
+
+    Parameters
+    ----------
+    outs: Array of Tensor
+          The computation graph description of qnn.quantize
+          in the format of an array of tensors.
+
+    Returns
+    -------
+    sch: Schedule
+        The computation schedule for the op.
+    """
+    return default_schedule(outs)
+
+
+def qnn_dequantize(data, input_scale, input_zero_point, axis):
+    """Compute for qnn.dequantize
+
+    fp_output = input_scale * (Q_input - input_zero_point)
+    """
+
+    def _compute(*indices):
+        value = data(*indices)
+        scale = get_qnn_param(input_scale, indices, axis)
+        zp = get_qnn_param(input_zero_point, indices, axis)
+
+        return te.multiply(scale, te.subtract(value, zp))
+
+    return te.compute(data.shape, _compute, tag=tag.ELEMWISE)
+
+
+def schedule_qnn_dequantize(outs):
+    """Schedule for qnn.dequantize
+
+    Parameters
+    ----------
+    outs: Array of Tensor
+          The computation graph description of qnn.dequantize
+          in the format of an array of tensors.
+
+    Returns
+    -------
+    sch: Schedule
+        The computation schedule for the op.
+    """
+    return default_schedule(outs)
+
+
+def qnn_requantize(data, input_scale, input_zp, output_scale, output_zp, axis, out_dtype):
+    """Compute for qnn.requantize
+
+    Q_output = zp_output + round((scale_input)/(scale_output) * (Q_input - zp_input))
+
+    TODO: support 'rounding' and 'compute_dtype' arguments.
+    """
+
+    def _compute(*indices):
+        value = data(*indices)
+
+        iscale = get_qnn_param(input_scale, indices, axis)
+        oscale = get_qnn_param(output_scale, indices, axis)
+
+        sub = te.subtract(value, input_zp)
+        mul = te.div(iscale, oscale)
+        val = te.add(te.round(te.multiply(mul, sub)), output_zp)
+
+        return clip_cast(val, out_dtype)
+
+    return te.compute(data.shape, _compute)
+
+
+def schedule_qnn_requantize(outs):
+    """Schedule for qnn.requantize
+
+    Parameters
+    ----------
+    outs: Array of Tensor
+          The computation graph description of qnn.requantize
+          in the format of an array of tensors.
+
+    Returns
+    -------
+    sch: Schedule
+        The computation schedule for the op.
+    """
+    return default_schedule(outs)
+
+
+def qnn_add(
+    lhs, rhs, lhs_scale, lhs_zero_point, rhs_scale, rhs_zero_point, output_scale, output_zero_point
+):
+    """Compute for qnn.add
+
+    Q_output = output_zp + round((lhs_scale / output_scale) * (lhs_input - lhs_zp))
+                         + round((rhs_scale / output_scale) * (rhs_input - rhs_zp))
+
+    TODO: support 'axis' argument.
+    """
+
+    assert lhs.dtype == rhs.dtype
+    dtype = lhs.dtype
+
+    def _compute(*indices):
+        lvalue = lhs(*indices)
+        rvalue = rhs(*indices)
+        q_lv = te.round(
+            te.multiply(te.div(lhs_scale, output_scale), te.subtract(lvalue, lhs_zero_point))
+        ).astype("int32")
+        q_rv = te.round(
+            te.multiply(te.div(rhs_scale, output_scale), te.subtract(rvalue, rhs_zero_point))
+        ).astype("int32")
+        val = te.add(te.add(q_lv, q_rv), output_zero_point)
+
+        return clip_cast(val, dtype)
+
+    return te.compute(lhs.shape, _compute)
+
+
+def schedule_qnn_add(outs):
+    """Schedule for qnn.add
+
+    Parameters
+    ----------
+    outs: Array of Tensor
+          The computation graph description of qnn.add
+          in the format of an array of tensors.
+
+    Returns
+    -------
+    sch: Schedule
+        The computation schedule for the op.
+    """
+    return default_schedule(outs)
+
+
+def requantize_tensor(tensor, i_scale, i_zp, o_scale, o_zp, out_dtype):
+    """Requantize tensor"""
+
+    def _compute(*indices):
+        value = tensor(*indices)
+        mul_value = te.round(
+            te.multiply(te.div(i_scale, o_scale), te.subtract(value, i_zp))
+        ).astype("int32")
+        rq_value = te.add(mul_value, o_zp)
+
+        return clip_cast(rq_value, out_dtype)
+
+    return te.compute(tensor.shape, _compute)
+
+
+def qnn_concatenate(data, axis, out_dtype):
+    """Compute for qnn.concatenate
+
+    Parameters
+    ----------
+    data: Array of Tensor
+          The computation graph description of qnn.concatenate
+          in the format of an array of tensors.
+
+    axis: int
+          The axis along which the tensors are concatenated.
+
+    out_dtype: string
+          Data type of output tensor
+
+    Returns
+    -------
+    out: Tensor
+        The computation for the op.
+    """
+
+    # Get output quantization parameters.
+    o_scale = data[-2]
+    o_zp = data[-1]
+
+    # At the Relay level, qnn.concatenate takes three tuples: (1) the input tensors,
+    # (2) their scales and (3) their zero points. They arrive here flattened into a
+    # single list, whose last two elements are the output scale and zero point.
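+    # For example (illustrative), with two inputs the flattened layout is:
+    #   data = [t0, t1, scale0, scale1, zp0, zp1, output_scale, output_zero_point]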
+    num_of_tuples = 3
+    assert ((len(data) - 2) % num_of_tuples) == 0
+    args_num = (len(data) - 2) // num_of_tuples
+
+    args = []
+    for i in range(args_num):
+        # Get next tensor and its quantization parameters.
+        tensor = data[i]
+        i_scale = data[i + args_num]
+        i_zp = data[i + args_num * 2]
+
+        # Requantize tensors and add them to the list.
+        args.append(requantize_tensor(tensor, i_scale, i_zp, o_scale, o_zp, out_dtype))
+
+    # Call x86 implementation of concatenate.
+    return concatenate(args, axis)
+
+
+def schedule_qnn_concatenate(outs):
+    """Schedule for qnn.concatenate
+
+    Parameters
+    ----------
+    outs: Array of Tensor
+          The computation graph description of qnn.concatenate
+          in the format of an array of tensors.
+
+    Returns
+    -------
+    sch: Schedule
+        The computation schedule for the op.
+    """
+    return default_schedule(outs)
+
+
+def qnn_conv2d(  # Conv2d inputs
+    data,
+    weight,
+    # Conv2d quantization params:
+    input_zero_point,
+    kernel_zero_point,
+    _input_scale,
+    _kernel_scale,

Review Comment:
   @csullivan yes, you are right, the input/kernel scales should be taken into account in the subsequent requantize op.
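
   As a rough illustration (not part of the PR; the values below are made up): after a
   quantized conv2d the int32 accumulator carries an effective scale of
   input_scale * kernel_scale, so that product becomes the input scale of the follow-up
   requantize. A quick NumPy check of the formula used in qnn_requantize:

       import numpy as np

       input_scale, kernel_scale = 0.02, 0.05  # made-up per-tensor scales
       output_scale, output_zp = 0.1, 0
       acc = np.int32(1234)  # int32 conv2d accumulator (zero points already subtracted)

       # Q_out = zp_out + round(scale_in / scale_out * (Q_in - zp_in))
       requant_scale = input_scale * kernel_scale  # scales folded into the requantize
       q_out = output_zp + np.round(requant_scale / output_scale * acc)
       assert np.isclose(q_out * output_scale, acc * requant_scale, atol=output_scale)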



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@tvm.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org