Posted to commits@tvm.apache.org by an...@apache.org on 2022/09/16 22:47:44 UTC
[tvm] 20/20: div impl
This is an automated email from the ASF dual-hosted git repository.
andrewzhaoluo pushed a commit to branch aluo/rebase-08312022-autotensorization-fq2i-changes
in repository https://gitbox.apache.org/repos/asf/tvm.git
commit bf7d866541cc2425de526c8429f3bf086cb6fe3b
Author: Andrew Zhao Luo <an...@gmail.com>
AuthorDate: Fri Sep 16 15:47:02 2022 -0700
div impl
---
python/tvm/relay/qnn/op/qnn.py | 68 ++++++++++++
.../transform/fake_quantization_to_integer.py | 88 +++++++++++++++-
src/relay/qnn/op/div.cc | 117 +++++++++++++++++++++
3 files changed, 272 insertions(+), 1 deletion(-)
diff --git a/python/tvm/relay/qnn/op/qnn.py b/python/tvm/relay/qnn/op/qnn.py
index 1f38385107..6d1cabeb8d 100644
--- a/python/tvm/relay/qnn/op/qnn.py
+++ b/python/tvm/relay/qnn/op/qnn.py
@@ -788,6 +788,74 @@ def mul(
)
+def div(
+ lhs,
+ rhs,
+ lhs_scale,
+ lhs_zero_point,
+ rhs_scale,
+ rhs_zero_point,
+ output_scale,
+ output_zero_point,
+ lhs_axis=-1,
+ rhs_axis=-1,
+):
+ """Quantized division with numpy-style broadcasting.
+
+ Parameters
+ ----------
+ lhs : relay.Expr
+ The left hand side quantized input data.
+
+ rhs : relay.Expr
+ The right hand side quantized input data.
+
+ lhs_scale: relay.Expr
+ The scale of the lhs quantized expr.
+
+ lhs_zero_point: relay.Expr
+ The zero point of lhs quantized expr.
+
+ rhs_scale: relay.Expr
+ The scale of the rhs quantized expr.
+
+ rhs_zero_point: relay.Expr
+ The zero point of rhs quantized expr.
+
+ output_scale: relay.Expr
+ The scale of the output quantized expr.
+
+ output_zero_point: relay.Expr
+ The zero point of output quantized expr.
+
+ lhs_axis: int
+ The channel axis for lhs quantization. Default value is -1 which corresponds
+ to the last axis.
+
+ rhs_axis: int
+ The channel axis for rhs quantization. Default value is -1 which corresponds
+ to the last axis.
+
+ Returns
+ -------
+ result : relay.Expr
+ The computed result.
+
+ """
+ return _make.div(
+ lhs,
+ rhs,
+ lhs_scale,
+ lhs_zero_point,
+ rhs_scale,
+ rhs_zero_point,
+ output_scale,
+ output_zero_point,
+ lhs_axis,
+ rhs_axis,
+ )
+
+
def tanh(x, scale, zero_point, output_scale, output_zero_point):
"""Quantized tanh.
diff --git a/python/tvm/relay/transform/fake_quantization_to_integer.py b/python/tvm/relay/transform/fake_quantization_to_integer.py
index bb874c131c..5b6845bd63 100644
--- a/python/tvm/relay/transform/fake_quantization_to_integer.py
+++ b/python/tvm/relay/transform/fake_quantization_to_integer.py
@@ -19,6 +19,7 @@ import numpy as np
import tvm
from tvm import relay
from tvm.ir import TensorAffineType, TupleAffineType
+from tvm.relay.op.tensor import ones_like
# import to register canonicalization funcs for fq2i
# pylint: disable=unused-import
@@ -199,6 +200,60 @@ def broadcast_to(expr, type_map):
return [out, t]
+@register_fake_quantization_to_integer("take")
+def take(expr, type_map):
+ """Rewrite a take op"""
+ arg1 = expr.args[0]
+ t = type_map[arg1]
+ arg2 = expr.args[1]
+ out = relay.op.take(
+ arg1,
+ arg2,
+ axis=expr.attrs.axis,
+ batch_dims=expr.attrs.batch_dims,
+ mode=expr.attrs.mode,
+ )
+ return [out, t]
+
+
+@register_fake_quantization_to_integer("power")
+def power(expr, type_map):
+ """Rewrite a power op as qnn.mul when the exponent is 2"""
+ base = expr.args[0]
+ exponent = expr.args[1]
+
+ base_type = type_map[base]
+
+ if not isinstance(exponent, relay.Constant):
+ return [expr, type_map[expr]]
+
+ data = exponent.data.numpy()
+ if not len(data.shape) == 0:
+ return [expr, type_map[expr]]
+
+ data = data.item()
+ if data != 2:
+ return [expr, type_map[expr]]
+
+ out = relay.qnn.op.mul(
+ base,
+ base,
+ base_type.scale,
+ base_type.zero_point,
+ base_type.scale,
+ base_type.zero_point,
+ output_scale=base_type.scale * base_type.scale,
+ output_zero_point=base_type.zero_point,
+ lhs_axis=base_type.axis,
+ rhs_axis=base_type.axis,
+ )
+ return [
+ out,
+ TensorAffineType(
+ base_type.scale * base_type.scale, base_type.zero_point, base_type.dtype, base_type.axis
+ ),
+ ]
+
+
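A quick numeric sanity check of the power(x, 2) rewrite above (a sketch with made-up values; it only verifies that squaring x = s * (q - z) corresponds to a quantized result under the rewritten affine type with scale s * s and the original zero point):

import numpy as np

s, z = 0.1, 3
q = np.array([5, 7, 10], dtype=np.int32)
x = s * (q - z)

# Quantize x ** 2 under the rewritten affine type (scale s * s, zero point z).
q_sq = np.round(x ** 2 / (s * s)).astype(np.int32) + z
np.testing.assert_allclose((s * s) * (q_sq - z), x ** 2, atol=(s * s) / 2)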
@register_fake_quantization_to_integer("nn.bias_add")
def bias_add(expr, type_map):
"""Rewrite a bias_add op"""
@@ -513,6 +568,37 @@ def register_binary_qnn(op_name, op):
register_binary_qnn("add", lambda *args: relay.qnn.op.add(*args))
register_binary_qnn("multiply", lambda *args: relay.qnn.op.mul(*args))
register_binary_qnn("subtract", lambda *args: relay.qnn.op.subtract(*args))
+register_binary_qnn("divide", lambda *args: relay.qnn.op.div(*args))
+
+
+'''
+# Alternative "divide" rewrite, kept commented out for reference: instead of
+# lowering to qnn.div, fold the dequantized denominator into the output scale.
+@register_fake_quantization_to_integer("divide")
+def divide(expr, type_map):
+ """Rewrite a divide op"""
+ numerator = expr.args[0]
+ denominator = expr.args[1]
+ numerator_t = type_map[numerator]
+ denominator_t = type_map[denominator]
+ assert numerator_t.axis == denominator_t.axis, "Only support identical axis for now."
+
+ # Fold the denominator into the scale of the output affine type.
+ new_scale = numerator_t.scale / (denominator_t.scale * (denominator - denominator_t.zero_point))
+ # Divide by ones_like(denominator) so the result keeps the broadcast shape of
+ # the original divide.
+ out = relay.divide(numerator, ones_like(denominator))
+ return [
+ out,
+ TensorAffineType(new_scale, numerator_t.zero_point, numerator_t.dtype, numerator_t.axis),
+ ]
+'''
def register_binary_identity(op_name, op):
@@ -578,4 +664,4 @@ register_unary_qnn("sigmoid", relay.qnn.op.sigmoid)
register_unary_qnn("hardswish", relay.qnn.op.hardswish)
register_unary_qnn("tanh", relay.qnn.op.tanh)
register_unary_qnn("abs", relay.qnn.op.abs)
-register_unary_qnn("log", relay.qnn.op.log)
+register_unary_qnn("log", relay.qnn.op.log)
\ No newline at end of file
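With "divide" registered as a binary QNN op above, a fake-quantized division should now be lowered by the FakeQuantizationToInteger pass. A rough end-to-end sketch (not part of the patch; scales, zero points, and shapes are illustrative assumptions):

import tvm
from tvm import relay

a = relay.var("a", shape=(1, 8), dtype="int8")
b = relay.var("b", shape=(1, 8), dtype="int8")

a_fp = relay.qnn.op.dequantize(a, relay.const(0.5), relay.const(0))
b_fp = relay.qnn.op.dequantize(b, relay.const(0.25), relay.const(0))
out = relay.divide(a_fp, b_fp)
out = relay.qnn.op.quantize(out, relay.const(2.0), relay.const(0), out_dtype="int8")

mod = tvm.IRModule.from_expr(relay.Function([a, b], out))
mod = relay.transform.InferType()(mod)
mod = relay.transform.FakeQuantizationToInteger()(mod)
print(mod)  # the floating-point divide should now appear as qnn.div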
diff --git a/src/relay/qnn/op/div.cc b/src/relay/qnn/op/div.cc
new file mode 100644
index 0000000000..3c37ed41c4
--- /dev/null
+++ b/src/relay/qnn/op/div.cc
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/relay/qnn/op/div.cc
+ * \brief QNN div operator.
+ */
+#include <tvm/relay/analysis.h>
+#include <tvm/relay/op_attr_types.h>
+#include <tvm/relay/qnn/attrs.h>
+
+#include "../../transforms/pattern_utils.h"
+#include "../utils.h"
+#include "op_common.h"
+
+namespace tvm {
+namespace relay {
+namespace qnn {
+
+/*
+ * \brief Canonicalizes the QNN div op.
+ * \param attrs The QNN div attrs.
+ * \param new_args The new mutated args to the call node.
+ * \param arg_types The types of input and output.
+ * \return The sequence of Relay ops for the div op.
+ */
+Expr QnnDivCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
+ const Array<tvm::relay::Type>& arg_types) {
+ Expr output;
+
+ // Get the attrs.
+ QnnBinaryOpArguments args(new_args);
+
+ // Get the input dtype and shape.
+ QnnBinaryOpTensorType input_type(arg_types, 0);
+
+ // data types
+ const auto int32_dtype = DataType::Int(32);
+ const auto float32_dtype = DataType::Float(32);
+
+ const auto* broadcast_attrs = attrs.as<BroadcastAttrs>();
+ ICHECK(broadcast_attrs != nullptr);
+
+ if (IsConstScalar(args.lhs_scale) && IsConstScalar(args.rhs_scale)) {
+ /* If both are constant:
+
+ n1/n2 = [s1(q1-z1)] / [s2(q2-z2)]
+ n1/n2 = [s1/s2][(q1-z1)/(q2-z2)]
+
+ Because [(q1-z1)/(q2-z2)] is integer division, we may lose significant precision.
+ To work around this we scale the numerator by a constant C chosen so that
+
+ |C(q1-z1)| >> |q2-z2|, making the precision loss from the division minimal:
+
+ n1/n2 = [s1/(s2 * C)][C(q1-z1)/(q2-z2)]
+ */
+
+ auto lhs_shifted = Cast(args.lhs, int32_dtype);
+ auto rhs_shifted = Cast(args.rhs, int32_dtype);
+
+ auto zero_scalar = MakeConstantScalar(int32_dtype, 0);
+ if (!IsEqualScalar(args.lhs_zero_point, zero_scalar)) {
+ lhs_shifted = Subtract(lhs_shifted, args.lhs_zero_point);
+ }
+
+ if (!IsEqualScalar(args.rhs_zero_point, zero_scalar)) {
+ rhs_shifted = Subtract(rhs_shifted, args.rhs_zero_point);
+ }
+
+ // Scale the numerator to limit precision loss from the integer division.
+ // Since we accumulate in INT32 and the inputs may be UINT16, multiply by 2^15.
+ int divide_scale_factor = 32768;
+ auto divide_scale_factor_constant = MakeConstantScalar(int32_dtype, divide_scale_factor);
+ output = Divide(Multiply(lhs_shifted, divide_scale_factor_constant), rhs_shifted);
+
+ // Get the adjusted new scale and zero points.
+ float lhs_scale_float = GetScalarFromConstant<float>(args.lhs_scale);
+ float rhs_scale_float = GetScalarFromConstant<float>(args.rhs_scale);
+ float new_scale_float = lhs_scale_float / (rhs_scale_float * divide_scale_factor);
+ auto new_input_scale = MakeConstantScalar(float32_dtype, new_scale_float);
+ auto new_input_zero_point = zero_scalar;
+
+ // Requantize to get Q_c
+ output = Requantize(output, input_type.shape, new_input_scale, new_input_zero_point,
+ args.output_scale, args.output_zero_point, input_type.dtype);
+ } else {
+ LOG(FATAL) << "Non-constant scale_factor not supported yet.";
+ }
+
+ return output;
+}
+
+// QNN Division operator.
+QNN_REGISTER_BINARY_OP("div")
+ .describe("Elementwise div with broadcasting for quantized tensors.")
+ .set_support_level(11)
+ .set_attr<FTVMLegalize>("FTVMQnnCanonicalize", QnnDivCanonicalize);
+
+} // namespace qnn
+} // namespace relay
+} // namespace tvm
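A small numeric check of the scaling scheme in QnnDivCanonicalize (a sketch with made-up values, not from the patch), showing that dividing the scaled integers and folding C into the requantize scale recovers the real-valued quotient:

import numpy as np

s1, z1 = 0.05, 10   # lhs scale / zero point
s2, z2 = 0.02, 3    # rhs scale / zero point
q1 = np.array([30, 90], dtype=np.int64)
q2 = np.array([13, 23], dtype=np.int64)

C = 32768  # divide_scale_factor from the canonicalization above
int_div = (C * (q1 - z1)) // (q2 - z2)       # integer division in the quantized domain
approx = (s1 / (s2 * C)) * int_div           # apply the adjusted scale s1 / (s2 * C)
exact = (s1 * (q1 - z1)) / (s2 * (q2 - z2))  # real-valued n1 / n2

print(approx, exact)  # should agree closely before the final requantize step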