Posted to commits@tvm.apache.org by GitBox <gi...@apache.org> on 2020/03/26 17:01:26 UTC

[GitHub] [incubator-tvm] shoubhik opened a new pull request #5153: Adding support for QNN subtract op

shoubhik opened a new pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153
 
 
   * Adding support for QNN subtract operator.
   * Refactoring the code for QNN Binary operators to use a common infrastructure.
   
   
   Thanks for contributing to TVM! Please refer to the contribution guidelines at https://docs.tvm.ai/contribute/ for useful information and tips. After the pull request is submitted, please request code reviews from [Reviewers](https://github.com/apache/incubator-tvm/blob/master/CONTRIBUTORS.md#reviewers) by @-mentioning them in the pull request thread.
   

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-tvm] shoubhik commented on a change in pull request #5153: Adding support for QNN subtract op

Posted by GitBox <gi...@apache.org>.
shoubhik commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r399444523
 
 

 ##########
 File path: src/relay/qnn/op/op_common.h
 ##########
 @@ -30,14 +30,152 @@
 #include <tvm/relay/qnn/attrs.h>
 #include <vector>
 #include "../../op/type_relations.h"
+#include "../../transforms/infer_layout_util.h"
+#include "../util.h"
 
 namespace tvm {
 namespace relay {
 namespace qnn {
 
-static inline bool QnnBroadcastRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
+/*
+ * Number of inputs for the Qnn binary operators.
+ * Refer to the QNN_REGISTER_BINARY_OP macro to see
+ * what the operators are.
+ */
+static constexpr int kNumQnnBinaryOpInputs = 8;
+
+/*
+ * Number of expected arg types.
+ */
+static constexpr int kNumQnnBinaryOpArgTypes = 9;
+
+/*
+ * \brief Simple struct to organize the inputs to the Qnn
+ * binary operators. The main reason to have a struct
+ * is to be able to perform the common checks needed at a
+ * central location.
+ */
+struct QnnBinaryOpArguments {
+  Expr lhs;
+  Expr rhs;
+  Expr lhs_scale;
+  Expr lhs_zero_point;
+  Expr rhs_scale;
+  Expr rhs_zero_point;
+  Expr output_scale;
+  Expr output_zero_point;
+
+  explicit QnnBinaryOpArguments(const Array<Expr>& new_args) {
+    CHECK_EQ(new_args.size(), kNumQnnBinaryOpInputs);
+    int idx = 0;
+    lhs = new_args[idx++];
+    rhs = new_args[idx++];
+    lhs_scale = new_args[idx++];
+    lhs_zero_point = new_args[idx++];
+    rhs_scale = new_args[idx++];
+    rhs_zero_point = new_args[idx++];
+    output_scale = new_args[idx++];
+    output_zero_point = new_args[idx++];
+    CHECK_EQ(idx, kNumQnnBinaryOpInputs);
+  }
+};
+
+/*
+ * \brief Simple structure to hold the input tensor's dtype
+ * and shape. This structure allows a common point to do
+ * all the validation checks for Qnn binary operators.
+ */
+struct QnnBinaryOpTensorType {
+  DataType dtype;
+  Array<PrimExpr> shape;
+
+  explicit QnnBinaryOpTensorType(const Array<tvm::relay::Type>& arg_types,
+                                 const int32_t arg_idx) {
+    CHECK_EQ(arg_types.size(), kNumQnnBinaryOpArgTypes);
+    auto tensor_type = arg_types[arg_idx].as<TensorTypeNode>();
+    CHECK(tensor_type != nullptr);
+    dtype = tensor_type->dtype;
+    shape = tensor_type->shape;
+  }
+};
+
+/*
+ * \brief Converts the expression from expression's dtype
+ * to target dtype. This is mainly used for converting
+ * computations done in Int32 to lower precision Int8 or
+ * UInt8.
+ * \param expr The expression whose dtype needs conversion.
+ * \param target_dtype The dtype of the target expression.
+ * \return New expression with target dtype and possibly lower
+ * precision.
+ */
+inline Expr ConvertDtype(const Expr& expr,
+                         const DataType& target_dtype) {
+  auto q_min = GetQmin(target_dtype);
+  auto q_max = GetQmax(target_dtype);
+  auto output = Clip(expr, q_min, q_max);
+  return Cast(output, target_dtype);
+}
+
+/*
+ * \brief Requantizes the given expression if its scale or
+ * zero point does not match the target scale and zero point.
+ * This is mainly needed for requantizing the input tensors
+ * with the output tensor's scale and zero point to ease the
+ * computation of the final quantized tensor.
+ * \param expr The expression on which the check needs to be performed.
+ * \param expr_scale The scale of the expression.
+ * \param expr_zero_point The zero point of the expression.
+ * \param target_scale The scale of the output tensor.
+ * \param target_zero_point The zero point of the output tensor.
+ * \param expr_shape The shape of the input expression.
+ * \return A new expression requantized to the target scale and
+ * zero point if the expression's scale or zero point differs;
+ * otherwise the given expression is simply cast to Int32, as no
+ * requantization is needed in that case.
+ */
+inline Expr RequantizeOrUpcast(const Expr& expr,
+                               const Expr& expr_scale,
+                               const Expr& expr_zero_point,
+                               const Expr& target_scale,
+                               const Expr& target_zero_point,
+                               const Array<PrimExpr>& expr_shape,
+                               const DataType& target_dtype = DataType::Int(32)) {
+  auto result = expr;
+  if (!IsEqualScalar(expr_scale, target_scale) ||
+     !IsEqualScalar(expr_zero_point, target_zero_point)) {
 
 Review comment:
   done.
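   For context, here is a condensed sketch of how the helpers in this diff compose inside a canonicalization function. It follows the subtract.cc lowering quoted later in this thread rather than copying it verbatim; in particular, the PR only adds the zero point when zp_c is non-zero.

~~~
// Sketch only: composing QnnBinaryOpArguments, QnnBinaryOpTensorType,
// RequantizeOrUpcast and ConvertDtype, condensed from subtract.cc.
Expr QnnSubtractCanonicalizeSketch(const Attrs& attrs, const Array<Expr>& new_args,
                                   const Array<tvm::relay::Type>& arg_types) {
  // Unpack and validate the 8 inputs: lhs, rhs, their scales and
  // zero points, and the output scale and zero point.
  QnnBinaryOpArguments args(new_args);
  // Dtype and shape of the lhs tensor (also checks the arg-type count).
  QnnBinaryOpTensorType input_type(arg_types, 0);
  // Bring both operands to the output scale/zero point, or just upcast
  // to int32 when the qnn params already match.
  auto lhs = RequantizeOrUpcast(args.lhs, args.lhs_scale, args.lhs_zero_point,
                                args.output_scale, args.output_zero_point,
                                input_type.shape);
  auto rhs = RequantizeOrUpcast(args.rhs, args.rhs_scale, args.rhs_zero_point,
                                args.output_scale, args.output_zero_point,
                                input_type.shape);
  // Q_c = Q_a' - Q_b' + zp_c, computed in int32 precision
  // (the PR skips the Add when zp_c is the zero scalar).
  auto output = Add(Subtract(lhs, rhs), args.output_zero_point);
  // Clip to the quantized range and cast back to the input dtype.
  return ConvertDtype(output, input_type.dtype);
}
~~~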

----------------------------------------------------------------

[GitHub] [incubator-tvm] anijain2305 commented on a change in pull request #5153: Adding support for QNN subtract op

Posted by GitBox <gi...@apache.org>.
anijain2305 commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r399454748
 
 

 ##########
 File path: src/relay/qnn/op/op_common.h
 ##########
 @@ -74,16 +212,17 @@ static inline bool QnnBroadcastRel(const Array<Type>& types, int num_inputs, con
                      output_scale, output_zero_point}, Attrs(), {});    \
   });                                                                   \
   RELAY_REGISTER_OP("qnn." OpName)                                      \
-  .set_num_inputs(8)                                                    \
-    .add_argument("lhs", "Tensor", "The left hand side quantized tensor.")                 \
-    .add_argument("rhs", "Tensor", "The right hand side quantized tensor.")                \
-    .add_argument("lhs_scale", "Tensor", "The scale of the lhs tensor.")                   \
-    .add_argument("lhs_zero_point", "Tensor", "The zero_point of the lhs tensor.")         \
-    .add_argument("rhs_scale", "Tensor", "The scale of the rhs tensor.")                   \
-    .add_argument("rhs_zero_point", "Tensor", "The zero_point of the rhs tensor.")         \
-    .add_argument("output_scale", "Tensor", "The scale of the output tensor.")             \
-    .add_argument("output_zero_point", "Tensor", "The zero_point of the output tensor.")   \
-    .add_type_rel("QnnBroadcast", QnnBroadcastRel)
+  .set_num_inputs(kNumQnnBinaryOpInputs)                                                    \
 
 Review comment:
   One last thing: the `\` is unaligned.
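   For illustration, aligned continuations would look like the following (the column choice is arbitrary, and QNN_BINARY_OP_SNIPPET is a made-up name for this example):

~~~
// Illustration only: every line-continuation backslash padded to the
// same column, as the review asks for.
#define QNN_BINARY_OP_SNIPPET(OpName)                                        \
  RELAY_REGISTER_OP("qnn." OpName)                                           \
      .set_num_inputs(kNumQnnBinaryOpInputs)                                 \
      .add_argument("lhs", "Tensor", "The left hand side quantized tensor.")
~~~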

----------------------------------------------------------------

[GitHub] [incubator-tvm] shoubhik commented on a change in pull request #5153: Adding support for QNN subtract op

Posted by GitBox <gi...@apache.org>.
shoubhik commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r398806276
 
 

 ##########
 File path: src/relay/qnn/op/add.cc
 ##########
 @@ -97,39 +66,29 @@ Expr QnnAddCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
   //          Q_c = Q_a' + Q_b' - zp_c
   // The add op is done in int32 precision.
 
-  // Requantize LHS if necessary.
-  auto requantized_lhs = lhs;
-  if (!IsEqualScalar(lhs_scale, output_scale) ||
-      !IsEqualScalar(lhs_zero_point, output_zero_point)) {
-    requantized_lhs = Requantize(lhs, input_shape, lhs_scale, lhs_zero_point, output_scale,
-                                 output_zero_point, DataType::Int(32));
-  } else {
-    requantized_lhs = Cast(requantized_lhs, DataType::Int(32));
-  }
 
-  // Requantize RHS if necessary.
-  auto requantized_rhs = rhs;
-  if (!IsEqualScalar(rhs_scale, output_scale) ||
-      !IsEqualScalar(rhs_zero_point, output_zero_point)) {
-    requantized_rhs = Requantize(rhs, input_shape, rhs_scale, rhs_zero_point, output_scale,
-                                 output_zero_point, DataType::Int(32));
-  } else {
-    requantized_rhs = Cast(requantized_rhs, DataType::Int(32));
-  }
 
+  // Requantize LHS if necessary. Computes Q_a'
+  auto requantized_lhs = requantizeIfNeeded(args.lhs, args.lhs_scale,
+                                            args.lhs_zero_point,
+                                            args.output_scale, args.output_zero_point,
+                                            inputShapeAndDtype.input_shape);
+  // Requantize RHS if necessary. Computes Q_b'
+  auto requantized_rhs = requantizeIfNeeded(args.rhs, args.rhs_scale,
+                                            args.rhs_zero_point,
+                                            args.output_scale, args.output_zero_point,
+                                            inputShapeAndDtype.input_shape);
+  // Computes Q_a' + Q_b'
   auto output = Add(requantized_lhs, requantized_rhs);
 
-  // Subtract zero point.
+  // Subtract zero point. Computes (Q_a' + Q_b') - zp_c
   auto zero_scalar = MakeConstantScalar(DataType::Int(32), 0);
-  if (!IsEqualScalar(output_zero_point, zero_scalar)) {
-    output = Subtract(output, output_zero_point);
+  if (!IsEqualScalar(args.output_zero_point, zero_scalar)) {
+    output = Subtract(output, args.output_zero_point);
   }
 
   // Go back to lower precision.
-  auto q_min = GetQmin(input_dtype);
-  auto q_max = GetQmax(input_dtype);
-  output = Clip(output, q_min, q_max);
-  return Cast(output, input_dtype);
+  return lowerPrecision(output, inputShapeAndDtype.input_dtype);
 
 Review comment:
   I think `ConvertDtype` makes sense?
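   For reference, the derivation that the add.cc comment summarizes; it mirrors the subtract derivation quoted later in this thread, with the sign flipped:

~~~
% Requantize Q_a and Q_b to the output params (s_c, zp_c), giving Q_a', Q_b':
%   s_x (Q_x - zp_x) = s_c (Q_x' - zp_c)
s_c (Q_c - zp_c) = s_a (Q_a - zp_a) + s_b (Q_b - zp_b)
                 = s_c (Q_a' - zp_c) + s_c (Q_b' - zp_c)
% Divide both sides by s_c and compare:
Q_c = Q_a' + Q_b' - zp_c
~~~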

----------------------------------------------------------------

[GitHub] [incubator-tvm] shoubhik commented on a change in pull request #5153: Adding support for QNN subtract op

Posted by GitBox <gi...@apache.org>.
shoubhik commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r399458287
 
 

 ##########
 File path: src/relay/qnn/op/op_common.h
 ##########
 @@ -74,16 +212,17 @@ static inline bool QnnBroadcastRel(const Array<Type>& types, int num_inputs, con
                      output_scale, output_zero_point}, Attrs(), {});    \
   });                                                                   \
   RELAY_REGISTER_OP("qnn." OpName)                                      \
-  .set_num_inputs(8)                                                    \
-    .add_argument("lhs", "Tensor", "The left hand side quantized tensor.")                 \
-    .add_argument("rhs", "Tensor", "The right hand side quantized tensor.")                \
-    .add_argument("lhs_scale", "Tensor", "The scale of the lhs tensor.")                   \
-    .add_argument("lhs_zero_point", "Tensor", "The zero_point of the lhs tensor.")         \
-    .add_argument("rhs_scale", "Tensor", "The scale of the rhs tensor.")                   \
-    .add_argument("rhs_zero_point", "Tensor", "The zero_point of the rhs tensor.")         \
-    .add_argument("output_scale", "Tensor", "The scale of the output tensor.")             \
-    .add_argument("output_zero_point", "Tensor", "The zero_point of the output tensor.")   \
-    .add_type_rel("QnnBroadcast", QnnBroadcastRel)
+  .set_num_inputs(kNumQnnBinaryOpInputs)                                                    \
 
 Review comment:
   done.

----------------------------------------------------------------

[GitHub] [incubator-tvm] shoubhik commented on a change in pull request #5153: Adding support for QNN subtract op

Posted by GitBox <gi...@apache.org>.
shoubhik commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r398816558
 
 

 ##########
 File path: src/relay/qnn/op/mul.cc
 ##########
 @@ -42,22 +42,10 @@ namespace qnn {
 Expr QnnMulCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
                         const Array<tvm::relay::Type>& arg_types) {
   // Get the attrs.
-  CHECK_EQ(new_args.size(), 8);
-  auto& lhs = new_args[0];
-  auto& rhs = new_args[1];
-  auto& lhs_scale = new_args[2];
-  auto& lhs_zero_point = new_args[3];
-  auto& rhs_scale = new_args[4];
-  auto& rhs_zero_point = new_args[5];
-  auto& output_scale = new_args[6];
-  auto& output_zero_point = new_args[7];
+  QnnBinaryOpArguments args(new_args);
 
   // Get the input dtype and shape.
-  CHECK_EQ(arg_types.size(), 9);
-  auto tensor_type = arg_types[0].as<TensorTypeNode>();
-  CHECK(tensor_type != nullptr);
-  auto input_dtype = tensor_type->dtype;
-  auto input_shape = tensor_type->shape;
+  QnnBinaryOpDtypeAndShape inputShapeAndDtype(arg_types);
 
 Review comment:
   done

----------------------------------------------------------------

[GitHub] [incubator-tvm] anijain2305 commented on a change in pull request #5153: Adding support for QNN subtract op

Posted by GitBox <gi...@apache.org>.
anijain2305 commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r399484380
 
 

 ##########
 File path: src/relay/qnn/op/op_common.h
 ##########
 @@ -30,14 +30,152 @@
 #include <tvm/relay/qnn/attrs.h>
 #include <vector>
 #include "../../op/type_relations.h"
+#include "../../transforms/infer_layout_util.h"
+#include "../util.h"
 
 namespace tvm {
 namespace relay {
 namespace qnn {
 
-static inline bool QnnBroadcastRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
+/*
+ * Number of inputs for the Qnn binary operators.
+ * Refer to the QNN_REGISTER_BINARY_OP macro to see
+ * what the operators are.
+ */
+static constexpr int kNumQnnBinaryOpInputs = 8;
+
+/*
+ * Number of expected arg types.
+ */
+static constexpr int kNumQnnBinaryOpArgTypes = 9;
+
+/*
+ * \brief Simple struct to organize the inputs to the Qnn
+ * binary operators. The main reason to have a struct
+ * is to be able to perform the common checks needed at a
+ * central location.
+ */
+struct QnnBinaryOpArguments {
+  Expr lhs;
+  Expr rhs;
+  Expr lhs_scale;
+  Expr lhs_zero_point;
+  Expr rhs_scale;
+  Expr rhs_zero_point;
+  Expr output_scale;
+  Expr output_zero_point;
+
+  explicit QnnBinaryOpArguments(const Array<Expr>& new_args) {
+    CHECK_EQ(new_args.size(), kNumQnnBinaryOpInputs);
+    int idx = 0;
+    lhs = new_args[idx++];
+    rhs = new_args[idx++];
+    lhs_scale = new_args[idx++];
+    lhs_zero_point = new_args[idx++];
+    rhs_scale = new_args[idx++];
+    rhs_zero_point = new_args[idx++];
+    output_scale = new_args[idx++];
+    output_zero_point = new_args[idx++];
+    CHECK_EQ(idx, kNumQnnBinaryOpInputs);
+  }
+};
+
+/*
+ * \brief Simple structure to hold the input tensor's dtype
+ * and shape. This structure allows a common point to do
+ * all the validation checks for Qnn binary operators.
+ */
+struct QnnBinaryOpTensorType {
+  DataType dtype;
+  Array<PrimExpr> shape;
+
+  explicit QnnBinaryOpTensorType(const Array<tvm::relay::Type>& arg_types,
+                                 const int32_t arg_idx) {
+    CHECK_EQ(arg_types.size(), kNumQnnBinaryOpArgTypes);
+    auto tensor_type = arg_types[arg_idx].as<TensorTypeNode>();
+    CHECK(tensor_type != nullptr);
+    dtype = tensor_type->dtype;
+    shape = tensor_type->shape;
+  }
+};
+
+/*
+ * \brief Converts the expression from expression's dtype
+ * to target dtype. This is mainly used for converting
+ * computations done in Int32 to lower precision Int8 or
+ * UInt8.
+ * \param expr The expression whose dtype needs conversion.
+ * \param target_dtype The dtype of the target expression.
+ * \return New expression with target dtype and possibly lower
+ * precision.
+ */
+inline Expr ConvertDtype(const Expr& expr,
+                         const DataType& target_dtype) {
+  auto q_min = GetQmin(target_dtype);
+  auto q_max = GetQmax(target_dtype);
+  auto output = Clip(expr, q_min, q_max);
+  return Cast(output, target_dtype);
+}
+
+/*
+ * \brief Requantizes the given expression if its scale or
+ * zero point does not match the target scale and zero point.
+ * This is mainly needed for requantizing the input tensors
+ * with the output tensor's scale and zero point to ease the
+ * computation of the final quantized tensor.
+ * \param expr The expression on which the check needs to be performed.
+ * \param expr_scale The scale of the expression.
+ * \param expr_zero_point The zero point of the expression.
+ * \param target_scale The scale of the output tensor.
+ * \param target_zero_point The zero point of the output tensor.
+ * \param expr_shape The shape of the input expression.
+ * \return A new expression requantized to the target scale and
+ * zero point if the expression's scale or zero point differs;
+ * otherwise the given expression is simply cast to Int32, as no
+ * requantization is needed in that case.
+ */
+inline Expr RequantizeOrUpcast(const Expr &expr,
 
 Review comment:
   Zhi means the `&` should be attached to `Expr` and not to `expr`.

----------------------------------------------------------------

[GitHub] [incubator-tvm] anijain2305 commented on a change in pull request #5153: Adding support for QNN subtract op

Posted by GitBox <gi...@apache.org>.
anijain2305 commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r398788973
 
 

 ##########
 File path: src/relay/qnn/op/op_common.h
 ##########
 @@ -30,14 +30,155 @@
 #include <tvm/relay/qnn/attrs.h>
 #include <vector>
 #include "../../op/type_relations.h"
+#include "../../transforms/infer_layout_util.h"
 
 namespace tvm {
 namespace relay {
 namespace qnn {
 
-static inline bool QnnBroadcastRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
+/*
+ * Number of inputs for the Qnn binary operators.
+ * Refer to the QNN_REGISTER_BINARY_OP macro to see
+ * what the operators are.
+ */
+static constexpr int numQnnBinaryOpInputs = 8;
+
+/*
+ * Number of expected arg types.
+ */
+static constexpr int numQnnBinaryOpArgTypes = 9;
+
+/*
+ * \brief Simple struct to organize the inputs to the Qnn
+ * binary operators. The main reason to have a struct
+ * is to be able to perform the common checks needed at a
+ * central location.
+ */
+struct QnnBinaryOpArguments {
+  Expr lhs;
+  Expr rhs;
+  Expr lhs_scale;
+  Expr lhs_zero_point;
+  Expr rhs_scale;
+  Expr rhs_zero_point;
+  Expr output_scale;
+  Expr output_zero_point;
+
+  explicit QnnBinaryOpArguments(const Array<Expr>& new_args) {
+    CHECK_EQ(new_args.size(), numQnnBinaryOpInputs);
+    int idx = 0;
+    lhs = new_args[idx++];
+    rhs = new_args[idx++];
+    lhs_scale = new_args[idx++];
+    lhs_zero_point = new_args[idx++];
+    rhs_scale = new_args[idx++];
+    rhs_zero_point = new_args[idx++];
+    output_scale = new_args[idx++];
+    output_zero_point = new_args[idx++];
+    CHECK_EQ(idx, numQnnBinaryOpInputs);
+  }
+};
+
+/*
+ * \brief Simple structure to hold the input tensor's dtype
+ * and shape. This structure allows a common point to do
+ * all the validation checks.
+ */
+struct QnnBinaryOpDtypeAndShape {
+  DataType input_dtype;
+  Array<PrimExpr> input_shape;
+
+  explicit QnnBinaryOpDtypeAndShape(const Array<tvm::relay::Type>& arg_types) {
+    CHECK_EQ(arg_types.size(), numQnnBinaryOpArgTypes);
+    auto tensor_type = arg_types[0].as<TensorTypeNode>();
+    CHECK(tensor_type != nullptr);
+    input_dtype = tensor_type->dtype;
+    input_shape = tensor_type->shape;
+  }
+};
+
+/*
+ * \brief Converts the expression from expression's dtype
+ * to target dtype. This is mainly used for converting
+ * computations done in Int32 to lower precision Int8 or
+ * UInt8.
+ * \param expr The expression whose dtype needs conversion.
+ * \param target_dtype The dtype of the target expression.
+ * \return New expression with target dtype and possibly lower
+ * precision.
+ */
+inline Expr lowerPrecision(const Expr& expr,
+                           const DataType& target_dtype) {
+  auto q_min = GetQmin(target_dtype);
+  auto q_max = GetQmax(target_dtype);
+  auto output = Clip(expr, q_min, q_max);
+  return Cast(output, target_dtype);
+}
+
+/*
+ * Full precision Int32 data type for explicitly casting
+ * Int8/UInt8 to Int32 and for creating Int32 constants.
+ */
+const auto fullPrecisionInt32 = DataType::Int(32);
+
+/*
+ * \brief Requantizes the given expression if its scale or
+ * zero point does not match the target scale and zero point.
+ * This is mainly needed for requantizing the input tensors
+ * with the output tensor's scale and zero point to ease the
+ * computation of the final quantized tensor.
+ * \param expr The expression on which the check needs to be performed.
+ * \param expr_scale The scale of the expression.
+ * \param expr_zero_point The zero point of the expression.
+ * \param target_scale The scale of the output tensor.
+ * \param target_zero_point The zero point of the output tensor.
+ * \param expr_shape The shape of the input expression.
+ * \return A new expression requantized to the target scale and
+ * zero point if the expression's scale or zero point differs;
+ * otherwise the given expression is simply cast to Int32, as no
+ * requantization is needed in that case.
+ */
+inline Expr requantizeIfNeeded(const Expr& expr,
+                               const Expr& expr_scale,
+                               const Expr& expr_zero_point,
+                               const Expr& target_scale,
+                               const Expr& target_zero_point,
+                               const Array<PrimExpr>& expr_shape) {
+  auto result = expr;
+  if (!IsEqualScalar(expr_scale, target_scale) ||
+     !IsEqualScalar(expr_zero_point, target_zero_point)) {
+    result = Requantize(expr, expr_shape, expr_scale, expr_zero_point,
+                        target_scale, target_zero_point, fullPrecisionInt32);
+  } else {
+    result = Cast(result, fullPrecisionInt32);
 
 Review comment:
   Maybe we need to come up with a better name for the function. Here, even when we do not requantize, we still upcast; that is not clear from the name.

----------------------------------------------------------------

[GitHub] [incubator-tvm] anijain2305 commented on a change in pull request #5153: Adding support for QNN subtract op

Posted by GitBox <gi...@apache.org>.
anijain2305 commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r399431735
 
 

 ##########
 File path: src/relay/qnn/op/subtract.cc
 ##########
 @@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/relay/qnn/op/subtract.cc
+ * \brief QNN subtract operator.
+ */
+#include <tvm/relay/analysis.h>
+#include <tvm/relay/op_attr_types.h>
+#include "op_common.h"
+
+namespace tvm {
+namespace relay {
+namespace qnn {
+
+/*
+ * \brief Canonicalizes the QNN subtract op.
+ * \param attrs The empty attribute.
+ * \param new_args The new mutated args to the call node.
+ * \param arg_types The types of input and output.
+ * \return The sequence of Relay ops for subtract op.
+ */
+Expr QnnSubtractCanonicalize(const Attrs &attrs,
+                             const Array<Expr> &new_args,
+                             const Array<tvm::relay::Type> &arg_types) {
+  // Get the args.
+  QnnBinaryOpArguments args(new_args);
+
+  // Get the input dtype and shape.
+  QnnBinaryOpTensorType input_type(arg_types, 0);
+
+  // TODO(shoubhik) - The lowering can be further optimized. Instead of inserting requantize in
+  // the start, we can insert requantize at the end if both input tensors have same qnn params. In
+  // that case, we can first subtract the tensors, add the zero point, and requantize at the end.
+  // This can be done in future.
+
+  // Since the input qnn params can differ from the output qnn params, we first requantize the
+  // input tensors to the output qnn params. Then we call relay.subtract on the requantized inputs.
+  // This subtraction results in an extra subtraction of the output zero point. We then add
+  // the zero point back. The whole process can be represented using the following equations
+  //
+  //          scale_c * (Q_c - zp_c) = scale_a * (Q_a - zp_a) - scale_b * (Q_b - zp_b)
+  //
+  // After requantizing Q_a and Q_b, equation becomes,
+  //          scale_c * (Q_c - zp_c) = scale_c * (Q_a' - zp_c) - scale_c * (Q_b' - zp_c)
+  //          scale_c * (Q_c - zp_c) = scale_c * (Q_a' - Q_b')
+  //
+  // Comparing the LHS and RHS, it results in
+  //          Q_c = Q_a' - Q_b' + zp_c
+  // The subtract op is done in int32 precision.
+
+  // Requantize LHS if necessary. Computes Q_a'
+  auto requantized_lhs = RequantizeOrUpcast(args.lhs, args.lhs_scale,
+                                            args.lhs_zero_point,
+                                            args.output_scale,
+                                            args.output_zero_point,
+                                            input_type.shape);
+  // Requantize RHS if necessary. Computes Q_b'
+  auto requantized_rhs = RequantizeOrUpcast(args.rhs, args.rhs_scale,
+                                            args.rhs_zero_point,
+                                            args.output_scale,
+                                            args.output_zero_point,
+                                            input_type.shape);
+
+  // Computes Q_a' - Q_b'
+  auto output = Subtract(requantized_lhs, requantized_rhs);
+
+  // Add zero point. Computes (Q_a' - Q_b') + zp_c
+  auto zero_scalar = MakeConstantScalar(DataType::Int(32), 0);
+  if (!IsEqualScalar(args.output_zero_point, zero_scalar)) {
+    output = Add(output, args.output_zero_point);
+  }
+
+  // Go back to lower precision.
+  return ConvertDtype(output, input_type.dtype);
+}
+
+// QNN Subtraction operator.
+QNN_REGISTER_BINARY_OP("subtract")
+.describe("Elementwise subtract with with broadcasting for quantized tensors.")
+.set_support_level(11)
+.set_attr<FTVMLegalize>("FTVMQnnCanonicalize", QnnSubtractCanonicalize)
+.set_attr<FInferCorrectLayout>("FInferCorrectLayout", QnnBinaryBroadcastLayout);
 
 Review comment:
   While you are here, this can be moved to op_common.h, where we define the macro `QNN_REGISTER_BINARY_OP`, and removed from here.
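   A sketch of what that suggestion could look like (hypothetical; the macro body is abbreviated here, and whether to move the registration at all is debated below):

~~~
// Hypothetical: append the shared layout attribute to the registration
// chain inside QNN_REGISTER_BINARY_OP in op_common.h (other macro lines
// abbreviated), so subtract.cc would not need the set_attr line at all.
#define QNN_REGISTER_BINARY_OP(OpName)                                  \
  RELAY_REGISTER_OP("qnn." OpName)                                      \
      .set_num_inputs(kNumQnnBinaryOpInputs)                            \
      .add_type_rel("QnnBroadcast", QnnBroadcastRel)                    \
      .set_attr<FInferCorrectLayout>("FInferCorrectLayout",             \
                                     QnnBinaryBroadcastLayout)
~~~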

----------------------------------------------------------------

[GitHub] [incubator-tvm] shoubhik commented on a change in pull request #5153: Adding support for QNN subtract op

Posted by GitBox <gi...@apache.org>.
shoubhik commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r398857826
 
 

 ##########
 File path: src/relay/qnn/op/op_common.h
 ##########
 @@ -30,14 +30,151 @@
 #include <tvm/relay/qnn/attrs.h>
 #include <vector>
 #include "../../op/type_relations.h"
+#include "../../transforms/infer_layout_util.h"
+#include "../util.h"
 
 namespace tvm {
 namespace relay {
 namespace qnn {
 
-static inline bool QnnBroadcastRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
+/*
+ * Number of inputs for the Qnn binary operators.
+ * Refer to the QNN_REGISTER_BINARY_OP macro to see
+ * what the operators are.
+ */
+static constexpr int numQnnBinaryOpInputs = 8;
 
 Review comment:
   made the change.
   

----------------------------------------------------------------

[GitHub] [incubator-tvm] anijain2305 commented on a change in pull request #5153: Adding support for QNN subtract op

Posted by GitBox <gi...@apache.org>.
anijain2305 commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r398849702
 
 

 ##########
 File path: src/relay/qnn/op/op_common.h
 ##########
 @@ -30,14 +30,151 @@
 #include <tvm/relay/qnn/attrs.h>
 #include <vector>
 #include "../../op/type_relations.h"
+#include "../../transforms/infer_layout_util.h"
+#include "../util.h"
 
 namespace tvm {
 namespace relay {
 namespace qnn {
 
-static inline bool QnnBroadcastRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
+/*
+ * Number of inputs for the Qnn binary operators.
+ * Refer to the QNN_REGISTER_BINARY_OP macro to see
+ * what the operators are.
+ */
+static constexpr int numQnnBinaryOpInputs = 8;
+
+/*
+ * Number of expected arg types.
+ */
+static constexpr int numQnnBinaryOpArgTypes = 9;
+
+/*
+ * \brief Simple struct to organize the inputs to the Qnn
+ * binary operators. The main reason to have a struct
+ * is to be able to perform the common checks needed at a
+ * central location.
+ */
+struct QnnBinaryOpArguments {
+  Expr lhs;
+  Expr rhs;
+  Expr lhs_scale;
+  Expr lhs_zero_point;
+  Expr rhs_scale;
+  Expr rhs_zero_point;
+  Expr output_scale;
+  Expr output_zero_point;
+
+  explicit QnnBinaryOpArguments(const Array<Expr>& new_args) {
+    CHECK_EQ(new_args.size(), numQnnBinaryOpInputs);
+    int idx = 0;
+    lhs = new_args[idx++];
+    rhs = new_args[idx++];
+    lhs_scale = new_args[idx++];
+    lhs_zero_point = new_args[idx++];
+    rhs_scale = new_args[idx++];
+    rhs_zero_point = new_args[idx++];
+    output_scale = new_args[idx++];
+    output_zero_point = new_args[idx++];
+    CHECK_EQ(idx, numQnnBinaryOpInputs);
+  }
+};
+
+/*
+ * \brief Simple structure to hold the input tensor's dtype
+ * and shape. This structure allows a common point to do
+ * all the validation checks.
+ */
+struct QnnBinaryOpTypes {
 
 Review comment:
   QnnBinaryOpType? Seems this is singular?

----------------------------------------------------------------

[GitHub] [incubator-tvm] anijain2305 commented on a change in pull request #5153: Adding support for QNN subtract op

Posted by GitBox <gi...@apache.org>.
anijain2305 commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r398847759
 
 

 ##########
 File path: src/relay/qnn/op/op_common.h
 ##########
 @@ -30,14 +30,155 @@
 #include <tvm/relay/qnn/attrs.h>
 #include <vector>
 #include "../../op/type_relations.h"
+#include "../../transforms/infer_layout_util.h"
 
 namespace tvm {
 namespace relay {
 namespace qnn {
 
-static inline bool QnnBroadcastRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
+/*
+ * Number of inputs for the Qnn binary operators.
+ * Refer to the QNN_REGISTER_BINARY_OP macro to see
+ * what the operators are.
+ */
+static constexpr int numQnnBinaryOpInputs = 8;
+
+/*
+ * Number of expected arg types.
+ */
+static constexpr int numQnnBinaryOpArgTypes = 9;
+
+/*
+ * \brief Simple struct to organize the inputs to the Qnn
+ * binary operators. The main reason to have a struct
+ * is to be able to perform the common checks needed at a
+ * central location.
+ */
+struct QnnBinaryOpArguments {
+  Expr lhs;
+  Expr rhs;
+  Expr lhs_scale;
+  Expr lhs_zero_point;
+  Expr rhs_scale;
+  Expr rhs_zero_point;
+  Expr output_scale;
+  Expr output_zero_point;
+
+  explicit QnnBinaryOpArguments(const Array<Expr>& new_args) {
+    CHECK_EQ(new_args.size(), numQnnBinaryOpInputs);
+    int idx = 0;
+    lhs = new_args[idx++];
+    rhs = new_args[idx++];
+    lhs_scale = new_args[idx++];
+    lhs_zero_point = new_args[idx++];
+    rhs_scale = new_args[idx++];
+    rhs_zero_point = new_args[idx++];
+    output_scale = new_args[idx++];
+    output_zero_point = new_args[idx++];
+    CHECK_EQ(idx, numQnnBinaryOpInputs);
+  }
+};
+
+/*
+ * \brief Simple structure to hold the input tensor's dtype
+ * and shape. This structure allows a common point to do
+ * all the validation checks.
+ */
+struct QnnBinaryOpDtypeAndShape {
+  DataType input_dtype;
+  Array<PrimExpr> input_shape;
+
+  explicit QnnBinaryOpDtypeAndShape(const Array<tvm::relay::Type>& arg_types) {
+    CHECK_EQ(arg_types.size(), numQnnBinaryOpArgTypes);
+    auto tensor_type = arg_types[0].as<TensorTypeNode>();
+    CHECK(tensor_type != nullptr);
+    input_dtype = tensor_type->dtype;
+    input_shape = tensor_type->shape;
+  }
+};
+
+/*
+ * \brief Converts the expression from expression's dtype
+ * to target dtype. This is mainly used for converting
+ * computations done in Int32 to lower precision Int8 or
+ * UInt8.
+ * \param expr The expression whose dtype needs conversion.
+ * \param target_dtype The dtype of the target expression.
+ * \return New expression with target dtype and possibly lower
+ * precision.
+ */
+inline Expr lowerPrecision(const Expr& expr,
+                           const DataType& target_dtype) {
+  auto q_min = GetQmin(target_dtype);
+  auto q_max = GetQmax(target_dtype);
+  auto output = Clip(expr, q_min, q_max);
+  return Cast(output, target_dtype);
+}
+
+/*
+ * Full precision Int32 data type for explicitly casting
+ * Int8/UInt8 to Int32 and for creating Int32 constants.
+ */
+const auto fullPrecisionInt32 = DataType::Int(32);
+
+/*
+ * \brief Requantizes the given expression if its scale or
+ * zero point does not match the target scale and zero point.
+ * This is mainly needed for requantizing the input tensors
+ * with the output tensor's scale and zero point to ease the
+ * computation of the final quantized tensor.
+ * \param expr The expression on which the check needs to be performed.
+ * \param expr_scale The scale of the expression.
+ * \param expr_zero_point The zero point of the expression.
+ * \param target_scale The scale of the output tensor.
+ * \param target_zero_point The zero point of the output tensor.
+ * \param expr_shape The shape of the input expression.
+ * \return A new expression requantized to the target scale and
+ * zero point if the expression's scale or zero point differs;
+ * otherwise the given expression is simply cast to Int32, as no
+ * requantization is needed in that case.
+ */
+inline Expr requantizeIfNeeded(const Expr& expr,
+                               const Expr& expr_scale,
+                               const Expr& expr_zero_point,
+                               const Expr& target_scale,
+                               const Expr& target_zero_point,
+                               const Array<PrimExpr>& expr_shape) {
+  auto result = expr;
+  if (!IsEqualScalar(expr_scale, target_scale) ||
+     !IsEqualScalar(expr_zero_point, target_zero_point)) {
+    result = Requantize(expr, expr_shape, expr_scale, expr_zero_point,
+                        target_scale, target_zero_point, fullPrecisionInt32);
+  } else {
+    result = Cast(result, fullPrecisionInt32);
 
 Review comment:
   That's ok.

----------------------------------------------------------------

[GitHub] [incubator-tvm] anijain2305 merged pull request #5153: Adding support for QNN subtract op

Posted by GitBox <gi...@apache.org>.
anijain2305 merged pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153
 
 
   

----------------------------------------------------------------

[GitHub] [incubator-tvm] anijain2305 commented on issue #5153: Adding support for QNN subtract op

Posted by GitBox <gi...@apache.org>.
anijain2305 commented on issue #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#issuecomment-604650530
 
 
   @zhiics Please take a look as well.

----------------------------------------------------------------

[GitHub] [incubator-tvm] anijain2305 commented on a change in pull request #5153: Adding support for QNN subtract op

Posted by GitBox <gi...@apache.org>.
anijain2305 commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r399447558
 
 

 ##########
 File path: src/relay/qnn/op/subtract.cc
 ##########
 @@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/relay/qnn/op/subtract.cc
+ * \brief QNN subtract operator.
+ */
+#include <tvm/relay/analysis.h>
+#include <tvm/relay/op_attr_types.h>
+#include "op_common.h"
+
+namespace tvm {
+namespace relay {
+namespace qnn {
+
+/*
+ * \brief Canonicalizes the QNN subtract op.
+ * \param attrs The empty attribute.
+ * \param new_args The new mutated args to the call node.
+ * \param arg_types The types of input and output.
+ * \return The sequence of Relay ops for subtract op.
+ */
+Expr QnnSubtractCanonicalize(const Attrs &attrs,
+                             const Array<Expr> &new_args,
+                             const Array<tvm::relay::Type> &arg_types) {
+  // Get the args.
+  QnnBinaryOpArguments args(new_args);
+
+  // Get the input dtype and shape.
+  QnnBinaryOpTensorType input_type(arg_types, 0);
+
+  // TODO(shoubhik) - The lowering can be further optimized. Instead of inserting requantize in
+  // the start, we can insert requantize at the end if both input tensors have same qnn params. In
+  // that case, we can first subtract the tensors, add the zero point, and requantize at the end.
+  // This can be done in future.
+
+  // Since the input qnn params can differ from the output qnn params, we first requantize the
+  // input tensors to the output qnn params. Then we call relay.subtract on the requantized inputs.
+  // This subtraction results in an extra subtraction of the output zero point. We then add
+  // the zero point back. The whole process can be represented using the following equations
+  //
+  //          scale_c * (Q_c - zp_c) = scale_a * (Q_a - zp_a) - scale_b * (Q_b - zp_b)
+  //
+  // After requantizing Q_a and Q_b, equation becomes,
+  //          scale_c * (Q_c - zp_c) = scale_c * (Q_a' - zp_c) - scale_c * (Q_b' - zp_c)
+  //          scale_c * (Q_c - zp_c) = scale_c * (Q_a' - Q_b')
+  //
+  // Comparing the LHS and RHS, it results in
+  //          Q_c = Q_a' - Q_b' + zp_c
+  // The subtract op is done in int32 precision.
+
+  // Requantize LHS if necessary. Computes Q_a'
+  auto requantized_lhs = RequantizeOrUpcast(args.lhs, args.lhs_scale,
+                                            args.lhs_zero_point,
+                                            args.output_scale,
+                                            args.output_zero_point,
+                                            input_type.shape);
+  // Requantize RHS if necessary. Computes Q_b'
+  auto requantized_rhs = RequantizeOrUpcast(args.rhs, args.rhs_scale,
+                                            args.rhs_zero_point,
+                                            args.output_scale,
+                                            args.output_zero_point,
+                                            input_type.shape);
+
+  // Computes Q_a' - Q_b'
+  auto output = Subtract(requantized_lhs, requantized_rhs);
+
+  // Add zero point. Computes (Q_a' - Q_b') + zp_c
+  auto zero_scalar = MakeConstantScalar(DataType::Int(32), 0);
+  if (!IsEqualScalar(args.output_zero_point, zero_scalar)) {
+    output = Add(output, args.output_zero_point);
+  }
+
+  // Go back to lower precision.
+  return ConvertDtype(output, input_type.dtype);
+}
+
+// QNN Subtraction operator.
+QNN_REGISTER_BINARY_OP("subtract")
+.describe("Elementwise subtract with with broadcasting for quantized tensors.")
+.set_support_level(11)
+.set_attr<FTVMLegalize>("FTVMQnnCanonicalize", QnnSubtractCanonicalize)
+.set_attr<FInferCorrectLayout>("FInferCorrectLayout", QnnBinaryBroadcastLayout);
 
 Review comment:
   The suggested change is only for line 99:
   
   ~~~
   .set_attr<FInferCorrectLayout>("FInferCorrectLayout", QnnBinaryBroadcastLayout);
   
   ~~~

----------------------------------------------------------------

[GitHub] [incubator-tvm] anijain2305 commented on a change in pull request #5153: Adding support for QNN subtract op

Posted by GitBox <gi...@apache.org>.
anijain2305 commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r398849174
 
 

 ##########
 File path: src/relay/qnn/op/add.cc
 ##########
 @@ -97,39 +66,29 @@ Expr QnnAddCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
   //          Q_c = Q_a' + Q_b' - zp_c
   // The add op is done in int32 precision.
 
-  // Requantize LHS if necessary.
-  auto requantized_lhs = lhs;
-  if (!IsEqualScalar(lhs_scale, output_scale) ||
-      !IsEqualScalar(lhs_zero_point, output_zero_point)) {
-    requantized_lhs = Requantize(lhs, input_shape, lhs_scale, lhs_zero_point, output_scale,
-                                 output_zero_point, DataType::Int(32));
-  } else {
-    requantized_lhs = Cast(requantized_lhs, DataType::Int(32));
-  }
 
-  // Requantize RHS if necessary.
-  auto requantized_rhs = rhs;
-  if (!IsEqualScalar(rhs_scale, output_scale) ||
-      !IsEqualScalar(rhs_zero_point, output_zero_point)) {
-    requantized_rhs = Requantize(rhs, input_shape, rhs_scale, rhs_zero_point, output_scale,
-                                 output_zero_point, DataType::Int(32));
-  } else {
-    requantized_rhs = Cast(requantized_rhs, DataType::Int(32));
-  }
 
+  // Requantize LHS if necessary. Computes Q_a'
+  auto requantized_lhs = requantizeIfNeeded(args.lhs, args.lhs_scale,
+                                            args.lhs_zero_point,
+                                            args.output_scale, args.output_zero_point,
+                                            inputShapeAndDtype.input_shape);
+  // Requantize RHS if necessary. Computes Q_b'
+  auto requantized_rhs = requantizeIfNeeded(args.rhs, args.rhs_scale,
+                                            args.rhs_zero_point,
+                                            args.output_scale, args.output_zero_point,
+                                            inputShapeAndDtype.input_shape);
+  // Computes Q_a' + Q_b'
   auto output = Add(requantized_lhs, requantized_rhs);
 
-  // Subtract zero point.
+  // Subtract zero point. Computes (Q_a' + Q_b') - zp_c
   auto zero_scalar = MakeConstantScalar(DataType::Int(32), 0);
-  if (!IsEqualScalar(output_zero_point, zero_scalar)) {
-    output = Subtract(output, output_zero_point);
+  if (!IsEqualScalar(args.output_zero_point, zero_scalar)) {
+    output = Subtract(output, args.output_zero_point);
   }
 
   // Go back to lower precision.
-  auto q_min = GetQmin(input_dtype);
-  auto q_max = GetQmax(input_dtype);
-  output = Clip(output, q_min, q_max);
-  return Cast(output, input_dtype);
+  return lowerPrecision(output, inputShapeAndDtype.input_dtype);
 
 Review comment:
   `ShrinkBackToOutDtype`? `ConvertDtype` is vague as well.

----------------------------------------------------------------

[GitHub] [incubator-tvm] shoubhik commented on a change in pull request #5153: Adding support for QNN subtract op

Posted by GitBox <gi...@apache.org>.
shoubhik commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r399444424
 
 

 ##########
 File path: src/relay/qnn/op/mul.cc
 ##########
 @@ -42,22 +42,13 @@ namespace qnn {
 Expr QnnMulCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
                         const Array<tvm::relay::Type>& arg_types) {
   // Get the attrs.
-  CHECK_EQ(new_args.size(), 8);
-  auto& lhs = new_args[0];
-  auto& rhs = new_args[1];
-  auto& lhs_scale = new_args[2];
-  auto& lhs_zero_point = new_args[3];
-  auto& rhs_scale = new_args[4];
-  auto& rhs_zero_point = new_args[5];
-  auto& output_scale = new_args[6];
-  auto& output_zero_point = new_args[7];
+  QnnBinaryOpArguments args(new_args);
 
   // Get the input dtype and shape.
-  CHECK_EQ(arg_types.size(), 9);
-  auto tensor_type = arg_types[0].as<TensorTypeNode>();
-  CHECK(tensor_type != nullptr);
-  auto input_dtype = tensor_type->dtype;
-  auto input_shape = tensor_type->shape;
+  QnnBinaryOpTensorType input_type(arg_types, 0);
+  // data types
+  const auto Int32 = DataType::Int(32);
+  const auto Float32 = DataType::Float(32);
 
 Review comment:
   done.

----------------------------------------------------------------

[GitHub] [incubator-tvm] shoubhik commented on a change in pull request #5153: Adding support for QNN subtract op

Posted by GitBox <gi...@apache.org>.
shoubhik commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r399443268
 
 

 ##########
 File path: src/relay/qnn/op/subtract.cc
 ##########
 @@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/relay/qnn/op/subtract.cc
+ * \brief QNN subtract operator.
+ */
+#include <tvm/relay/analysis.h>
+#include <tvm/relay/op_attr_types.h>
+#include "op_common.h"
+
+namespace tvm {
+namespace relay {
+namespace qnn {
+
+/*
+ * \brief Canonicalizes the QNN subtract op.
+ * \param attrs The empty attribute.
+ * \param new_args The new mutated args to the call node.
+ * \param arg_types The types of input and output.
+ * \return The sequence of Relay ops for subtract op.
+ */
+Expr QnnSubtractCanonicalize(const Attrs &attrs,
+                             const Array<Expr> &new_args,
+                             const Array<tvm::relay::Type> &arg_types) {
+  // Get the args.
+  QnnBinaryOpArguments args(new_args);
+
+  // Get the input dtype and shape.
+  QnnBinaryOpTensorType input_type(arg_types, 0);
+
+  // TODO(shoubhik) - The lowering can be further optimized. Instead of inserting requantize in
+  // the start, we can insert requantize at the end if both input tensors have same qnn params. In
+  // that case, we can first subtract the tensors, add the zero point, and requantize at the end.
+  // This can be done in future.
+
+  // Since the input qnn params can differ from the output qnn params, we first requantize the
+  // input tensors to the output qnn params. Then we call relay.subtract on the requantized inputs.
+  // This subtraction results in an extra subtraction of the output zero point. We then add
+  // the zero point back. The whole process can be represented using the following equations
+  //
+  //          scale_c * (Q_c - zp_c) = scale_a * (Q_a - zp_a) - scale_b * (Q_b - zp_b)
+  //
+  // After requantizing Q_a and Q_b, equation becomes,
+  //          scale_c * (Q_c - zp_c) = scale_c * (Q_a' - zp_c) - scale_c * (Q_b' - zp_c)
+  //          scale_c * (Q_c - zp_c) = scale_c * (Q_a' - Q_b')
+  //
+  // Comparing the LHS and RHS, it results in
+  //          Q_c = Q_a' - Q_b' + zp_c
+  // The subtract op is done in int32 precision.
+
+  // Requantize LHS if necessary. Computes Q_a'
+  auto requantized_lhs = RequantizeOrUpcast(args.lhs, args.lhs_scale,
+                                            args.lhs_zero_point,
+                                            args.output_scale,
+                                            args.output_zero_point,
+                                            input_type.shape);
+  // Requantize RHS if necessary. Computes Q_b'
+  auto requantized_rhs = RequantizeOrUpcast(args.rhs, args.rhs_scale,
+                                            args.rhs_zero_point,
+                                            args.output_scale,
+                                            args.output_zero_point,
+                                            input_type.shape);
+
+  // Computes Q_a' - Q_b'
+  auto output = Subtract(requantized_lhs, requantized_rhs);
+
+  // Add zero point. Computes (Q_a' - Q_b') + zp_c
+  auto zero_scalar = MakeConstantScalar(DataType::Int(32), 0);
+  if (!IsEqualScalar(args.output_zero_point, zero_scalar)) {
+    output = Add(output, args.output_zero_point);
+  }
+
+  // Go back to lower precision.
+  return ConvertDtype(output, input_type.dtype);
+}
+
+// QNN Subtraction operator.
+QNN_REGISTER_BINARY_OP("subtract")
+.describe("Elementwise subtract with with broadcasting for quantized tensors.")
+.set_support_level(11)
+.set_attr<FTVMLegalize>("FTVMQnnCanonicalize", QnnSubtractCanonicalize)
+.set_attr<FInferCorrectLayout>("FInferCorrectLayout", QnnBinaryBroadcastLayout);
 
 Review comment:
   I believe this should stay here; otherwise there would be confusion as to where the op is registered. I have seen this convention followed in all the other .cc files, and I think we should stick with it. Plus, there is a danger in moving this to a .h file: in case someone screws up later and removes or forgets to link the header file, these ops will never be registered.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-tvm] anijain2305 commented on a change in pull request #5153: Adding support for QNN subtract op

Posted by GitBox <gi...@apache.org>.
anijain2305 commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r398780962
 
 

 ##########
 File path: python/tvm/relay/qnn/op/qnn.py
 ##########
 @@ -436,3 +436,54 @@ def mul(lhs, rhs, lhs_scale, lhs_zero_point, rhs_scale, rhs_zero_point,
                      lhs_scale, lhs_zero_point,
                      rhs_scale, rhs_zero_point,
                      output_scale, output_zero_point)
+
+
+def subtract(lhs,
+             rhs,
+             lhs_scale,
+             lhs_zero_point,
+             rhs_scale,
+             rhs_zero_point,
+             output_scale,
+             output_zero_point):
+    """Quantized subtraction with numpy-style broadcasting.
+
+    Parameters
+    ----------
+    lhs : relay.Expr
+        The left hand side quantized input data.
+
+    rhs : relay.Expr
+        The right hand side quantized input data.
+
+    lhs_scale: float
+        The scale of the lhs quantized expr.
+
 
 Review comment:
   Remove
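   For context, calling the new op from Python ends up looking roughly like the sketch below. This is a hedged sketch, not canonical usage: it assumes the scales and zero points are passed as scalar relay constants (matching the eight-expression unpacking in the C++ QnnBinaryOpArguments), and the shapes, dtypes, and qnn params are made up for illustration.

   from tvm import relay

   # Two quantized uint8 inputs (shapes are illustrative).
   lhs = relay.var("lhs", shape=(1, 4), dtype="uint8")
   rhs = relay.var("rhs", shape=(1, 4), dtype="uint8")

   out = relay.qnn.op.subtract(
       lhs, rhs,
       lhs_scale=relay.const(0.125, "float32"),
       lhs_zero_point=relay.const(0, "int32"),
       rhs_scale=relay.const(0.125, "float32"),
       rhs_zero_point=relay.const(0, "int32"),
       output_scale=relay.const(0.125, "float32"),
       output_zero_point=relay.const(0, "int32"))

   func = relay.Function([lhs, rhs], out)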

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-tvm] shoubhik commented on a change in pull request #5153: Adding support for QNN subtract op

Posted by GitBox <gi...@apache.org>.
shoubhik commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r399490983
 
 

 ##########
 File path: src/relay/qnn/op/subtract.cc
 ##########
 @@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/relay/qnn/op/subtract.cc
+ * \brief QNN subtract operator.
+ */
+#include <tvm/relay/analysis.h>
+#include <tvm/relay/op_attr_types.h>
+#include "op_common.h"
+
+namespace tvm {
+namespace relay {
+namespace qnn {
+
+/*
+ * \brief Canonicalizes the QNN subtract op.
+ * \param attrs The empty attribute.
+ * \param new_args The new mutated args to the call node.
+ * \param arg_types The types of input and output.
+ * \return The sequence of Relay ops for subtract op.
+ */
+Expr QnnSubtractCanonicalize(const Attrs &attrs,
 
 Review comment:
   done.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-tvm] shoubhik commented on a change in pull request #5153: Adding support for QNN subtract op

Posted by GitBox <gi...@apache.org>.
shoubhik commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r398850100
 
 

 ##########
 File path: src/relay/qnn/op/mul.cc
 ##########
 @@ -71,31 +59,35 @@ Expr QnnMulCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
   which is essentially a requantization of tensor Q' into tensor Q_c.
   */
 
-  auto lhs_shifted = Cast(lhs, DataType::Int(32));
-  auto rhs_shifted = Cast(rhs, DataType::Int(32));
+  auto lhs_shifted = Cast(args.lhs, fullPrecisionInt32);
 
 Review comment:
   One compromise could be just to name them `Int32` or `Float32`, which carries the necessary information and also keeps the associated safety. For now I have made the change and added the names Int32 and Float32, so it is both intuitive and easy to change.
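
   On the substance of this hunk: mul lowers by multiplying the zero-centered int32 values and then requantizing the product, whose effective scale is scale_a * scale_b, into the output qnn params. A minimal numeric sketch of that identity (plain Python, made-up params, no TVM APIs):

   s_a, zp_a = 0.5, 10
   s_b, zp_b = 0.25, 3
   s_c, zp_c = 0.2, 7                             # made-up output params

   q_a, q_b = 34, 19                              # quantized 12.0 and 4.0
   prod = (q_a - zp_a) * (q_b - zp_b)             # int32 product, scale s_a * s_b
   q_c = round(prod * (s_a * s_b) / s_c) + zp_c   # requantize into output params
   assert abs(s_c * (q_c - zp_c) - 12.0 * 4.0) < 1e-9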

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-tvm] zhiics commented on a change in pull request #5153: Adding support for QNN subtract op

Posted by GitBox <gi...@apache.org>.
zhiics commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r399459498
 
 

 ##########
 File path: src/relay/qnn/op/op_common.h
 ##########
 @@ -30,14 +30,152 @@
 #include <tvm/relay/qnn/attrs.h>
 #include <vector>
 #include "../../op/type_relations.h"
+#include "../../transforms/infer_layout_util.h"
+#include "../util.h"
 
 namespace tvm {
 namespace relay {
 namespace qnn {
 
-static inline bool QnnBroadcastRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
+/*
+ * Number of inputs for the Qnn binary operators.
+ * Refer to the QNN_REGISTER_BINARY_OP macro to see
+ * what the operators are.
+ */
+static constexpr int kNumQnnBinaryOpInputs = 8;
+
+/*
+ * Number of expected arg types (the 8 input types plus the output type).
+ */
+static constexpr int kNumQnnBinaryOpArgTypes = 9;
+
+/*
+ * \brief Simple struct to organize the inputs to the Qnn
+ * binary operators. The main reason to have a struct
+ * is to be able to perform the common checks needed at a
+ * central location.
+ */
+struct QnnBinaryOpArguments {
+  Expr lhs;
+  Expr rhs;
+  Expr lhs_scale;
+  Expr lhs_zero_point;
+  Expr rhs_scale;
+  Expr rhs_zero_point;
+  Expr output_scale;
+  Expr output_zero_point;
+
+  explicit QnnBinaryOpArguments(const Array<Expr>& new_args) {
+    CHECK_EQ(new_args.size(), kNumQnnBinaryOpInputs);
+    int idx = 0;
+    lhs = new_args[idx++];
+    rhs = new_args[idx++];
+    lhs_scale = new_args[idx++];
+    lhs_zero_point = new_args[idx++];
+    rhs_scale = new_args[idx++];
+    rhs_zero_point = new_args[idx++];
+    output_scale = new_args[idx++];
+    output_zero_point = new_args[idx++];
+    CHECK_EQ(idx, kNumQnnBinaryOpInputs);
+  }
+};
+
+/*
+ * \brief Simple structure to hold the input tensor's dtype
+ * and shape. This structure allows a common point to do
+ * all the validation checks for Qnn binary operators.
+ */
+struct QnnBinaryOpTensorType {
+  DataType dtype;
+  Array<PrimExpr> shape;
+
+  explicit QnnBinaryOpTensorType(const Array<tvm::relay::Type>& arg_types,
+                                 const int32_t arg_idx) {
+    CHECK_EQ(arg_types.size(), kNumQnnBinaryOpArgTypes);
+    auto tensor_type = arg_types[arg_idx].as<TensorTypeNode>();
+    CHECK(tensor_type != nullptr);
+    dtype = tensor_type->dtype;
+    shape = tensor_type->shape;
+  }
+};
+
+/*
+ * \brief Converts the expression from its current dtype
+ * to the target dtype. This is mainly used for converting
+ * computations done in Int32 to lower precision Int8 or
+ * UInt8.
+ * \param expr The expression whose dtype needs conversion.
+ * \param target_dtype The dtype of the target expression.
+ * \return New expression with target dtype and possibly lower
+ * precision.
+ */
+inline Expr ConvertDtype(const Expr& expr,
+                         const DataType& target_dtype) {
+  auto q_min = GetQmin(target_dtype);
+  auto q_max = GetQmax(target_dtype);
+  auto output = Clip(expr, q_min, q_max);
+  return Cast(output, target_dtype);
+}
+
+/*
+ * \brief Requantizes the given expression if the expression's
+ * scale and zero point do not both match the target scale and
+ * zero point. This is mainly needed for requantizing the
+ * input tensors with the output tensor's scale and zero point
+ * to ease the computation of the final quantized tensor.
+ * \param expr The expression on which the check needs to be performed.
+ * \param expr_scale The scale of the expression.
+ * \param expr_zero_point The zero point of the expression.
+ * \param target_scale The scale of the output tensor.
+ * \param target_zero_point The zero point of the output tensor.
+ * \param expr_shape The shape of the input expression.
+ * \return New expression that is requantized to the target scale and zero
+ * point if the expression's scale and zero point differ; otherwise it
+ * simply casts the given expression to Int32, as no requantization is
+ * needed in that case.
+ */
+inline Expr RequantizeOrUpcast(const Expr &expr,
 
 Review comment:
   one minor: please use `const Expr& expr`, for all other places as well
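
   Aside from the reference style, the decision this helper's doc comment describes is worth restating, since every binary op relies on it. A minimal Python sketch of the same branching on plain numbers (the function and its quantize/dequantize arithmetic are illustrative stand-ins, not TVM API):

   def requantize_or_upcast(q, scale, zp, target_scale, target_zp):
       """If the qnn params already match the target, only an int32
       upcast is needed; otherwise requantize to the target params."""
       if scale != target_scale or zp != target_zp:
           # Requantize: dequantize with the old params, quantize with the new.
           real = scale * (q - zp)
           return round(real / target_scale) + target_zp
       # Params already match: just widen to int32 (a no-op on Python ints).
       return int(q)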

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-tvm] shoubhik commented on a change in pull request #5153: Adding support for QNN subtract op

Posted by GitBox <gi...@apache.org>.
shoubhik commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r398851124
 
 

 ##########
 File path: src/relay/qnn/op/op_common.h
 ##########
 @@ -30,14 +30,151 @@
 #include <tvm/relay/qnn/attrs.h>
 #include <vector>
 #include "../../op/type_relations.h"
+#include "../../transforms/infer_layout_util.h"
+#include "../util.h"
 
 namespace tvm {
 namespace relay {
 namespace qnn {
 
-static inline bool QnnBroadcastRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
+/*
+ * Number of inputs for the Qnn binary operators.
+ * Refer to the QNN_REGISTER_BINARY_OP macro to see
+ * what the operators are.
+ */
+static constexpr int numQnnBinaryOpInputs = 8;
+
+/*
+ * Number of expected arg types (the 8 input types plus the output type).
+ */
+static constexpr int numQnnBinaryOpArgTypes = 9;
+
+/*
+ * \brief Simple struct to organize the inputs to the Qnn
+ * binary operators. The main reason to have a struct
+ * is to be able to perform the common checks needed at a
+ * central location.
+ */
+struct QnnBinaryOpArguments {
+  Expr lhs;
+  Expr rhs;
+  Expr lhs_scale;
+  Expr lhs_zero_point;
+  Expr rhs_scale;
+  Expr rhs_zero_point;
+  Expr output_scale;
+  Expr output_zero_point;
+
+  explicit QnnBinaryOpArguments(const Array<Expr>& new_args) {
+    CHECK_EQ(new_args.size(), numQnnBinaryOpInputs);
+    int idx = 0;
+    lhs = new_args[idx++];
+    rhs = new_args[idx++];
+    lhs_scale = new_args[idx++];
+    lhs_zero_point = new_args[idx++];
+    rhs_scale = new_args[idx++];
+    rhs_zero_point = new_args[idx++];
+    output_scale = new_args[idx++];
+    output_zero_point = new_args[idx++];
+    CHECK_EQ(idx, numQnnBinaryOpInputs);
+  }
+};
+
+/*
+ * \brief Simple structure to hold the input tensor's dtype
+ * and shape. This structure allows a common point to do
+ * all the validation checks.
+ */
+struct QnnBinaryOpTypes {
 
 Review comment:
   At this time, yes, but it is possible to add types from other parts of `arg_types`. Later, if needed, we can have another structure QnnBinaryOpType, and then QnnBinaryOpTypes can have them as members. But for now that seems like overkill.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-tvm] anijain2305 commented on a change in pull request #5153: Adding support for QNN subtract op

Posted by GitBox <gi...@apache.org>.
anijain2305 commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r399428671
 
 

 ##########
 File path: src/relay/qnn/op/mul.cc
 ##########
 @@ -42,22 +42,13 @@ namespace qnn {
 Expr QnnMulCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
                         const Array<tvm::relay::Type>& arg_types) {
   // Get the attrs.
-  CHECK_EQ(new_args.size(), 8);
-  auto& lhs = new_args[0];
-  auto& rhs = new_args[1];
-  auto& lhs_scale = new_args[2];
-  auto& lhs_zero_point = new_args[3];
-  auto& rhs_scale = new_args[4];
-  auto& rhs_zero_point = new_args[5];
-  auto& output_scale = new_args[6];
-  auto& output_zero_point = new_args[7];
+  QnnBinaryOpArguments args(new_args);
 
   // Get the input dtype and shape.
-  CHECK_EQ(arg_types.size(), 9);
-  auto tensor_type = arg_types[0].as<TensorTypeNode>();
-  CHECK(tensor_type != nullptr);
-  auto input_dtype = tensor_type->dtype;
-  auto input_shape = tensor_type->shape;
+  QnnBinaryOpTensorType input_type(arg_types, 0);
+  // data types
+  const auto Int32 = DataType::Int(32);
 
 Review comment:
   Still think that this might not be required. But if you insist, we need to follow the variable naming convention - snake case, not camel case. We can name it `int32_dtype`.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-tvm] anijain2305 commented on a change in pull request #5153: Adding support for QNN subtract op

Posted by GitBox <gi...@apache.org>.
anijain2305 commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r398787686
 
 

 ##########
 File path: src/relay/qnn/op/add.cc
 ##########
 @@ -97,39 +66,29 @@ Expr QnnAddCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
   //          Q_c = Q_a' + Q_b' - zp_c
   // The add op is done in int32 precision.
 
-  // Requantize LHS if necessary.
-  auto requantized_lhs = lhs;
-  if (!IsEqualScalar(lhs_scale, output_scale) ||
-      !IsEqualScalar(lhs_zero_point, output_zero_point)) {
-    requantized_lhs = Requantize(lhs, input_shape, lhs_scale, lhs_zero_point, output_scale,
-                                 output_zero_point, DataType::Int(32));
-  } else {
-    requantized_lhs = Cast(requantized_lhs, DataType::Int(32));
-  }
 
-  // Requantize RHS if necessary.
-  auto requantized_rhs = rhs;
-  if (!IsEqualScalar(rhs_scale, output_scale) ||
-      !IsEqualScalar(rhs_zero_point, output_zero_point)) {
-    requantized_rhs = Requantize(rhs, input_shape, rhs_scale, rhs_zero_point, output_scale,
-                                 output_zero_point, DataType::Int(32));
-  } else {
-    requantized_rhs = Cast(requantized_rhs, DataType::Int(32));
-  }
 
+  // Requantize LHS if necessary. Computes Q_a'
+  auto requantized_lhs = requantizeIfNeeded(args.lhs, args.lhs_scale,
+                                            args.lhs_zero_point,
+                                            args.output_scale, args.output_zero_point,
+                                            inputShapeAndDtype.input_shape);
+  // Requantize RHS if necessary. Computes Q_b'
+  auto requantized_rhs = requantizeIfNeeded(args.rhs, args.rhs_scale,
+                                            args.rhs_zero_point,
+                                            args.output_scale, args.output_zero_point,
+                                            inputShapeAndDtype.input_shape);
+  // Computes Q_a' + Q_b'
   auto output = Add(requantized_lhs, requantized_rhs);
 
-  // Subtract zero point.
+  // Subtract zero point. Computes (Q_a' + Q_b') - zp_c
   auto zero_scalar = MakeConstantScalar(DataType::Int(32), 0);
-  if (!IsEqualScalar(output_zero_point, zero_scalar)) {
-    output = Subtract(output, output_zero_point);
+  if (!IsEqualScalar(args.output_zero_point, zero_scalar)) {
+    output = Subtract(output, args.output_zero_point);
   }
 
   // Go back to lower precision.
-  auto q_min = GetQmin(input_dtype);
-  auto q_max = GetQmax(input_dtype);
-  output = Clip(output, q_min, q_max);
-  return Cast(output, input_dtype);
+  return lowerPrecision(output, inputShapeAndDtype.input_dtype);
 
 Review comment:
   It's not necessarily lower precision. It is basically going back to whatever the input dtype was, which can be int32 as well.
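
   For the record, the identity this hunk computes, Q_c = Q_a' + Q_b' - zp_c, is easy to spot-check numerically outside TVM. A minimal Python sketch with made-up qnn params:

   def dq(q, s, zp):                       # dequantize
       return s * (q - zp)

   def qz(x, s, zp):                       # quantize
       return round(x / s) + zp

   s_c, zp_c = 0.5, 7                      # made-up output params
   q_a_p = qz(dq(34, 0.5, 10), s_c, zp_c)  # Q_a': 12.0 requantized -> 31
   q_b_p = qz(dq(19, 0.25, 3), s_c, zp_c)  # Q_b': 4.0 requantized -> 15
   q_c = q_a_p + q_b_p - zp_c              # Q_c = Q_a' + Q_b' - zp_c
   assert dq(q_c, s_c, zp_c) == 12.0 + 4.0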

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-tvm] shoubhik commented on a change in pull request #5153: Adding support for QNN subtract op

Posted by GitBox <gi...@apache.org>.
shoubhik commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r398810119
 
 

 ##########
 File path: src/relay/qnn/op/mul.cc
 ##########
 @@ -71,31 +59,35 @@ Expr QnnMulCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
   which is essentially a requantization of tensor Q' into tensor Q_c.
   */
 
-  auto lhs_shifted = Cast(lhs, DataType::Int(32));
-  auto rhs_shifted = Cast(rhs, DataType::Int(32));
+  auto lhs_shifted = Cast(args.lhs, fullPrecisionInt32);
 
 Review comment:
   Do you think `int32Dtype` would be better? Using DataType::Int(32) directly could be an error waiting to happen if we forget to change it someplace.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-tvm] anijain2305 commented on a change in pull request #5153: Adding support for QNN subtract op

Posted by GitBox <gi...@apache.org>.
anijain2305 commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r399430571
 
 

 ##########
 File path: src/relay/qnn/op/op_common.h
 ##########
 @@ -30,14 +30,152 @@
 #include <tvm/relay/qnn/attrs.h>
 #include <vector>
 #include "../../op/type_relations.h"
+#include "../../transforms/infer_layout_util.h"
+#include "../util.h"
 
 namespace tvm {
 namespace relay {
 namespace qnn {
 
-static inline bool QnnBroadcastRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
+/*
+ * Number of inputs for the Qnn binary operators.
+ * Refer to the QNN_REGISTER_BINARY_OP macro to see
+ * what the operators are.
+ */
+static constexpr int kNumQnnBinaryOpInputs = 8;
+
+/*
+ * Number of expected arg types (the 8 input types plus the output type).
+ */
+static constexpr int kNumQnnBinaryOpArgTypes = 9;
+
+/*
+ * \brief Simple struct to organize the inputs to the Qnn
+ * binary operators. The main reason to have a struct
+ * is to be able to perform the common checks needed at a
+ * central location.
+ */
+struct QnnBinaryOpArguments {
+  Expr lhs;
+  Expr rhs;
+  Expr lhs_scale;
+  Expr lhs_zero_point;
+  Expr rhs_scale;
+  Expr rhs_zero_point;
+  Expr output_scale;
+  Expr output_zero_point;
+
+  explicit QnnBinaryOpArguments(const Array<Expr>& new_args) {
+    CHECK_EQ(new_args.size(), kNumQnnBinaryOpInputs);
+    int idx = 0;
+    lhs = new_args[idx++];
+    rhs = new_args[idx++];
+    lhs_scale = new_args[idx++];
+    lhs_zero_point = new_args[idx++];
+    rhs_scale = new_args[idx++];
+    rhs_zero_point = new_args[idx++];
+    output_scale = new_args[idx++];
+    output_zero_point = new_args[idx++];
+    CHECK_EQ(idx, kNumQnnBinaryOpInputs);
+  }
+};
+
+/*
+ * \brief Simple structure to hold the input tensor's dtype
+ * and shape. This structure allows a common point to do
+ * all the validation checks for Qnn binary operators.
+ */
+struct QnnBinaryOpTensorType {
+  DataType dtype;
+  Array<PrimExpr> shape;
+
+  explicit QnnBinaryOpTensorType(const Array<tvm::relay::Type>& arg_types,
+                                 const int32_t arg_idx) {
+    CHECK_EQ(arg_types.size(), kNumQnnBinaryOpArgTypes);
+    auto tensor_type = arg_types[arg_idx].as<TensorTypeNode>();
+    CHECK(tensor_type != nullptr);
+    dtype = tensor_type->dtype;
+    shape = tensor_type->shape;
+  }
+};
+
+/*
+ * \brief Converts the expression from its current dtype
+ * to the target dtype. This is mainly used for converting
+ * computations done in Int32 to lower precision Int8 or
+ * UInt8.
+ * \param expr The expression whose dtype needs conversion.
+ * \param target_dtype The dtype of the target expression.
+ * \return New expression with target dtype and possibly lower
+ * precision.
+ */
+inline Expr ConvertDtype(const Expr& expr,
+                         const DataType& target_dtype) {
+  auto q_min = GetQmin(target_dtype);
+  auto q_max = GetQmax(target_dtype);
+  auto output = Clip(expr, q_min, q_max);
+  return Cast(output, target_dtype);
+}
+
+/*
+ * \brief Requantizes the given expression if the expression's
+ * scale and zero point do not both match the target scale and
+ * zero point. This is mainly needed for requantizing the
+ * input tensors with the output tensor's scale and zero point
+ * to ease the computation of the final quantized tensor.
+ * \param expr The expression on which the check needs to be performed.
+ * \param expr_scale The scale of the expression.
+ * \param expr_zero_point The zero point of the expression.
+ * \param target_scale The scale of the output tensor.
+ * \param target_zero_point The zero point of the output tensor.
+ * \param expr_shape The shape of the input expression.
+ * \return New expression that is requantized to the target scale and zero
+ * point if the expression's scale and zero point differ; otherwise it
+ * simply casts the given expression to Int32, as no requantization is
+ * needed in that case.
+ */
+inline Expr RequantizeOrUpcast(const Expr& expr,
+                               const Expr& expr_scale,
+                               const Expr& expr_zero_point,
+                               const Expr& target_scale,
+                               const Expr& target_zero_point,
+                               const Array<PrimExpr>& expr_shape,
+                               const DataType& target_dtype = DataType::Int(32)) {
+  auto result = expr;
+  if (!IsEqualScalar(expr_scale, target_scale) ||
+     !IsEqualScalar(expr_zero_point, target_zero_point)) {
 
 Review comment:
   Align?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-tvm] shoubhik commented on a change in pull request #5153: Adding support for QNN subtract op

Posted by GitBox <gi...@apache.org>.
shoubhik commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r398857587
 
 

 ##########
 File path: src/relay/qnn/op/add.cc
 ##########
 @@ -97,39 +66,29 @@ Expr QnnAddCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
   //          Q_c = Q_a' + Q_b' - zp_c
   // The add op is done in int32 precision.
 
-  // Requantize LHS if necessary.
-  auto requantized_lhs = lhs;
-  if (!IsEqualScalar(lhs_scale, output_scale) ||
-      !IsEqualScalar(lhs_zero_point, output_zero_point)) {
-    requantized_lhs = Requantize(lhs, input_shape, lhs_scale, lhs_zero_point, output_scale,
-                                 output_zero_point, DataType::Int(32));
-  } else {
-    requantized_lhs = Cast(requantized_lhs, DataType::Int(32));
-  }
 
-  // Requantize RHS if necessary.
-  auto requantized_rhs = rhs;
-  if (!IsEqualScalar(rhs_scale, output_scale) ||
-      !IsEqualScalar(rhs_zero_point, output_zero_point)) {
-    requantized_rhs = Requantize(rhs, input_shape, rhs_scale, rhs_zero_point, output_scale,
-                                 output_zero_point, DataType::Int(32));
-  } else {
-    requantized_rhs = Cast(requantized_rhs, DataType::Int(32));
-  }
 
+  // Requantize LHS if necessary. Computes Q_a'
+  auto requantized_lhs = requantizeIfNeeded(args.lhs, args.lhs_scale,
+                                            args.lhs_zero_point,
+                                            args.output_scale, args.output_zero_point,
+                                            inputShapeAndDtype.input_shape);
+  // Requantize RHS if necessary. Computes Q_b'
+  auto requantized_rhs = requantizeIfNeeded(args.rhs, args.rhs_scale,
+                                            args.rhs_zero_point,
+                                            args.output_scale, args.output_zero_point,
+                                            inputShapeAndDtype.input_shape);
+  // Computes Q_a' + Q_b'
   auto output = Add(requantized_lhs, requantized_rhs);
 
-  // Subtract zero point.
+  // Subtract zero point. Computes (Q_a' + Q_b') - zp_c
   auto zero_scalar = MakeConstantScalar(DataType::Int(32), 0);
-  if (!IsEqualScalar(output_zero_point, zero_scalar)) {
-    output = Subtract(output, output_zero_point);
+  if (!IsEqualScalar(args.output_zero_point, zero_scalar)) {
+    output = Subtract(output, args.output_zero_point);
   }
 
   // Go back to lower precision.
-  auto q_min = GetQmin(input_dtype);
-  auto q_max = GetQmax(input_dtype);
-  output = Clip(output, q_min, q_max);
-  return Cast(output, input_dtype);
+  return lowerPrecision(output, inputShapeAndDtype.input_dtype);
 
 Review comment:
   Well, `ShrinkBackToOutDtype` implies that the output will be converted to the output dtype, but we always convert to the input dtype (what goes in comes out). `ConvertDtype(fromExpression, toTargetDtype)` is the convention I am going with. What do you think?
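
   Whatever name wins, the behavior in question is just clip-to-range followed by a cast. A minimal Python sketch of that saturating narrowing (the range table is illustrative, not TVM API):

   # Mirrors Clip(expr, GetQmin(dtype), GetQmax(dtype)) + Cast(dtype).
   DTYPE_RANGE = {"int8": (-128, 127), "uint8": (0, 255),
                  "int32": (-2**31, 2**31 - 1)}

   def convert_dtype(value, target_dtype):
       q_min, q_max = DTYPE_RANGE[target_dtype]
       return max(q_min, min(q_max, int(value)))

   assert convert_dtype(300, "uint8") == 255   # saturates instead of wrapping
   assert convert_dtype(-5, "uint8") == 0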

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-tvm] anijain2305 commented on a change in pull request #5153: Adding support for QNN subtract op

Posted by GitBox <gi...@apache.org>.
anijain2305 commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r399428714
 
 

 ##########
 File path: src/relay/qnn/op/mul.cc
 ##########
 @@ -42,22 +42,13 @@ namespace qnn {
 Expr QnnMulCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
                         const Array<tvm::relay::Type>& arg_types) {
   // Get the attrs.
-  CHECK_EQ(new_args.size(), 8);
-  auto& lhs = new_args[0];
-  auto& rhs = new_args[1];
-  auto& lhs_scale = new_args[2];
-  auto& lhs_zero_point = new_args[3];
-  auto& rhs_scale = new_args[4];
-  auto& rhs_zero_point = new_args[5];
-  auto& output_scale = new_args[6];
-  auto& output_zero_point = new_args[7];
+  QnnBinaryOpArguments args(new_args);
 
   // Get the input dtype and shape.
-  CHECK_EQ(arg_types.size(), 9);
-  auto tensor_type = arg_types[0].as<TensorTypeNode>();
-  CHECK(tensor_type != nullptr);
-  auto input_dtype = tensor_type->dtype;
-  auto input_shape = tensor_type->shape;
+  QnnBinaryOpTensorType input_type(arg_types, 0);
+  // data types
+  const auto Int32 = DataType::Int(32);
+  const auto Float32 = DataType::Float(32);
 
 Review comment:
   Ditto

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-tvm] shoubhik commented on a change in pull request #5153: Adding support for QNN subtract op

Posted by GitBox <gi...@apache.org>.
shoubhik commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r399452260
 
 

 ##########
 File path: src/relay/qnn/op/subtract.cc
 ##########
 @@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/relay/qnn/op/subtract.cc
+ * \brief QNN subtract operator.
+ */
+#include <tvm/relay/analysis.h>
+#include <tvm/relay/op_attr_types.h>
+#include "op_common.h"
+
+namespace tvm {
+namespace relay {
+namespace qnn {
+
+/*
+ * \brief Canonicalizes the QNN subtract op.
+ * \param attrs The empty attribute.
+ * \param new_args The new mutated args to the call node.
+ * \param arg_types The types of input and output.
+ * \return The sequence of Relay ops for subtract op.
+ */
+Expr QnnSubtractCanonicalize(const Attrs &attrs,
+                             const Array<Expr> &new_args,
+                             const Array<tvm::relay::Type> &arg_types) {
+  // Get the args.
+  QnnBinaryOpArguments args(new_args);
+
+  // Get the input dtype and shape.
+  QnnBinaryOpTensorType input_type(arg_types, 0);
+
+  // TODO(shoubhik) - The lowering can be further optimized. Instead of inserting requantize at
+  // the start, we can insert requantize at the end if both input tensors have the same qnn
+  // params. In that case, we can first subtract the tensors, add the zero point, and requantize
+  // at the end. This can be done in the future.
+
+  // Since the input qnn params can be different from the output qnn params, we first requantize
+  // the input tensors to the output qnn params. Then we call relay.subtract on the requantized
+  // inputs. This subtraction results in an extra subtraction of the output zero point, so we
+  // add the zero point back. The whole process can be represented using the following equations
+  //
+  //          scale_c * (Q_c - zp_c) = scale_a * (Q_a - zp_a) - scale_b * (Q_b - zp_b)
+  //
+  // After requantizing Q_a and Q_b, equation becomes,
+  //          scale_c * (Q_c - zp_c) = scale_c * (Q_a' - zp_c) - scale_c * (Q_b' - zp_c)
+  //          scale_c * (Q_c - zp_c) = scale_c * (Q_a' - Q_b')
+  //
+  // Comparing the LHS and RHS, it results in
+  //          Q_c = Q_a' - Q_b' + zp_c
+  // The subtract op is done in int32 precision.
+
+  // Requantize LHS if necessary. Computes Q_a'
+  auto requantized_lhs = RequantizeOrUpcast(args.lhs, args.lhs_scale,
+                                            args.lhs_zero_point,
+                                            args.output_scale,
+                                            args.output_zero_point,
+                                            input_type.shape);
+  // Requantize RHS if necessary. Computes Q_b'
+  auto requantized_rhs = RequantizeOrUpcast(args.rhs, args.rhs_scale,
+                                            args.rhs_zero_point,
+                                            args.output_scale,
+                                            args.output_zero_point,
+                                            input_type.shape);
+
+  // Computes Q_a' - Q_b'
+  auto output = Subtract(requantized_lhs, requantized_rhs);
+
+  // Add zero point. Computes (Q_a' - Q_b') + zp_c
+  auto zero_scalar = MakeConstantScalar(DataType::Int(32), 0);
+  if (!IsEqualScalar(args.output_zero_point, zero_scalar)) {
+    output = Add(output, args.output_zero_point);
+  }
+
+  // Go back to lower precision.
+  return ConvertDtype(output, input_type.dtype);
+}
+
+// QNN Subtraction operator.
+QNN_REGISTER_BINARY_OP("subtract")
+.describe("Elementwise subtract with with broadcasting for quantized tensors.")
+.set_support_level(11)
+.set_attr<FTVMLegalize>("FTVMQnnCanonicalize", QnnSubtractCanonicalize)
+.set_attr<FInferCorrectLayout>("FInferCorrectLayout", QnnBinaryBroadcastLayout);
 
 Review comment:
   done
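
   As with the add op, the identity derived in this hunk, Q_c = Q_a' - Q_b' + zp_c, can be spot-checked numerically. A minimal Python sketch with made-up qnn params (no TVM APIs):

   def dq(q, s, zp):                        # dequantize
       return s * (q - zp)

   def qz(x, s, zp):                        # quantize
       return round(x / s) + zp

   s_c, zp_c = 0.5, 7                       # made-up output params
   q_a_p = qz(dq(34, 0.5, 10), s_c, zp_c)   # Q_a': 12.0 requantized -> 31
   q_b_p = qz(dq(19, 0.25, 3), s_c, zp_c)   # Q_b': 4.0 requantized -> 15
   q_c = q_a_p - q_b_p + zp_c               # Q_c = Q_a' - Q_b' + zp_c
   assert dq(q_c, s_c, zp_c) == 12.0 - 4.0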

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-tvm] shoubhik commented on a change in pull request #5153: Adding support for QNN subtract op

Posted by GitBox <gi...@apache.org>.
shoubhik commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r399475231
 
 

 ##########
 File path: src/relay/qnn/op/op_common.h
 ##########
 @@ -30,14 +30,152 @@
 #include <tvm/relay/qnn/attrs.h>
 #include <vector>
 #include "../../op/type_relations.h"
+#include "../../transforms/infer_layout_util.h"
+#include "../util.h"
 
 namespace tvm {
 namespace relay {
 namespace qnn {
 
-static inline bool QnnBroadcastRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
+/*
+ * Number of inputs for the Qnn binary operators.
+ * Refer to the QNN_REGISTER_BINARY_OP macro to see
+ * what the operators are.
+ */
+static constexpr int kNumQnnBinaryOpInputs = 8;
+
+/*
+ * Number of expected arg types (the 8 input types plus the output type).
+ */
+static constexpr int kNumQnnBinaryOpArgTypes = 9;
+
+/*
+ * \brief Simple struct to organize the inputs to the Qnn
+ * binary operators. The main reason to have a struct
+ * is to be able to perform the common checks needed at a
+ * central location.
+ */
+struct QnnBinaryOpArguments {
+  Expr lhs;
+  Expr rhs;
+  Expr lhs_scale;
+  Expr lhs_zero_point;
+  Expr rhs_scale;
+  Expr rhs_zero_point;
+  Expr output_scale;
+  Expr output_zero_point;
+
+  explicit QnnBinaryOpArguments(const Array<Expr>& new_args) {
+    CHECK_EQ(new_args.size(), kNumQnnBinaryOpInputs);
+    int idx = 0;
+    lhs = new_args[idx++];
+    rhs = new_args[idx++];
+    lhs_scale = new_args[idx++];
+    lhs_zero_point = new_args[idx++];
+    rhs_scale = new_args[idx++];
+    rhs_zero_point = new_args[idx++];
+    output_scale = new_args[idx++];
+    output_zero_point = new_args[idx++];
+    CHECK_EQ(idx, kNumQnnBinaryOpInputs);
+  }
+};
+
+/*
+ * \brief Simple structure to hold the input tensor's dtype
+ * and shape. This structure allows a common point to do
+ * all the validation checks for Qnn binary operators.
+ */
+struct QnnBinaryOpTensorType {
+  DataType dtype;
+  Array<PrimExpr> shape;
+
+  explicit QnnBinaryOpTensorType(const Array<tvm::relay::Type>& arg_types,
+                                 const int32_t arg_idx) {
+    CHECK_EQ(arg_types.size(), kNumQnnBinaryOpArgTypes);
+    auto tensor_type = arg_types[arg_idx].as<TensorTypeNode>();
+    CHECK(tensor_type != nullptr);
+    dtype = tensor_type->dtype;
+    shape = tensor_type->shape;
+  }
+};
+
+/*
+ * \brief Converts the expression from its current dtype
+ * to the target dtype. This is mainly used for converting
+ * computations done in Int32 to lower precision Int8 or
+ * UInt8.
+ * \param expr The expression whose dtype needs conversion.
+ * \param target_dtype The dtype of the target expression.
+ * \return New expression with target dtype and possibly lower
+ * precision.
+ */
+inline Expr ConvertDtype(const Expr& expr,
+                         const DataType& target_dtype) {
+  auto q_min = GetQmin(target_dtype);
+  auto q_max = GetQmax(target_dtype);
+  auto output = Clip(expr, q_min, q_max);
+  return Cast(output, target_dtype);
+}
+
+/*
+ * \brief Requantizes the given expression if the expression's
+ * scale and zero point do not both match the target scale and
+ * zero point. This is mainly needed for requantizing the
+ * input tensors with the output tensor's scale and zero point
+ * to ease the computation of the final quantized tensor.
+ * \param expr The expression on which the check needs to be performed.
+ * \param expr_scale The scale of the expression.
+ * \param expr_zero_point The zero point of the expression.
+ * \param target_scale The scale of the output tensor.
+ * \param target_zero_point The zero point of the output tensor.
+ * \param expr_shape The shape of the input expression.
+ * \return New expression that is requantized to the target scale and zero
+ * point if the expression's scale and zero point differ; otherwise it
+ * simply casts the given expression to Int32, as no requantization is
+ * needed in that case.
+ */
+inline Expr RequantizeOrUpcast(const Expr &expr,
 
 Review comment:
   @zhiics can you point out exactly where it is missing? 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-tvm] shoubhik commented on a change in pull request #5153: Adding support for QNN subtract op

Posted by GitBox <gi...@apache.org>.
shoubhik commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r398810119
 
 

 ##########
 File path: src/relay/qnn/op/mul.cc
 ##########
 @@ -71,31 +59,35 @@ Expr QnnMulCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
   which is essentially a requantization of tensor Q' into tensor Q_c.
   */
 
-  auto lhs_shifted = Cast(lhs, DataType::Int(32));
-  auto rhs_shifted = Cast(rhs, DataType::Int(32));
+  auto lhs_shifted = Cast(args.lhs, fullPrecisionInt32);
 
 Review comment:
   Do you think `int32Dtype` or simply `Int32` would be better? Using DataType::Int(32) directly could be an error waiting to happen if we forget to change it someplace.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-tvm] anijain2305 commented on a change in pull request #5153: Adding support for QNN subtract op

Posted by GitBox <gi...@apache.org>.
anijain2305 commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r398784846
 
 

 ##########
 File path: src/relay/qnn/op/mul.cc
 ##########
 @@ -42,22 +42,10 @@ namespace qnn {
 Expr QnnMulCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
                         const Array<tvm::relay::Type>& arg_types) {
   // Get the attrs.
-  CHECK_EQ(new_args.size(), 8);
-  auto& lhs = new_args[0];
-  auto& rhs = new_args[1];
-  auto& lhs_scale = new_args[2];
-  auto& lhs_zero_point = new_args[3];
-  auto& rhs_scale = new_args[4];
-  auto& rhs_zero_point = new_args[5];
-  auto& output_scale = new_args[6];
-  auto& output_zero_point = new_args[7];
+  QnnBinaryOpArguments args(new_args);
 
   // Get the input dtype and shape.
-  CHECK_EQ(arg_types.size(), 9);
-  auto tensor_type = arg_types[0].as<TensorTypeNode>();
-  CHECK(tensor_type != nullptr);
-  auto input_dtype = tensor_type->dtype;
-  auto input_shape = tensor_type->shape;
+  QnnBinaryOpDtypeAndShape inputShapeAndDtype(arg_types);
 
 Review comment:
   s/QnnBinaryOpDtypeAndShape/QnnBinaryType
   
   s/inputShapeAndDtype/input_types (need to use _ for variable names)
   
   In Relay, a Type means (dtype, shape)
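
   To make that concrete, a relay TensorType bundles exactly those two pieces:

   from tvm import relay

   t = relay.TensorType((1, 4), "int8")
   print(t.dtype)   # int8
   print(t.shape)   # [1, 4]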

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-tvm] shoubhik commented on issue #5153: Adding support for QNN subtract op

Posted by GitBox <gi...@apache.org>.
shoubhik commented on issue #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#issuecomment-604550015
 
 
   @anijain2305, can you take a look at this PR?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-tvm] zhiics commented on a change in pull request #5153: Adding support for QNN subtract op

Posted by GitBox <gi...@apache.org>.
zhiics commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r398855417
 
 

 ##########
 File path: src/relay/qnn/op/op_common.h
 ##########
 @@ -30,14 +30,151 @@
 #include <tvm/relay/qnn/attrs.h>
 #include <vector>
 #include "../../op/type_relations.h"
+#include "../../transforms/infer_layout_util.h"
+#include "../util.h"
 
 namespace tvm {
 namespace relay {
 namespace qnn {
 
-static inline bool QnnBroadcastRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
+/*
+ * Number of inputs for the Qnn binary operators.
+ * Refer to the QNN_REGISTER_BINARY_OP macro to see
+ * what the operators are.
+ */
+static constexpr int numQnnBinaryOpInputs = 8;
 
 Review comment:
   usually we use `kNumQnnBinaryOpInputs` for consts

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-tvm] anijain2305 commented on issue #5153: Adding support for QNN subtract op

Posted by GitBox <gi...@apache.org>.
anijain2305 commented on issue #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#issuecomment-605376110
 
 
   Thanks @shoubhik @zhiics! This is merged!
   

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-tvm] shoubhik commented on a change in pull request #5153: Adding support for QNN subtract op

Posted by GitBox <gi...@apache.org>.
shoubhik commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r398862289
 
 

 ##########
 File path: src/relay/qnn/op/op_common.h
 ##########
 @@ -30,14 +30,151 @@
 #include <tvm/relay/qnn/attrs.h>
 #include <vector>
 #include "../../op/type_relations.h"
+#include "../../transforms/infer_layout_util.h"
+#include "../util.h"
 
 namespace tvm {
 namespace relay {
 namespace qnn {
 
-static inline bool QnnBroadcastRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
+/*
+ * Number of inputs for the Qnn binary operators.
+ * Refer to the QNN_REGISTER_BINARY_OP macro to see
+ * what the operators are.
+ */
+static constexpr int numQnnBinaryOpInputs = 8;
+
+/*
+ * Number of expected arg types (the 8 input types plus the output type).
+ */
+static constexpr int numQnnBinaryOpArgTypes = 9;
+
+/*
+ * \brief Simple struct to organize the inputs to the Qnn
+ * binary operators. The main reason to have a struct
+ * is to be able to perform the common checks needed at a
+ * central location.
+ */
+struct QnnBinaryOpArguments {
+  Expr lhs;
+  Expr rhs;
+  Expr lhs_scale;
+  Expr lhs_zero_point;
+  Expr rhs_scale;
+  Expr rhs_zero_point;
+  Expr output_scale;
+  Expr output_zero_point;
+
+  explicit QnnBinaryOpArguments(const Array<Expr>& new_args) {
+    CHECK_EQ(new_args.size(), numQnnBinaryOpInputs);
+    int idx = 0;
+    lhs = new_args[idx++];
+    rhs = new_args[idx++];
+    lhs_scale = new_args[idx++];
+    lhs_zero_point = new_args[idx++];
+    rhs_scale = new_args[idx++];
+    rhs_zero_point = new_args[idx++];
+    output_scale = new_args[idx++];
+    output_zero_point = new_args[idx++];
+    CHECK_EQ(idx, numQnnBinaryOpInputs);
+  }
+};
+
+/*
+ * \brief Simple structure to hold the input tensor's dtype
+ * and shape. This structure allows a common point to do
+ * all the validation checks.
+ */
+struct QnnBinaryOpTypes {
 
 Review comment:
   On second thought, we can introduce `QnnBinaryOpTypes` later. I will revert to `QnnBinaryOpType` for now.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-tvm] anijain2305 commented on a change in pull request #5153: Adding support for QNN subtract op

Posted by GitBox <gi...@apache.org>.
anijain2305 commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r398849174
 
 

 ##########
 File path: src/relay/qnn/op/add.cc
 ##########
 @@ -97,39 +66,29 @@ Expr QnnAddCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
   //          Q_c = Q_a' + Q_b' - zp_c
   // The add op is done in int32 precision.
 
-  // Requantize LHS if necessary.
-  auto requantized_lhs = lhs;
-  if (!IsEqualScalar(lhs_scale, output_scale) ||
-      !IsEqualScalar(lhs_zero_point, output_zero_point)) {
-    requantized_lhs = Requantize(lhs, input_shape, lhs_scale, lhs_zero_point, output_scale,
-                                 output_zero_point, DataType::Int(32));
-  } else {
-    requantized_lhs = Cast(requantized_lhs, DataType::Int(32));
-  }
 
-  // Requantize RHS if necessary.
-  auto requantized_rhs = rhs;
-  if (!IsEqualScalar(rhs_scale, output_scale) ||
-      !IsEqualScalar(rhs_zero_point, output_zero_point)) {
-    requantized_rhs = Requantize(rhs, input_shape, rhs_scale, rhs_zero_point, output_scale,
-                                 output_zero_point, DataType::Int(32));
-  } else {
-    requantized_rhs = Cast(requantized_rhs, DataType::Int(32));
-  }
 
+  // Requantize LHS if necessary. Computes Q_a'
+  auto requantized_lhs = requantizeIfNeeded(args.lhs, args.lhs_scale,
+                                            args.lhs_zero_point,
+                                            args.output_scale, args.output_zero_point,
+                                            inputShapeAndDtype.input_shape);
+  // Requantize RHS if necessary. Computes Q_b'
+  auto requantized_rhs = requantizeIfNeeded(args.rhs, args.rhs_scale,
+                                            args.rhs_zero_point,
+                                            args.output_scale, args.output_zero_point,
+                                            inputShapeAndDtype.input_shape);
+  // Computes Q_a' + Q_b'
   auto output = Add(requantized_lhs, requantized_rhs);
 
-  // Subtract zero point.
+  // Subtract zero point. Computes (Q_a' + Q_b') - zp_c
   auto zero_scalar = MakeConstantScalar(DataType::Int(32), 0);
-  if (!IsEqualScalar(output_zero_point, zero_scalar)) {
-    output = Subtract(output, output_zero_point);
+  if (!IsEqualScalar(args.output_zero_point, zero_scalar)) {
+    output = Subtract(output, args.output_zero_point);
   }
 
   // Go back to lower precision.
-  auto q_min = GetQmin(input_dtype);
-  auto q_max = GetQmax(input_dtype);
-  output = Clip(output, q_min, q_max);
-  return Cast(output, input_dtype);
+  return lowerPrecision(output, inputShapeAndDtype.input_dtype);
 
 Review comment:
   `ShrinkBackToOutDtype`? `ConvertDtype` is vague as well.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-tvm] shoubhik commented on a change in pull request #5153: Adding support for QNN subtract op

Posted by GitBox <gi...@apache.org>.
shoubhik commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r398810414
 
 

 ##########
 File path: src/relay/qnn/op/op_common.h
 ##########
 @@ -30,14 +30,155 @@
 #include <tvm/relay/qnn/attrs.h>
 #include <vector>
 #include "../../op/type_relations.h"
+#include "../../transforms/infer_layout_util.h"
 
 namespace tvm {
 namespace relay {
 namespace qnn {
 
-static inline bool QnnBroadcastRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
+/*
+ * Number of inputs for the Qnn binary operators.
+ * Refer to the QNN_REGISTER_BINARY_OP macro to see
+ * what the operators are.
+ */
+static constexpr int numQnnBinaryOpInputs = 8;
+
+/*
+ * Number of expected arg types (the 8 input types plus the output type).
+ */
+static constexpr int numQnnBinaryOpArgTypes = 9;
+
+/*
+ * \brief Simple struct to organize the inputs to the Qnn
+ * binary operators. The main reason to have a struct
+ * is to be able to perform the common checks needed at a
+ * central location.
+ */
+struct QnnBinaryOpArguments {
+  Expr lhs;
+  Expr rhs;
+  Expr lhs_scale;
+  Expr lhs_zero_point;
+  Expr rhs_scale;
+  Expr rhs_zero_point;
+  Expr output_scale;
+  Expr output_zero_point;
+
+  explicit QnnBinaryOpArguments(const Array<Expr>& new_args) {
+    CHECK_EQ(new_args.size(), numQnnBinaryOpInputs);
+    int idx = 0;
+    lhs = new_args[idx++];
+    rhs = new_args[idx++];
+    lhs_scale = new_args[idx++];
+    lhs_zero_point = new_args[idx++];
+    rhs_scale = new_args[idx++];
+    rhs_zero_point = new_args[idx++];
+    output_scale = new_args[idx++];
+    output_zero_point = new_args[idx++];
+    CHECK_EQ(idx, numQnnBinaryOpInputs);
+  }
+};
+
+/*
+ * \brief Simple structure to hold the input tensor's dtype
+ * and shape. This structure allows a common point to do
+ * all the validation checks.
+ */
+struct QnnBinaryOpDtypeAndShape {
+  DataType input_dtype;
+  Array<PrimExpr> input_shape;
+
+  explicit QnnBinaryOpDtypeAndShape(const Array<tvm::relay::Type>& arg_types) {
+    CHECK_EQ(arg_types.size(), numQnnBinaryOpArgTypes);
+    auto tensor_type = arg_types[0].as<TensorTypeNode>();
+    CHECK(tensor_type != nullptr);
+    input_dtype = tensor_type->dtype;
+    input_shape = tensor_type->shape;
+  }
+};
+
+/*
+ * \brief Converts the expression from its current dtype
+ * to the target dtype. This is mainly used for converting
+ * computations done in Int32 to lower precision Int8 or
+ * UInt8.
+ * \param expr The expression whose dtype needs conversion.
+ * \param target_dtype The dtype of the target expression.
+ * \return New expression with target dtype and possibly lower
+ * precision.
+ */
+inline Expr lowerPrecision(const Expr& expr,
+                           const DataType& target_dtype) {
+  auto q_min = GetQmin(target_dtype);
+  auto q_max = GetQmax(target_dtype);
+  auto output = Clip(expr, q_min, q_max);
+  return Cast(output, target_dtype);
+}
+
+/*
+ * Full precision Int32 data type for explicitly casting
+ * Int8/UInt8 to Int32 and create Int32 constants.
+ */
+const auto fullPrecisionInt32 = DataType::Int(32);
+
+/*
+ * \brief Requantizes the given expression if the expression's
+ * scale and zero point do not both match the target scale and
+ * zero point. This is mainly needed for requantizing the
+ * input tensors with the output tensor's scale and zero point
+ * to ease the computation of the final quantized tensor.
+ * \param expr The expression on which the check needs to be performed.
+ * \param expr_scale The scale of the expression.
+ * \param expr_zero_point The zero point of the expression.
+ * \param target_scale The scale of the output tensor.
+ * \param target_zero_point The zero point of the output tensor.
+ * \param expr_shape The shape of the input expression.
+ * \return New expression that is requantized to the target scale and zero
+ * point if the expression's scale and zero point differ; otherwise it
+ * simply casts the given expression to Int32, as no requantization is
+ * needed in that case.
+ */
+inline Expr requantizeIfNeeded(const Expr& expr,
+                               const Expr& expr_scale,
+                               const Expr& expr_zero_point,
+                               const Expr& target_scale,
+                               const Expr& target_zero_point,
+                               const Array<PrimExpr>& expr_shape) {
+  auto result = expr;
+  if (!IsEqualScalar(expr_scale, target_scale) ||
+     !IsEqualScalar(expr_zero_point, target_zero_point)) {
+    result = Requantize(expr, expr_shape, expr_scale, expr_zero_point,
+                        target_scale, target_zero_point, fullPrecisionInt32);
+  } else {
+    result = Cast(result, fullPrecisionInt32);
 
 Review comment:
   Rename to `RequantizeOrUpcast`?
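   
   For context, a completed version of the helper under the suggested name could
   look like the sketch below. This is only an illustration assembled from the
   truncated hunk above, not the authoritative body in the PR.
   
   inline Expr RequantizeOrUpcast(const Expr& expr,
                                  const Expr& expr_scale,
                                  const Expr& expr_zero_point,
                                  const Expr& target_scale,
                                  const Expr& target_zero_point,
                                  const Array<PrimExpr>& expr_shape) {
     auto result = expr;
     // Requantize when either the scale or the zero point differs from the
     // target; otherwise a plain upcast to Int32 is sufficient.
     if (!IsEqualScalar(expr_scale, target_scale) ||
         !IsEqualScalar(expr_zero_point, target_zero_point)) {
       result = Requantize(expr, expr_shape, expr_scale, expr_zero_point,
                           target_scale, target_zero_point, DataType::Int(32));
     } else {
       result = Cast(result, DataType::Int(32));
     }
     return result;
   }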


[GitHub] [incubator-tvm] shoubhik commented on a change in pull request #5153: Adding support for QNN subtract op

shoubhik commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r399444313
 
 

 ##########
 File path: src/relay/qnn/op/mul.cc
 ##########
 @@ -42,22 +42,13 @@ namespace qnn {
 Expr QnnMulCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
                         const Array<tvm::relay::Type>& arg_types) {
   // Get the attrs.
-  CHECK_EQ(new_args.size(), 8);
-  auto& lhs = new_args[0];
-  auto& rhs = new_args[1];
-  auto& lhs_scale = new_args[2];
-  auto& lhs_zero_point = new_args[3];
-  auto& rhs_scale = new_args[4];
-  auto& rhs_zero_point = new_args[5];
-  auto& output_scale = new_args[6];
-  auto& output_zero_point = new_args[7];
+  QnnBinaryOpArguments args(new_args);
 
   // Get the input dtype and shape.
-  CHECK_EQ(arg_types.size(), 9);
-  auto tensor_type = arg_types[0].as<TensorTypeNode>();
-  CHECK(tensor_type != nullptr);
-  auto input_dtype = tensor_type->dtype;
-  auto input_shape = tensor_type->shape;
+  QnnBinaryOpTensorType input_type(arg_types, 0);
+  // data types
+  const auto Int32 = DataType::Int(32);
 
 Review comment:
   done
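   
   For readers following the refactor: with the shared structs, the preamble of
   each QNN binary canonicalization reduces to two helper constructions. A
   minimal sketch of the resulting shape (illustrative only; the real multiply
   lowering continues where the comment ellipsis is):
   
   Expr QnnMulCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
                           const Array<tvm::relay::Type>& arg_types) {
     // Unpacks the eight inputs (lhs/rhs, their scales and zero points, and
     // the output scale and zero point) with the common size checks applied.
     QnnBinaryOpArguments args(new_args);
     // Validates the arg types and extracts the dtype/shape of the first input.
     QnnBinaryOpTensorType input_type(arg_types, 0);
     // ... the multiply lowering proceeds on args.lhs / args.rhs as before ...
   }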


[GitHub] [incubator-tvm] anijain2305 commented on a change in pull request #5153: Adding support for QNN subtract op

anijain2305 commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r399484380
 
 

 ##########
 File path: src/relay/qnn/op/op_common.h
 ##########
 @@ -30,14 +30,152 @@
 #include <tvm/relay/qnn/attrs.h>
 #include <vector>
 #include "../../op/type_relations.h"
+#include "../../transforms/infer_layout_util.h"
+#include "../util.h"
 
 namespace tvm {
 namespace relay {
 namespace qnn {
 
-static inline bool QnnBroadcastRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
+/*
+ * Number of inputs for the Qnn binary operators.
 + * Refer to the QNN_REGISTER_BINARY_OP macro to see
+ * what the operators are.
+ */
+static constexpr int kNumQnnBinaryOpInputs = 8;
+
+/*
+ * Number of expected arg types.
+ */
+static constexpr int kNumQnnBinaryOpArgTypes = 9;
+
+/*
+ * \brief Simple struct to organize the inputs to the Qnn
+ * binary operators. The main reason to have a struct
+ * is to be able to perform the common checks needed at a
+ * central location.
+ */
+struct QnnBinaryOpArguments {
+  Expr lhs;
+  Expr rhs;
+  Expr lhs_scale;
+  Expr lhs_zero_point;
+  Expr rhs_scale;
+  Expr rhs_zero_point;
+  Expr output_scale;
+  Expr output_zero_point;
+
+  explicit QnnBinaryOpArguments(const Array<Expr>& new_args) {
+    CHECK_EQ(new_args.size(), kNumQnnBinaryOpInputs);
+    int idx = 0;
+    lhs = new_args[idx++];
+    rhs = new_args[idx++];
+    lhs_scale = new_args[idx++];
+    lhs_zero_point = new_args[idx++];
+    rhs_scale = new_args[idx++];
+    rhs_zero_point = new_args[idx++];
+    output_scale = new_args[idx++];
+    output_zero_point = new_args[idx++];
+    CHECK_EQ(idx, kNumQnnBinaryOpInputs);
+  }
+};
+
+/*
 + * \brief Simple structure to hold the input tensor's dtype
 + * and shape. This structure provides a common point for
 + * all the validation checks for Qnn binary operators.
+ */
+struct QnnBinaryOpTensorType {
+  DataType dtype;
 +  Array<PrimExpr> shape;
+
+  explicit QnnBinaryOpTensorType(const Array<tvm::relay::Type>& arg_types,
+                                 const int32_t arg_idx) {
+    CHECK_EQ(arg_types.size(), kNumQnnBinaryOpArgTypes);
+    auto tensor_type = arg_types[arg_idx].as<TensorTypeNode>();
+    CHECK(tensor_type != nullptr);
+    dtype = tensor_type->dtype;
+    shape = tensor_type->shape;
+  }
+};
+
+/*
 + * \brief Converts the given expression from its dtype
 + * to the target dtype. This is mainly used for converting
 + * computations done in Int32 to lower precision Int8 or
 + * UInt8.
 + * \param expr The expression whose dtype needs conversion.
 + * \param target_dtype The dtype of the target expression.
+ * \return New expression with target dtype and possibly lower
+ * precision.
+ */
+inline Expr ConvertDtype(const Expr& expr,
+                         const DataType& target_dtype) {
+  auto q_min = GetQmin(target_dtype);
+  auto q_max = GetQmax(target_dtype);
+  auto output = Clip(expr, q_min, q_max);
+  return Cast(output, target_dtype);
+}
+
+/*
 + * \brief Requantizes the given expression if the expression's
 + * scale or zero point does not match the target scale and
 + * zero point. This is mainly needed for requantizing the
 + * input tensors with the output tensor's scale and zero point
 + * to ease the computation of the final quantized tensor.
 + * \param expr The expression on which the check needs to be performed.
 + * \param expr_scale The scale of the expression.
 + * \param expr_zero_point The zero point of the expression.
 + * \param target_scale The scale of the output tensor.
 + * \param target_zero_point The zero point of the output tensor.
 + * \param expr_shape The shape of the input expression.
 + * \return New expression that is requantized to the target scale and zero
 + * point if the expression's scale or zero point differs; otherwise it
 + * simply casts the given expression to Int32, as no requantization is
 + * needed in this case.
+ */
+inline Expr RequantizeOrUpcast(const Expr &expr,
 
 Review comment:
   Zhi means that the `&` should be attached to `Expr`, not to `expr`.
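   
   Concretely, the point is where the reference qualifier binds in the
   declaration (Use is just a placeholder name here):
   
   void Use(const Expr& expr);   // preferred: '&' attaches to the type
   void Use(const Expr &expr);   // flagged style: '&' attaches to the name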


[GitHub] [incubator-tvm] anijain2305 commented on a change in pull request #5153: Adding support for QNN subtract op

anijain2305 commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r398847589
 
 

 ##########
 File path: src/relay/qnn/op/mul.cc
 ##########
 @@ -71,31 +59,35 @@ Expr QnnMulCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
   which is essentially a requantization of tensor Q' into tensor Q_c.
   */
 
-  auto lhs_shifted = Cast(lhs, DataType::Int(32));
-  auto rhs_shifted = Cast(rhs, DataType::Int(32));
+  auto lhs_shifted = Cast(args.lhs, fullPrecisionInt32);
 
 Review comment:
   I understand the concern. At the same time, one might want to upcast only to int16 and not int32 while subtracting zero points for one operator, while keeping int32 for another (for example, in pool, even int16 upcasting is OK). Similarly, one might choose int64 for mul in the future. So, I think it is safe to keep DataType::Int(32).
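   
   To make that per-operator flexibility concrete, one hypothetical variant
   would take the accumulation dtype as a parameter instead of hard-coding it
   (UpcastForAccumulation and acc_dtype are illustrative names, not part of
   this PR):
   
   // Hypothetical sketch: each operator chooses its own accumulation width.
   inline Expr UpcastForAccumulation(const Expr& expr, const DataType& acc_dtype) {
     return Cast(expr, acc_dtype);
   }
   
   // A pool lowering might pass DataType::Int(16); mul might later choose
   // DataType::Int(64); add/subtract keep DataType::Int(32).
   auto lhs_shifted = UpcastForAccumulation(args.lhs, DataType::Int(32));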


[GitHub] [incubator-tvm] anijain2305 commented on a change in pull request #5153: Adding support for QNN subtract op

anijain2305 commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r399446872
 
 

 ##########
 File path: src/relay/qnn/op/subtract.cc
 ##########
 @@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/relay/qnn/op/subtract.cc
 + * \brief QNN subtract operator.
+ */
+#include <tvm/relay/analysis.h>
+#include <tvm/relay/op_attr_types.h>
+#include "op_common.h"
+
+namespace tvm {
+namespace relay {
+namespace qnn {
+
+/*
+ * \brief Canonicalizes the QNN subtract op.
+ * \param attrs The empty attribute.
+ * \param new_args The new mutated args to the call node.
+ * \param arg_types The types of input and output.
 + * \return The sequence of Relay ops for subtract op.
+ */
+Expr QnnSubtractCanonicalize(const Attrs &attrs,
+                             const Array<Expr> &new_args,
+                             const Array<tvm::relay::Type> &arg_types) {
+  // Get the args.
+  QnnBinaryOpArguments args(new_args);
+
+  // Get the input dtype and shape.
+  QnnBinaryOpTensorType input_type(arg_types, 0);
+
 +  // TODO(shoubhik) - The lowering can be further optimized. Instead of inserting requantize at
 +  // the start, we can insert requantize at the end if both input tensors have the same qnn
 +  // params. In that case, we can first subtract the tensors, add the zero point, and requantize
 +  // at the end. This can be done in the future.
+
 +  // Since the input qnn params can be different from the output qnn params, we first requantize
 +  // the input tensors to the output qnn params. Then we call relay.subtract on the requantized
 +  // inputs. This subtraction results in an extra subtraction of the output zero point, so we
 +  // add the zero point back. The whole process can be represented using the following equations
+  //
+  //          scale_c * (Q_c - zp_c) = scale_a * (Q_a - zp_a) - scale_b * (Q_b - zp_b)
+  //
+  // After requantizing Q_a and Q_b, equation becomes,
+  //          scale_c * (Q_c - zp_c) = scale_c * (Q_a' - zp_c) - scale_c * (Q_b' - zp_c)
+  //          scale_c * (Q_c - zp_c) = scale_c * (Q_a' - Q_b')
+  //
+  // Comparing the LHS and RHS, it results in
+  //          Q_c = Q_a' - Q_b' + zp_c
+  // The subtract op is done in int32 precision.
+
+  // Requantize LHS if necessary. Computes Q_a'
+  auto requantized_lhs = RequantizeOrUpcast(args.lhs, args.lhs_scale,
+                                            args.lhs_zero_point,
+                                            args.output_scale,
+                                            args.output_zero_point,
+                                            input_type.shape);
+  // Requantize RHS if necessary. Computes Q_b'
+  auto requantized_rhs = RequantizeOrUpcast(args.rhs, args.rhs_scale,
+                                            args.rhs_zero_point,
+                                            args.output_scale,
+                                            args.output_zero_point,
+                                            input_type.shape);
+
+  // Computes Q_a' - Q_b'
+  auto output = Subtract(requantized_lhs, requantized_rhs);
+
+  // Add zero point. Computes (Q_a' - Q_b') + zp_c
+  auto zero_scalar = MakeConstantScalar(DataType::Int(32), 0);
+  if (!IsEqualScalar(args.output_zero_point, zero_scalar)) {
+    output = Add(output, args.output_zero_point);
+  }
+
+  // Go back to lower precision.
+  return ConvertDtype(output, input_type.dtype);
+}
+
 +// QNN Subtraction operator.
+QNN_REGISTER_BINARY_OP("subtract")
 +.describe("Elementwise subtract with broadcasting for quantized tensors.")
+.set_support_level(11)
+.set_attr<FTVMLegalize>("FTVMQnnCanonicalize", QnnSubtractCanonicalize)
+.set_attr<FInferCorrectLayout>("FInferCorrectLayout", QnnBinaryBroadcastLayout);
 
 Review comment:
   I meant here 
   
   https://github.com/apache/incubator-tvm/blob/949dca4d078dbeb7990294b1f7a83de5b6ed4a25/src/relay/qnn/op/op_common.h#L86
   
   
   Just like we share the InferType function, we can share the InferLayout function between add/mul/subtract.
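   
   A sketch of what that sharing could look like (illustrative; the actual
   macro body lives in op_common.h and may differ): the macro registers the
   layout function once, and the per-op files stop setting it themselves.
   
   // Appended to the RELAY_REGISTER_OP(...) chain inside QNN_REGISTER_BINARY_OP:
   .set_attr<FInferCorrectLayout>("FInferCorrectLayout", QnnBinaryBroadcastLayout)
   
   // subtract.cc (and add.cc / mul.cc) would then shrink to:
   QNN_REGISTER_BINARY_OP("subtract")
   .describe("Elementwise subtract with broadcasting for quantized tensors.")
   .set_support_level(11)
   .set_attr<FTVMLegalize>("FTVMQnnCanonicalize", QnnSubtractCanonicalize);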


[GitHub] [incubator-tvm] anijain2305 commented on a change in pull request #5153: Adding support for QNN subtract op

anijain2305 commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r398786009
 
 

 ##########
 File path: src/relay/qnn/op/mul.cc
 ##########
 @@ -71,31 +59,35 @@ Expr QnnMulCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
   which is essentially a requantization of tensor Q' into tensor Q_c.
   */
 
-  auto lhs_shifted = Cast(lhs, DataType::Int(32));
-  auto rhs_shifted = Cast(rhs, DataType::Int(32));
+  auto lhs_shifted = Cast(args.lhs, fullPrecisionInt32);
 
 Review comment:
   I think DataType::Int(32) is at least as informative as fullPrecisionInt32.


[GitHub] [incubator-tvm] zhiics commented on a change in pull request #5153: Adding support for QNN subtract op

zhiics commented on a change in pull request #5153: Adding support for QNN subtract op
URL: https://github.com/apache/incubator-tvm/pull/5153#discussion_r399459742
 
 

 ##########
 File path: src/relay/qnn/op/subtract.cc
 ##########
 @@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/relay/qnn/op/subtract.cc
 + * \brief QNN subtract operator.
+ */
+#include <tvm/relay/analysis.h>
+#include <tvm/relay/op_attr_types.h>
+#include "op_common.h"
+
+namespace tvm {
+namespace relay {
+namespace qnn {
+
+/*
+ * \brief Canonicalizes the QNN subtract op.
+ * \param attrs The empty attribute.
+ * \param new_args The new mutated args to the call node.
+ * \param arg_types The types of input and output.
 + * \return The sequence of Relay ops for subtract op.
+ */
+Expr QnnSubtractCanonicalize(const Attrs &attrs,
 
 Review comment:
   Ditto: the `&` should be attached to the type here as well.
