Posted to commits@tvm.apache.org by GitBox <gi...@apache.org> on 2022/07/19 17:07:40 UTC

[GitHub] [tvm] AndrewZhaoLuo commented on a diff in pull request #12116: [QNN] Support different qnn params between in/out tensor in leaky_relu

AndrewZhaoLuo commented on code in PR #12116:
URL: https://github.com/apache/tvm/pull/12116#discussion_r924739254


##########
python/tvm/relay/qnn/op/qnn.py:
##########
@@ -1179,7 +1179,7 @@ def batch_matmul(x, y, x_zero_point, y_zero_point, x_scale, y_scale, out_dtype="
 reg.register_pattern("qnn.dequantize", OpPattern.OPAQUE)
 
 
-def leaky_relu(x, alpha, scale, zero_point):
+def leaky_relu(x, alpha, scale, zero_point, output_scale, output_zero_point):

Review Comment:
   change `scale`, `zero_point` --> `input_scale`, `input_zero_point`
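
   For reference, a minimal sketch of the signature with the suggested rename applied (the docstring wording below is illustrative, not taken from the PR):

       # Sketch only: rename applied per the comment above; body elided.
       def leaky_relu(x, alpha, input_scale, input_zero_point, output_scale, output_zero_point):
           """Quantized leaky_relu.

           input_scale, input_zero_point: qnn params of the input quantized expr.
           output_scale, output_zero_point: qnn params of the output quantized expr.
           """
           ...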



##########
python/tvm/relay/qnn/op/qnn.py:
##########
@@ -1192,15 +1192,13 @@ def leaky_relu(x, alpha, scale, zero_point):
         The scale of the quantized expr.

Review Comment:
   change this (and likewise the description for the zero point) to 'scale of the input quantized expr'



##########
src/relay/qnn/op/leaky_relu.cc:
##########
@@ -32,8 +32,8 @@ namespace qnn {
 
 bool QnnLeakyReluRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
                      const TypeReporter& reporter) {
-  // Expected Types: data, scale, zero_point
-  ICHECK_EQ(types.size(), 4);
+  // Expected Types: data, scale, zero_point, output_scale, output_zero_point, out_type

Review Comment:
   change the `scale`, `zero_point` references to `input_scale`, `input_zero_point`



##########
src/relay/qnn/op/leaky_relu.cc:
##########
@@ -82,42 +87,62 @@ Expr QnnLeakyReluCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
   // by a small alpha value < 1.
   //
   // We assume the same scale and zero point for alpha and the input tensor.
-  // Let T = s(q_t - z) where q_t is the input arg[0]
-  // Then, the quantized value of alpha * T is:
-  // q(a * T, s, z) = [(a * T) / s] + z = a * s(q_t - z) / s + z = a * (q_t - z) + z
-  // = a * q_t + (1 - a) * z
+  // LeakyReLU can be written in terms of respective quantized tensors, scales and
+  // zero points as
   //
-  // We return the quantized value of alpha * T for all values q_t < input_zero_point.
-
-  ICHECK_EQ(new_args.size(), 3);
-  Expr quantized_data = Cast(new_args[0], DataType::Int(32));
+  //    scale_o * (Q_o - zp_o) = alpha * scale_i * (Q_i - zp_i)  when Q_i < zp_i  (1)
+  //                       Q_o = Q_i when Q_i >= zp_i  (2)

Review Comment:
   Should (2) be `scale_o * (Q_o - zp_o) = scale_i * (Q_i - zp_i) when Q_i >= zp_i`?
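
   A quick numeric check of that point (made-up qnn params, for illustration only): if the input and output params differ, passing Q_i through unchanged does not preserve the real value, so the `Q_i >= zp_i` branch also needs the requantized form.

       # Hypothetical qnn params, chosen only to illustrate the mismatch.
       scale_i, zp_i = 0.5, 10
       scale_o, zp_o = 0.25, 30

       q_i = 20                                  # Q_i >= zp_i, so output should represent the same value
       real = scale_i * (q_i - zp_i)             # 5.0

       wrong = scale_o * (q_i - zp_o)            # pass-through: 0.25 * (20 - 30) = -2.5
       q_i_prime = round(real / scale_o) + zp_o  # requantize first: 50
       right = scale_o * (q_i_prime - zp_o)      # 5.0

       print(wrong, right)                       # -2.5 5.0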



##########
src/relay/qnn/op/leaky_relu.cc:
##########
@@ -82,42 +87,62 @@ Expr QnnLeakyReluCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
   // by a small alpha value < 1.
   //
   // We assume the same scale and zero point for alpha and the input tensor.
-  // Let T = s(q_t - z) where q_t is the input arg[0]
-  // Then, the quantized value of alpha * T is:
-  // q(a * T, s, z) = [(a * T) / s] + z = a * s(q_t - z) / s + z = a * (q_t - z) + z
-  // = a * q_t + (1 - a) * z
+  // LeakyReLU can be written in terms of respective quantized tensors, scales and
+  // zero points as
   //
-  // We return the quantized value of alpha * T for all values q_t < input_zero_point.
-
-  ICHECK_EQ(new_args.size(), 3);
-  Expr quantized_data = Cast(new_args[0], DataType::Int(32));
+  //    scale_o * (Q_o - zp_o) = alpha * scale_i * (Q_i - zp_i)  when Q_i < zp_i  (1)
+  //                       Q_o = Q_i when Q_i >= zp_i  (2)
+  //
+  // Since the input qnn params can differ from the output qnn params, we first requantize the
+  // input tensor to the output qnn params. After requantizing Q_i, equation (1) becomes
+  // equation (3), where Q_i' is the requantized data from Q_i.
+  //
+  //    scale_o * (Q_o - zp_o) = alpha * scale_o * (Q_i' - zp_o)  when Q_i < zp_i (3)
+  //                       Q_o = alpha * Q_i' + (1 - alpha) * zp_o  when Q_i < zp_i (4)
+  ICHECK_EQ(new_args.size(), 5);
+  Expr data = Cast(new_args[0], DataType::Int(32));
+  Expr input_scale = new_args[1];
   Expr input_zero_point = Cast(new_args[2], DataType::Int(32));
+  Expr output_scale = new_args[3];
+  Expr output_zero_point = Cast(new_args[4], DataType::Int(32));
 
   const auto* q_attrs = attrs.as<LeakyReluAttrs>();
   auto alpha = q_attrs->alpha;
 
+  const auto input_shape = get_shape(arg_types[0]);
+  const auto input_dtype = arg_types[0].as<TensorTypeNode>()->dtype;
+
+  // requantize the input to Q_i'
+  auto requantized_expr = RequantizeOrUpcast(data, input_scale, input_zero_point, output_scale,
+                                             output_zero_point, input_shape);
+
+  // alpha * Q_i'
   int32_t fixed_point_multiplier, shift;
   std::tie(fixed_point_multiplier, shift) = GetFixedPointMultiplierShift(alpha);
-  auto prod = FixedPointMultiply(quantized_data, fixed_point_multiplier, shift);
+  auto prod = FixedPointMultiply(requantized_expr, fixed_point_multiplier, shift);
 
+  // (1 - alpha) * zp_o
   int32_t fixed_point_multiplier_z, shift_z;
   std::tie(fixed_point_multiplier_z, shift_z) = GetFixedPointMultiplierShift(1 - alpha);
-  auto scaled_z = FixedPointMultiply(input_zero_point, fixed_point_multiplier_z, shift_z);
+  auto scaled_z = FixedPointMultiply(output_zero_point, fixed_point_multiplier_z, shift_z);
 
+  // alpha * Q_i + (1 - alpha) * zp_o
   auto add = Add(prod, scaled_z);
-  auto output = Where(Less(quantized_data, input_zero_point), add, quantized_data);
+  auto output = Where(Less(data, input_zero_point), add, data);

Review Comment:
   So `add` will have the quantization parameters `output_scale`, `output_zero_point`, which is right, while `data` will have the quantization parameters `input_scale` and `input_zero_point`. I believe you must grab from `requantized_expr`.
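
   A small NumPy simulation of the canonicalized computation (made-up params; plain float rounding as a stand-in for TVM's requantize and fixed-point ops) showing that selecting `data` in the else branch mixes qnn params, while selecting the requantized values tracks the float reference:

       import numpy as np

       # Hypothetical qnn params, for illustration only.
       alpha = 0.1
       scale_i, zp_i = 0.5, 10
       scale_o, zp_o = 0.25, 30

       q_i = np.array([0, 5, 10, 20], dtype=np.int64)
       real = scale_i * (q_i - zp_i)                     # dequantized input

       # Q_i': input requantized to the output qnn params.
       q_i_prime = np.round(real / scale_o).astype(np.int64) + zp_o

       neg = np.round(alpha * q_i_prime + (1 - alpha) * zp_o).astype(np.int64)

       out_buggy = np.where(q_i < zp_i, neg, q_i)        # else branch takes data
       out_fixed = np.where(q_i < zp_i, neg, q_i_prime)  # else branch takes Q_i'

       ref = np.where(real < 0, alpha * real, real)      # float leaky_relu
       print(scale_o * (out_buggy - zp_o))               # [-0.5  -0.25 -5.   -2.5 ]  -- wrong tail
       print(scale_o * (out_fixed - zp_o))               # [-0.5  -0.25  0.    5.  ]  -- matches ref
       print(ref)                                        # [-0.5  -0.25  0.    5.  ]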



##########
src/relay/qnn/op/leaky_relu.cc:
##########
@@ -82,42 +87,62 @@ Expr QnnLeakyReluCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
   // by a small alpha value < 1.
   //
   // We assume the same scale and zero point for alpha and the input tensor.
-  // Let T = s(q_t - z) where q_t is the input arg[0]
-  // Then, the quantized value of alpha * T is:
-  // q(a * T, s, z) = [(a * T) / s] + z = a * s(q_t - z) / s + z = a * (q_t - z) + z
-  // = a * q_t + (1 - a) * z
+  // LeakyReLU can be written in terms of respective quantized tensors, scales and
+  // zero points as
   //
-  // We return the quantized value of alpha * T for all values q_t < input_zero_point.
-
-  ICHECK_EQ(new_args.size(), 3);
-  Expr quantized_data = Cast(new_args[0], DataType::Int(32));
+  //    scale_o * (Q_o - zp_o) = alpha * scale_i * (Q_i - zp_i)  when Q_i < zp_i  (1)
+  //                       Q_o = Q_i when Q_i >= zp_i  (2)
+  //
+  // Since the input qnn params can differ from the output qnn params, we first requantize the
+  // input tensor to the output qnn params. After requantizing Q_i, equation (1) becomes
+  // equation (3), where Q_i' is the requantized data from Q_i.
+  //
+  //    scale_o * (Q_o - zp_o) = alpha * scale_o * (Q_i' - zp_o)  when Q_i < zp_i (3)
+  //                       Q_o = alpha * Q_i' + (1 - alpha) * zp_o  when Q_i < zp_i (4)
+  ICHECK_EQ(new_args.size(), 5);
+  Expr data = Cast(new_args[0], DataType::Int(32));
+  Expr input_scale = new_args[1];
   Expr input_zero_point = Cast(new_args[2], DataType::Int(32));
+  Expr output_scale = new_args[3];
+  Expr output_zero_point = Cast(new_args[4], DataType::Int(32));
 
   const auto* q_attrs = attrs.as<LeakyReluAttrs>();
   auto alpha = q_attrs->alpha;
 
+  const auto input_shape = get_shape(arg_types[0]);
+  const auto input_dtype = arg_types[0].as<TensorTypeNode>()->dtype;
+
+  // requantize the input to Q_i'
+  auto requantized_expr = RequantizeOrUpcast(data, input_scale, input_zero_point, output_scale,
+                                             output_zero_point, input_shape);
+
+  // alpha * Q_i'
   int32_t fixed_point_multiplier, shift;
   std::tie(fixed_point_multiplier, shift) = GetFixedPointMultiplierShift(alpha);
-  auto prod = FixedPointMultiply(quantized_data, fixed_point_multiplier, shift);
+  auto prod = FixedPointMultiply(requantized_expr, fixed_point_multiplier, shift);
 
+  // (1 - alpha) * zp_o
   int32_t fixed_point_multiplier_z, shift_z;
   std::tie(fixed_point_multiplier_z, shift_z) = GetFixedPointMultiplierShift(1 - alpha);
-  auto scaled_z = FixedPointMultiply(input_zero_point, fixed_point_multiplier_z, shift_z);
+  auto scaled_z = FixedPointMultiply(output_zero_point, fixed_point_multiplier_z, shift_z);
 
+  // alpha * Q_i + (1 - alpha) * zp_o

Review Comment:
   // alpha * Q_i' + (1 - alpha) * zp_o
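
   As context for the fixed-point steps above, a rough Python sketch of multiplying by `alpha` with an integer multiplier and shift (a simplified stand-in for `GetFixedPointMultiplierShift` / `FixedPointMultiply`; TVM's exact rounding details are omitted):

       import math

       def fixed_point_multiplier_shift(x):
           # Express x as (m / 2**31) * 2**shift with m close to 2**31.
           frac, shift = math.frexp(x)       # x = frac * 2**shift, frac in [0.5, 1)
           return int(round(frac * (1 << 31))), shift

       def fixed_point_multiply(q, multiplier, shift):
           # Round-half-up variant of (q * multiplier) >> (31 - shift).
           total_shift = 31 - shift
           return (q * multiplier + (1 << (total_shift - 1))) >> total_shift

       m, s = fixed_point_multiplier_shift(0.1)   # alpha = 0.1
       print(fixed_point_multiply(100, m, s))     # 10, i.e. ~ alpha * 100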



##########
src/relay/qnn/op/leaky_relu.cc:
##########
@@ -82,42 +87,62 @@ Expr QnnLeakyReluCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
   // by a small alpha value < 1.
   //
   // We assume the same scale and zero point for alpha and the input tensor.
-  // Let T = s(q_t - z) where q_t is the input arg[0]
-  // Then, the quantized value of alpha * T is:
-  // q(a * T, s, z) = [(a * T) / s] + z = a * s(q_t - z) / s + z = a * (q_t - z) + z
-  // = a * q_t + (1 - a) * z
+  // LeakyReLU can be written in terms of respective quantized tensors, scales and
+  // zero points as
   //
-  // We return the quantized value of alpha * T for all values q_t < input_zero_point.
-
-  ICHECK_EQ(new_args.size(), 3);
-  Expr quantized_data = Cast(new_args[0], DataType::Int(32));
+  //    scale_o * (Q_o - zp_o) = alpha * scale_i * (Q_i - zp_i)  when Q_i < zp_i  (1)
+  //                       Q_o = Q_i when Q_i >= zp_i  (2)
+  //
+  // Since the input qnn params can differ from the output qnn params, we first requantize the
+  // input tensor to the output qnn params. After requantizing Q_i, equation (1) becomes
+  // equation (3), where Q_i' is the requantized data from Q_i.
+  //
+  //    scale_o * (Q_o - zp_o) = alpha * scale_o * (Q_i' - zp_o)  when Q_i < zp_i (3)
+  //                       Q_o = alpha * Q_i' + (1 - alpha) * zp_o  when Q_i < zp_i (4)

Review Comment:
   For completeness, also include the case where `Q_i >= zp_i`.
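
   A sketch of how the completed comment might read in the same notation (folding in the earlier point that the pass-through branch should also use the requantized Q_i'):

       //    scale_o * (Q_o - zp_o) = alpha * scale_o * (Q_i' - zp_o)   when Q_i < zp_i   (3)
       //                       Q_o = alpha * Q_i' + (1 - alpha) * zp_o when Q_i < zp_i   (4)
       //                       Q_o = Q_i'                              when Q_i >= zp_i  (5)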


