Posted to commits@tvm.apache.org by GitBox <gi...@apache.org> on 2020/01/06 07:39:15 UTC

[GitHub] [incubator-tvm] anijain2305 opened a new pull request #4629: WIP [QNN] Channel wise quantization - Quantize & Requantize

anijain2305 opened a new pull request #4629: WIP [QNN] Channel wise quantization - Quantize & Requantize
URL: https://github.com/apache/incubator-tvm/pull/4629
 
 
   WIP - RFC in progress. Once the RFC is posted, the WIP tag will be removed.
   

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-tvm] tmoreau89 commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize

Posted by GitBox <gi...@apache.org>.
tmoreau89 commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize
URL: https://github.com/apache/incubator-tvm/pull/4629#discussion_r363431509
 
 

 ##########
 File path: include/tvm/relay/qnn/attrs.h
 ##########
 @@ -33,10 +33,14 @@ namespace qnn {
 
 /*! \brief Attribute for requantize operator */
 struct RequantizeAttrs : public tvm::AttrsNode<RequantizeAttrs> {
+  int axis;
   std::string rounding;
   DataType out_dtype;
 
   TVM_DECLARE_ATTRS(RequantizeAttrs, "relay.attrs.RequantizeAttrs") {
+    TVM_ATTR_FIELD(axis)
+      .describe("The output channel axis for channel wise quantization.")
 
 Review comment:
   explain what -1 means since it's a special value


[GitHub] [incubator-tvm] shoubhik commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize

Posted by GitBox <gi...@apache.org>.
shoubhik commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize
URL: https://github.com/apache/incubator-tvm/pull/4629#discussion_r363864178
 
 

 ##########
 File path: src/relay/pass/pattern_util.h
 ##########
 @@ -221,29 +222,69 @@ inline bool IsScalar(const Expr& expr) {
   return true;
 }
 
+/*!
+ * \brief Check if expr is a const scalar.
+ * \param expr The expr.
+ * \return True if const scalar.
+ */
+inline bool IsConstScalar(const Expr& expr) {
+  const auto* const_expr = expr.as<ConstantNode>();
+  if (const_expr) {
+    return const_expr->is_scalar();
+  }
+  return false;
+}
+
 /*!
  * \brief Create a Constant with a scalar
  *
  * \param dtype The data type.
  * \param value The value of the scalar.
  * \return A Constant.
  */
-template<typename T>
+template <typename T>
 inline Constant MakeConstantScalar(DataType dtype, T value) {
   runtime::NDArray arr = runtime::NDArray::Empty({}, dtype, {kDLCPU, 0});
   TVM_DTYPE_DISPATCH(dtype, DType, {
     if (dtype == DataType::Float(16)) {
       // convert to float16
       // storage is uint16_t
       *static_cast<DType*>(arr->data) =
-        __truncXfYf2__<float, uint32_t, 23, uint16_t, uint16_t, 10>(static_cast<float>(value));
+          __truncXfYf2__<float, uint32_t, 23, uint16_t, uint16_t, 10>(static_cast<float>(value));
     } else {
       *static_cast<DType*>(arr->data) = value;
     }
   })
   return ConstantNode::make(arr);
 }
 
+/*!
+ * \brief Create a Constant with a tensor.
+ *
+ * \param dtype The data type.
+ * \param value The vector of the tensor values.
+ * \return A Constant.
+ */
+template <typename T>
+static inline Constant MakeConstantTensor(DataType dtype, std::vector<int64_t> shape,
+                                          std::vector<T> value) {
+  runtime::NDArray arr = runtime::NDArray::Empty(shape, dtype, {kDLCPU, 0});
 
 Review comment:
   can we also templatize the device type, with the default being kDLCPU?


[GitHub] [incubator-tvm] zhiics commented on issue #4629: [QNN] Channel wise quantization - Quantize & Requantize

Posted by GitBox <gi...@apache.org>.
zhiics commented on issue #4629: [QNN] Channel wise quantization - Quantize & Requantize
URL: https://github.com/apache/incubator-tvm/pull/4629#issuecomment-571685764
 
 
   @tmoreau89 Could you please manage the PR?


[GitHub] [incubator-tvm] jackwish commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize

Posted by GitBox <gi...@apache.org>.
jackwish commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize
URL: https://github.com/apache/incubator-tvm/pull/4629#discussion_r363580810
 
 

 ##########
 File path: src/relay/qnn/util.h
 ##########
 @@ -132,11 +160,47 @@ Expr FixedPointMultiply(Expr tensor, double multiplier, const Array<IndexExpr>&
  */
 static inline bool IsScalarType(const Type& expr_type, const DataType& dtype) {
   const auto* scale = expr_type.as<TensorTypeNode>();
-  CHECK_EQ(scale->shape.size(), 0);
+  if (scale->shape.size() != 0) {
+    return false;
+  }
   CHECK(scale->dtype == dtype) << "Expected " << dtype << " but got " << scale->dtype;
   return true;
 }
 
 Review comment:
   I understand there is nothing too wrong here, but the `IsScalarType()` name does not suggest that it also checks the dtype, which leads to confusing logic in `AssignQnnParamType`...


[GitHub] [incubator-tvm] tmoreau89 commented on issue #4629: [QNN] Channel wise quantization - Quantize & Requantize

Posted by GitBox <gi...@apache.org>.
tmoreau89 commented on issue #4629: [QNN] Channel wise quantization - Quantize & Requantize
URL: https://github.com/apache/incubator-tvm/pull/4629#issuecomment-571795854
 
 
   Thank you @anijain2305, @shoubhik, @jackwish; the PR has been merged.


[GitHub] [incubator-tvm] tmoreau89 commented on issue #4629: [QNN] Channel wise quantization - Quantize & Requantize

Posted by GitBox <gi...@apache.org>.
tmoreau89 commented on issue #4629: [QNN] Channel wise quantization - Quantize & Requantize
URL: https://github.com/apache/incubator-tvm/pull/4629#issuecomment-571686785
 
 
   @zhiics on it, thanks


[GitHub] [incubator-tvm] jackwish commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize

Posted by GitBox <gi...@apache.org>.
jackwish commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize
URL: https://github.com/apache/incubator-tvm/pull/4629#discussion_r363578029
 
 

 ##########
 File path: src/relay/pass/pattern_util.h
 ##########
 @@ -221,29 +222,66 @@ inline bool IsScalar(const Expr& expr) {
   return true;
 }
 
+/*!
+ * \brief Check if expr is a const scalar.
+ * \param expr The expr.
+ * \return True if const scalar.
+ */
+inline bool IsConstScalar(const Expr& expr) {
+  const auto* const_expr = expr.as<ConstantNode>();
+  return const_expr && const_expr->is_scalar();
+}
+
 /*!
  * \brief Create a Constant with a scalar
  *
  * \param dtype The data type.
  * \param value The value of the scalar.
  * \return A Constant.
  */
-template<typename T>
+template <typename T>
 inline Constant MakeConstantScalar(DataType dtype, T value) {
   runtime::NDArray arr = runtime::NDArray::Empty({}, dtype, {kDLCPU, 0});
   TVM_DTYPE_DISPATCH(dtype, DType, {
     if (dtype == DataType::Float(16)) {
       // convert to float16
       // storage is uint16_t
       *static_cast<DType*>(arr->data) =
-        __truncXfYf2__<float, uint32_t, 23, uint16_t, uint16_t, 10>(static_cast<float>(value));
+          __truncXfYf2__<float, uint32_t, 23, uint16_t, uint16_t, 10>(static_cast<float>(value));
     } else {
       *static_cast<DType*>(arr->data) = value;
     }
   })
   return ConstantNode::make(arr);
 }
 
+/*!
+ * \brief Create a Constant with a tensor.
+ *
+ * \param dtype The data type.
+ * \param value The vector of the tensor values.
+ * \return A Constant.
+ */
+template <typename T>
+static inline Constant MakeConstantTensor(DataType dtype, std::vector<int64_t> shape,
+                                          std::vector<T> value) {
+  runtime::NDArray arr = runtime::NDArray::Empty(shape, dtype, {kDLCPU, 0});
+  for (size_t i = 0; i < value.size(); i++) {
+    TVM_DTYPE_DISPATCH(dtype, DType, {
+      if (dtype == DataType::Float(16)) {
 
 Review comment:
   maybe put the `for` inside the `if`?


[GitHub] [incubator-tvm] tmoreau89 commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize

Posted by GitBox <gi...@apache.org>.
tmoreau89 commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize
URL: https://github.com/apache/incubator-tvm/pull/4629#discussion_r363432091
 
 

 ##########
 File path: src/relay/qnn/util.h
 ##########
 @@ -124,6 +125,32 @@ static inline int64_t get_const_int(const tvm::Expr& x) {
 Expr FixedPointMultiply(Expr tensor, double multiplier, const Array<IndexExpr>& input_shape,
                         const std::string& rounding);
 
+/*
 + * \brief Fixed point multiplication between an integer tensor and a floating point
 + *        scalar, where the input tensor is per-axis/per-channel quantized.
+ * \param tensor The quantized input tensor of dtype int64.
+ * \param multiplier The scalar multiplier.
+ * \param input_shape Shape of the input tensor.
+ * \param axis The axis along which the input tensor is quantized.
 
 Review comment:
   add mention of special case value `-1`


[GitHub] [incubator-tvm] jackwish commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize

Posted by GitBox <gi...@apache.org>.
jackwish commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize
URL: https://github.com/apache/incubator-tvm/pull/4629#discussion_r363591314
 
 

 ##########
 File path: src/relay/qnn/op/requantize.cc
 ##########
 @@ -157,16 +175,22 @@ bool RequantizeRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
         in_dtype == DataType::Int(32))
       << "Input type should be one of [int8, uint8, int32] but was " << in_dtype;
 
-  // Check the types of scale and zero points.
-  CHECK(IsScalarType(types[1], DataType::Float(32)));  // input_scale
-  CHECK(IsScalarType(types[2], DataType::Int(32)));    // input_zero_point
+  const RequantizeAttrs* requantize_attrs = attrs.as<RequantizeAttrs>();
+  int axis = requantize_attrs->axis;
+  axis = (axis == -1) ? data->shape.size() - 1: axis;
+  CHECK_LE(axis, static_cast<int>(data->shape.size()))
 
 Review comment:
   Same as with `quantize`: should this be `CHECK_LT` here?


[GitHub] [incubator-tvm] anijain2305 commented on issue #4629: WIP [QNN] Channel wise quantization - Quantize & Requantize

Posted by GitBox <gi...@apache.org>.
anijain2305 commented on issue #4629: WIP [QNN] Channel wise quantization - Quantize & Requantize
URL: https://github.com/apache/incubator-tvm/pull/4629#issuecomment-571223963
 
 
   RFC - https://discuss.tvm.ai/t/qnn-channel-wise-quantization/5300
   
   Please review - @jackwish @FrozenGene @shoubhik @u99127 @Leo-arm @vinx13 @zhiics 


[GitHub] [incubator-tvm] anijain2305 commented on issue #4629: [QNN] Channel wise quantization - Quantize & Requantize

Posted by GitBox <gi...@apache.org>.
anijain2305 commented on issue #4629: [QNN] Channel wise quantization - Quantize & Requantize
URL: https://github.com/apache/incubator-tvm/pull/4629#issuecomment-571765641
 
 
   @shoubhik Thanks for the comments. Addressed.
   @tmoreau89 CI passing now.


[GitHub] [incubator-tvm] tmoreau89 commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize

Posted by GitBox <gi...@apache.org>.
tmoreau89 commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize
URL: https://github.com/apache/incubator-tvm/pull/4629#discussion_r363431643
 
 

 ##########
 File path: include/tvm/relay/qnn/attrs.h
 ##########
 @@ -56,10 +60,14 @@ struct RequantizeAttrs : public tvm::AttrsNode<RequantizeAttrs> {
 /*! \brief Attribute for quantize operator */
 struct QuantizeAttrs : public tvm::AttrsNode<QuantizeAttrs> {
   DataType out_dtype;
+  int axis;
 
   TVM_DECLARE_ATTRS(QuantizeAttrs, "relay.attrs.QuantizeAttrs") {
     TVM_ATTR_FIELD(out_dtype)
       .describe("Output data type, can be one of [int8 or uint8].");
+    TVM_ATTR_FIELD(axis)
+      .describe("The output channel axis for channel wise quantization.")
 
 Review comment:
   idem


[GitHub] [incubator-tvm] jackwish commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize

Posted by GitBox <gi...@apache.org>.
jackwish commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize
URL: https://github.com/apache/incubator-tvm/pull/4629#discussion_r363577646
 
 

 ##########
 File path: src/relay/pass/pattern_util.h
 ##########
 @@ -221,29 +222,66 @@ inline bool IsScalar(const Expr& expr) {
   return true;
 }
 
+/*!
+ * \brief Check if expr is a const scalar.
+ * \param expr The expr.
+ * \return True if const scalar.
+ */
+inline bool IsConstScalar(const Expr& expr) {
+  const auto* const_expr = expr.as<ConstantNode>();
+  return const_expr && const_expr->is_scalar();
 
 Review comment:
   what about comparing `const_expr` against `nullptr`?


[GitHub] [incubator-tvm] anijain2305 commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize

Posted by GitBox <gi...@apache.org>.
anijain2305 commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize
URL: https://github.com/apache/incubator-tvm/pull/4629#discussion_r363894702
 
 

 ##########
 File path: src/relay/pass/pattern_util.h
 ##########
 @@ -221,29 +222,69 @@ inline bool IsScalar(const Expr& expr) {
   return true;
 }
 
+/*!
+ * \brief Check if expr is a const scalar.
+ * \param expr The expr.
+ * \return True if const scalar.
+ */
+inline bool IsConstScalar(const Expr& expr) {
+  const auto* const_expr = expr.as<ConstantNode>();
+  if (const_expr) {
+    return const_expr->is_scalar();
+  }
+  return false;
+}
+
 /*!
  * \brief Create a Constant with a scalar
  *
  * \param dtype The data type.
  * \param value The value of the scalar.
  * \return A Constant.
  */
-template<typename T>
+template <typename T>
 inline Constant MakeConstantScalar(DataType dtype, T value) {
   runtime::NDArray arr = runtime::NDArray::Empty({}, dtype, {kDLCPU, 0});
   TVM_DTYPE_DISPATCH(dtype, DType, {
     if (dtype == DataType::Float(16)) {
       // convert to float16
       // storage is uint16_t
       *static_cast<DType*>(arr->data) =
-        __truncXfYf2__<float, uint32_t, 23, uint16_t, uint16_t, 10>(static_cast<float>(value));
+          __truncXfYf2__<float, uint32_t, 23, uint16_t, uint16_t, 10>(static_cast<float>(value));
     } else {
       *static_cast<DType*>(arr->data) = value;
     }
   })
   return ConstantNode::make(arr);
 }
 
+/*!
+ * \brief Create a Constant with a tensor.
+ *
+ * \param dtype The data type.
+ * \param value The vector of the tensor values.
+ * \return A Constant.
+ */
+template <typename T>
+static inline Constant MakeConstantTensor(DataType dtype, std::vector<int64_t> shape,
+                                          std::vector<T> value) {
+  runtime::NDArray arr = runtime::NDArray::Empty(shape, dtype, {kDLCPU, 0});
 
 Review comment:
   Good catch, but it's not needed. The constants are always created on the CPU device, as they have to go through the FoldConstant pass (which can only run on CPU). After we have a compiled graph, if the graph is supposed to run on GPU, we can create a GPU Constant object later on.
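
As a rough illustration of that flow, here is a minimal Python sketch, assuming a recent TVM API (`tvm.IRModule` and the pass spelling may differ across versions): FoldConstant evaluates a constant subexpression at compile time, and that evaluation happens on CPU.

    import tvm
    from tvm import relay

    # A constant-only subexpression; FoldConstant evaluates it at compile time.
    # The evaluation itself runs on CPU, which is why constants are
    # materialized on the CPU device first.
    body = relay.const(2.0) * relay.const(3.0)
    mod = tvm.IRModule.from_expr(relay.Function([], body))
    mod = relay.transform.FoldConstant()(mod)
    print(mod)  # the function body is folded to the constant 6.0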


[GitHub] [incubator-tvm] tmoreau89 commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize

Posted by GitBox <gi...@apache.org>.
tmoreau89 commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize
URL: https://github.com/apache/incubator-tvm/pull/4629#discussion_r363431570
 
 

 ##########
 File path: python/tvm/relay/qnn/op/qnn.py
 ##########
 @@ -95,6 +101,8 @@ def quantize(data,
         The output zero_point.
     output_scale : tvm.relay.Expr
         The output scale.
+    axis : int
+        The channel axis for quantization.
 
 Review comment:
   idem


[GitHub] [incubator-tvm] anijain2305 commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize

Posted by GitBox <gi...@apache.org>.
anijain2305 commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize
URL: https://github.com/apache/incubator-tvm/pull/4629#discussion_r363617463
 
 

 ##########
 File path: src/relay/qnn/op/quantize.cc
 ##########
 @@ -71,13 +78,29 @@ Expr MakeQuantize(Expr data, Expr output_scale, Expr output_zero_point, DataType
 }
 
 Expr QuantizeLower(const Expr& input_tensor, const Expr& output_scale,
-                   const Expr& output_zero_point, const QuantizeAttrs* attrs) {
+                   const Expr& output_zero_point, const Array<IndexExpr>& input_shape,
+                   const QuantizeAttrs* attrs) {
   const auto out_dtype = attrs->out_dtype;
+  const auto axis = attrs->axis;
+
+  size_t n_dim = input_shape.size();
+
+  auto expanded_output_scale = output_scale;
+  if (!IsConstScalar(output_scale)) {
+    expanded_output_scale = ExpandBiasToMatchAxis(output_scale, n_dim, {axis});
 
 Review comment:
   This helper is used in many other passes, so we can keep this name for now and send a separate PR for better naming across all those files.
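
For readers following along: the helper reshapes a 1-D per-channel vector so it broadcasts along the chosen axis. A minimal numpy sketch of the idea (the function name below is hypothetical, not the TVM helper itself):

    import numpy as np

    def expand_to_match_axis(vec, ndim, axis):
        # Hypothetical stand-in for ExpandBiasToMatchAxis: reshape a 1-D
        # per-channel vector so it broadcasts against an ndim-D tensor
        # along `axis`.
        shape = [1] * ndim
        shape[axis] = -1
        return vec.reshape(shape)

    scales = np.array([0.5, 0.25, 0.125], dtype="float32")
    # For an NCHW tensor (ndim=4, channel axis=1) this yields shape
    # (1, 3, 1, 1), so `data * expanded_scales` applies one scale per channel.
    print(expand_to_match_axis(scales, 4, 1).shape)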


[GitHub] [incubator-tvm] tmoreau89 merged pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize

Posted by GitBox <gi...@apache.org>.
tmoreau89 merged pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize
URL: https://github.com/apache/incubator-tvm/pull/4629
 
 
   


[GitHub] [incubator-tvm] tmoreau89 commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize

Posted by GitBox <gi...@apache.org>.
tmoreau89 commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize
URL: https://github.com/apache/incubator-tvm/pull/4629#discussion_r363454319
 
 

 ##########
 File path: python/tvm/relay/qnn/op/qnn.py
 ##########
 @@ -53,6 +54,9 @@ def requantize(data,
     output_zero_point: tvm.relay.Expr
         The zero point of the output tensor.
 
+    axis : int
+        The channel axis for quantization.
 
 Review comment:
   Gotcha, that makes sense! Just want to avoid confusion with a "special case value".


[GitHub] [incubator-tvm] tmoreau89 commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize

Posted by GitBox <gi...@apache.org>.
tmoreau89 commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize
URL: https://github.com/apache/incubator-tvm/pull/4629#discussion_r363430151
 
 

 ##########
 File path: python/tvm/relay/qnn/op/qnn.py
 ##########
 @@ -53,6 +54,9 @@ def requantize(data,
     output_zero_point: tvm.relay.Expr
         The zero point of the output tensor.
 
+    axis : int
+        The channel axis for quantization.
 
 Review comment:
   Can we add a comment on what a -1 (default) argument value means for `axis`?


[GitHub] [incubator-tvm] zhiics edited a comment on issue #4629: [QNN] Channel wise quantization - Quantize & Requantize

Posted by GitBox <gi...@apache.org>.
zhiics edited a comment on issue #4629: [QNN] Channel wise quantization - Quantize & Requantize
URL: https://github.com/apache/incubator-tvm/pull/4629#issuecomment-571685764
 
 
   @tmoreau89 Could you please manage the PR?
   @anijain2305 please retrigger CI


[GitHub] [incubator-tvm] jackwish commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize

Posted by GitBox <gi...@apache.org>.
jackwish commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize
URL: https://github.com/apache/incubator-tvm/pull/4629#discussion_r363581054
 
 

 ##########
 File path: src/relay/qnn/util.h
 ##########
 @@ -132,11 +160,47 @@ Expr FixedPointMultiply(Expr tensor, double multiplier, const Array<IndexExpr>&
  */
 static inline bool IsScalarType(const Type& expr_type, const DataType& dtype) {
   const auto* scale = expr_type.as<TensorTypeNode>();
-  CHECK_EQ(scale->shape.size(), 0);
+  if (scale->shape.size() != 0) {
+    return false;
+  }
   CHECK(scale->dtype == dtype) << "Expected " << dtype << " but got " << scale->dtype;
   return true;
 }
 
+/*
+ * \brief Checks and assigns types to scale and zero points.
+ * \param expr_type The type of expr to be checked.
+ * \param dtype The expected dtype.
 + * \param shape The shape at the channel dim of the original tensor.
 + * \param reporter The type reporter of the original InferType call.
+ */
+static inline void AssignQnnParamType(const Type& expr_type, const DataType& dtype,
 
 Review comment:
   I suggest not naming it like this; maybe avoid the `QnnParam` wording. To me, a function name should describe *what the function does* rather than *how the function happens to be used*.


[GitHub] [incubator-tvm] anijain2305 commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize

Posted by GitBox <gi...@apache.org>.
anijain2305 commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize
URL: https://github.com/apache/incubator-tvm/pull/4629#discussion_r363453395
 
 

 ##########
 File path: python/tvm/relay/qnn/op/qnn.py
 ##########
 @@ -53,6 +54,9 @@ def requantize(data,
     output_zero_point: tvm.relay.Expr
         The zero point of the output tensor.
 
+    axis : int
+        The channel axis for quantization.
 
 Review comment:
   Good catch! This just means the last axis (similar to other ops). I will add.
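
As a usage sketch of the new parameter (assuming the Python signature this PR adds, with per-channel input scales along axis 1):

    import numpy as np
    from tvm import relay

    data = relay.var("data", shape=(1, 3, 2, 2), dtype="int32")
    # One input scale per channel along axis=1; the output stays per-tensor.
    input_scale = relay.const(np.array([0.5, 0.25, 0.125], dtype="float32"))
    input_zero_point = relay.const(0, "int32")
    output_scale = relay.const(0.5, "float32")
    output_zero_point = relay.const(0, "int32")

    out = relay.qnn.op.requantize(
        data, input_scale, input_zero_point,
        output_scale, output_zero_point,
        axis=1,  # channel axis; the default axis=-1 means the last axis
        out_dtype="int8")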


[GitHub] [incubator-tvm] shoubhik commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize

Posted by GitBox <gi...@apache.org>.
shoubhik commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize
URL: https://github.com/apache/incubator-tvm/pull/4629#discussion_r363865764
 
 

 ##########
 File path: src/relay/qnn/op/quantize.cc
 ##########
 @@ -45,11 +45,16 @@ bool QuantizeRel(const Array<Type>& types,
   CHECK(input_dtype == DataType::Float(32))
     << "Input type should be one of float32 but was " <<  input_dtype;
 
-  // Check the types of scale and zero points.
-  CHECK(IsScalarType(types[1], DataType::Float(32)));  // output_scale
-  CHECK(IsScalarType(types[2], DataType::Int(32)));    // output_zero_point
-
   const auto* quantize_attrs = attrs.as<QuantizeAttrs>();
+  int axis = quantize_attrs->axis;
+  axis = (axis == -1) ? data->shape.size() - 1: axis;
+  CHECK_LT(axis, static_cast<int>(data->shape.size()))
 
 Review comment:
   can you also add CHECK_GT to check the other side as well?
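
The normalization plus two-sided bound check being discussed, sketched in Python for clarity (the helper name is made up):

    def normalize_axis(axis, ndim):
        # Hypothetical helper mirroring the C++ checks: map the -1 sentinel
        # to the last axis, then validate both bounds (>= 0 and < rank).
        if axis == -1:
            axis = ndim - 1
        assert 0 <= axis < ndim, f"axis {axis} out of range for rank {ndim}"
        return axis

    print(normalize_axis(-1, 4))  # -> 3, the last axis of a rank-4 tensor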


[GitHub] [incubator-tvm] anijain2305 commented on issue #4629: [QNN] Channel wise quantization - Quantize & Requantize

Posted by GitBox <gi...@apache.org>.
anijain2305 commented on issue #4629: [QNN] Channel wise quantization - Quantize & Requantize
URL: https://github.com/apache/incubator-tvm/pull/4629#issuecomment-571467246
 
 
   > LGTM. Several code style comments :)
   
   Thanks for the helpful comments, @jackwish. All the comments were very good and have been addressed :)


[GitHub] [incubator-tvm] u99127 commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize

Posted by GitBox <gi...@apache.org>.
u99127 commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize
URL: https://github.com/apache/incubator-tvm/pull/4629#discussion_r364403117
 
 

 ##########
 File path: include/tvm/relay/qnn/attrs.h
 ##########
 @@ -33,10 +33,15 @@ namespace qnn {
 
 /*! \brief Attribute for requantize operator */
 struct RequantizeAttrs : public tvm::AttrsNode<RequantizeAttrs> {
+  int axis;
   std::string rounding;
   DataType out_dtype;
 
   TVM_DECLARE_ATTRS(RequantizeAttrs, "relay.attrs.RequantizeAttrs") {
+    TVM_ATTR_FIELD(axis)
+      .describe("The output channel axis for channel wise quantization. Default value is -1,"
+                "which corresponds to the last axis.")
 
 Review comment:
   Never mind, later in the thread things are a bit clearer. The axis of -1 is fine - it is part of the per-channel quantization scheme.
   
   In some sense I would have preferred to see an IsPerChannelQuantized predicate, rather than reusing the IsConstScalar predicate further below, but that's a code hygiene thing.
   
   And, yes, sorry about the slow response; I'm a bit snowed under.
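
A sketch of what such a predicate might look like at the Python level (hypothetical helper; it simply distinguishes a rank-1 constant scale from a rank-0 one, and the exact TVM classes involved are an assumption):

    from tvm import relay

    def is_per_channel_quantized(scale):
        # Hypothetical predicate: a rank-1 constant scale tensor signals
        # per-channel quantization; a rank-0 (scalar) scale is per-tensor.
        return isinstance(scale, relay.Constant) and len(scale.data.shape) == 1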


[GitHub] [incubator-tvm] shoubhik commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize

Posted by GitBox <gi...@apache.org>.
shoubhik commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize
URL: https://github.com/apache/incubator-tvm/pull/4629#discussion_r363864580
 
 

 ##########
 File path: src/relay/pass/pattern_util.h
 ##########
 @@ -221,29 +222,69 @@ inline bool IsScalar(const Expr& expr) {
   return true;
 }
 
+/*!
+ * \brief Check if expr is a const scalar.
+ * \param expr The expr.
+ * \return True if const scalar.
+ */
+inline bool IsConstScalar(const Expr& expr) {
+  const auto* const_expr = expr.as<ConstantNode>();
+  if (const_expr) {
+    return const_expr->is_scalar();
+  }
+  return false;
+}
+
 /*!
  * \brief Create a Constant with a scalar
  *
  * \param dtype The data type.
  * \param value The value of the scalar.
  * \return A Constant.
  */
-template<typename T>
+template <typename T>
 inline Constant MakeConstantScalar(DataType dtype, T value) {
   runtime::NDArray arr = runtime::NDArray::Empty({}, dtype, {kDLCPU, 0});
   TVM_DTYPE_DISPATCH(dtype, DType, {
     if (dtype == DataType::Float(16)) {
       // convert to float16
       // storage is uint16_t
       *static_cast<DType*>(arr->data) =
-        __truncXfYf2__<float, uint32_t, 23, uint16_t, uint16_t, 10>(static_cast<float>(value));
+          __truncXfYf2__<float, uint32_t, 23, uint16_t, uint16_t, 10>(static_cast<float>(value));
     } else {
       *static_cast<DType*>(arr->data) = value;
     }
   })
   return ConstantNode::make(arr);
 }
 
+/*!
+ * \brief Create a Constant with a tensor.
+ *
+ * \param dtype The data type.
+ * \param value The vector of the tensor values.
+ * \return A Constant.
+ */
+template <typename T>
+static inline Constant MakeConstantTensor(DataType dtype, std::vector<int64_t> shape,
+                                          std::vector<T> value) {
+  runtime::NDArray arr = runtime::NDArray::Empty(shape, dtype, {kDLCPU, 0});
+  TVM_DTYPE_DISPATCH(dtype, DType, {
+    for (size_t i = 0; i < value.size(); i++) {
+      if (dtype == DataType::Float(16)) {
+        // convert to float16
+        // storage is uint16_t
+        *(static_cast<DType*>(arr->data) + i) =
 
 Review comment:
   can you add a comment as to why this conversion is needed?
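
For context, C++ has no native half-precision scalar type, so the float32 bit pattern is truncated into a uint16_t payload; a quick numpy sketch of the same truncation:

    import numpy as np

    # float16 has no native C++ type, so TVM stores halves as uint16_t bit
    # patterns; numpy reproduces the truncation that __truncXfYf2__ performs.
    value = np.float32(0.1)
    half_bits = np.float16(value).view(np.uint16)
    print(hex(half_bits))  # the raw 16-bit payload written into the NDArray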


[GitHub] [incubator-tvm] jackwish commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize

Posted by GitBox <gi...@apache.org>.
jackwish commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize
URL: https://github.com/apache/incubator-tvm/pull/4629#discussion_r363581404
 
 

 ##########
 File path: src/relay/qnn/util.h
 ##########
 @@ -124,6 +125,33 @@ static inline int64_t get_const_int(const tvm::Expr& x) {
 Expr FixedPointMultiply(Expr tensor, double multiplier, const Array<IndexExpr>& input_shape,
                         const std::string& rounding);
 
 +/*
 + * \brief Fixed point multiplication between an integer tensor and a floating point
 + *        scalar, where the input tensor is per-axis/per-channel quantized.
 + * \param tensor The quantized input tensor of dtype int64.
 + * \param multiplier The vector of per-channel multipliers.
 + * \param input_shape Shape of the input tensor.
 + * \param axis The axis along which the input tensor is quantized. Default value is -1,
 + *        which corresponds to the last axis.
 + * \param rounding "UPWARD" or "TONEAREST". The rounding direction when the value
 + *        is midway between two representable values.
 + * \return The sequence of Relay ops for fixed point multiplication.
 + *
 + * \note The original computation is scale_fp32 * quantized_tensor. To convert this
 + *       into integer computation, the multiplication with the fp32 vector can be
 + *       replaced by multiplication with an int vector followed by a right shift of
 + *       the result. This approximates the floating point computation with a
 + *       fixed point computation.
 + *
 + *       The fixed point multiplication consists of the following steps:
 + *       1) Multiply the fixed point multiplier with the quantized tensor.
 + *       2) Round the result.
 + *       3) Right shift the result.
 + */
+Expr FixedPointMultiplyPerChannel(Expr tensor, std::vector<double> multiplier,
 
 Review comment:
   Naming things... If we are keeping this name, maybe rename `axis` to `channel_axis`? Otherwise, maybe we can drop the `channel` semantics from the name.
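
A minimal numeric sketch of the three documented steps, in plain numpy ("UPWARD" rounding, multipliers assumed in (0, 1]; this is an illustration, not the actual Relay lowering):

    import math
    import numpy as np

    def fixed_point_multiply_per_channel(tensor, multipliers, axis):
        out = np.empty_like(tensor)
        t = np.moveaxis(tensor, axis, 0)  # channel-major view for iteration
        o = np.moveaxis(out, axis, 0)     # a view, so writes reach `out`
        for c, m in enumerate(multipliers):
            significand, exponent = math.frexp(m)  # m = significand * 2**exponent
            fixed = int(round(significand * (1 << 31)))  # Q31 fixed-point multiplier
            shift = 31 - exponent
            prod = t[c].astype(np.int64) * fixed   # 1) multiply
            prod += np.int64(1) << (shift - 1)     # 2) round ("UPWARD")
            o[c] = prod >> shift                   # 3) right shift
        return out

    x = np.array([[-100, 100], [200, -200]], dtype=np.int64)
    print(fixed_point_multiply_per_channel(x, [0.5, 0.25], axis=0))
    # -> [[-50  50]
    #     [ 50 -50]]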


[GitHub] [incubator-tvm] jackwish commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize

Posted by GitBox <gi...@apache.org>.
jackwish commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize
URL: https://github.com/apache/incubator-tvm/pull/4629#discussion_r363578316
 
 

 ##########
 File path: src/relay/qnn/op/quantize.cc
 ##########
 @@ -71,13 +78,29 @@ Expr MakeQuantize(Expr data, Expr output_scale, Expr output_zero_point, DataType
 }
 
 Expr QuantizeLower(const Expr& input_tensor, const Expr& output_scale,
-                   const Expr& output_zero_point, const QuantizeAttrs* attrs) {
+                   const Expr& output_zero_point, const Array<IndexExpr>& input_shape,
+                   const QuantizeAttrs* attrs) {
   const auto out_dtype = attrs->out_dtype;
+  const auto axis = attrs->axis;
+
+  size_t n_dim = input_shape.size();
+
+  auto expanded_output_scale = output_scale;
+  if (!IsConstScalar(output_scale)) {
+    expanded_output_scale = ExpandBiasToMatchAxis(output_scale, n_dim, {axis});
 
 Review comment:
   what about renaming `ExpandBiasToMatchAxis` to something more generic?


[GitHub] [incubator-tvm] u99127 commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize

Posted by GitBox <gi...@apache.org>.
u99127 commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize
URL: https://github.com/apache/incubator-tvm/pull/4629#discussion_r364400908
 
 

 ##########
 File path: include/tvm/relay/qnn/attrs.h
 ##########
 @@ -33,10 +33,15 @@ namespace qnn {
 
 /*! \brief Attribute for requantize operator */
 struct RequantizeAttrs : public tvm::AttrsNode<RequantizeAttrs> {
+  int axis;
   std::string rounding;
   DataType out_dtype;
 
   TVM_DECLARE_ATTRS(RequantizeAttrs, "relay.attrs.RequantizeAttrs") {
+    TVM_ATTR_FIELD(axis)
+      .describe("The output channel axis for channel wise quantization. Default value is -1,"
+                "which corresponds to the last axis.")
 
 Review comment:
   I don't have an opinion on whether this is the right approach or not, but the question that popped into my mind was:
   
   Why not be more explicit and say that axis == -1 actually means per-tensor quantization?
   
   Thanks,
   Ramana


[GitHub] [incubator-tvm] jackwish commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize

Posted by GitBox <gi...@apache.org>.
jackwish commented on a change in pull request #4629: [QNN] Channel wise quantization - Quantize & Requantize
URL: https://github.com/apache/incubator-tvm/pull/4629#discussion_r363578507
 
 

 ##########
 File path: src/relay/qnn/op/quantize.cc
 ##########
 @@ -45,11 +45,16 @@ bool QuantizeRel(const Array<Type>& types,
   CHECK(input_dtype == DataType::Float(32))
     << "Input type should be one of float32 but was " <<  input_dtype;
 
-  // Check the types of scale and zero points.
-  CHECK(IsScalarType(types[1], DataType::Float(32)));  // output_scale
-  CHECK(IsScalarType(types[2], DataType::Int(32)));    // output_zero_point
-
   const auto* quantize_attrs = attrs.as<QuantizeAttrs>();
+  int axis = quantize_attrs->axis;
+  axis = (axis == -1) ? data->shape.size() - 1: axis;
+  CHECK_LE(axis, static_cast<int>(data->shape.size()))
 
 Review comment:
   Should be `CHECK_LT`?

