Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2021/07/01 14:34:51 UTC

[GitHub] [incubator-mxnet] sfraczek commented on a change in pull request #20302: [FEATURE] Performance improvement by asymmetric quantization Quantize+FC

sfraczek commented on a change in pull request #20302:
URL: https://github.com/apache/incubator-mxnet/pull/20302#discussion_r662345930



##########
File path: src/operator/quantization/quantize_graph_pass.cc
##########
@@ -578,6 +579,153 @@ Graph SetCalibTableToQuantizedGraph(Graph&& g) {
   return g;
 }
 
+static NDArray* FindInArgByName(const Graph &g, const std::string& name) {
+  const std::vector<std::string>& in_arg_names =
+      g.GetAttr<std::vector<std::string>>("in_arg_names");
+  size_t i =
+      std::distance(in_arg_names.begin(),
+                    std::find(in_arg_names.begin(), in_arg_names.end(), name));
+  if (i == in_arg_names.size()) {
+    throw std::runtime_error(name + " not found in in_arg_names");
+  }
+  return g.GetAttr<NDArray **>("in_args")[i];
+}
+
+static inline bool IsFC(const ObjectPtr& n) {
+#if MXNET_USE_MKLDNN == 1
+  if (n->op() == Op::Get("_sg_mkldnn_fully_connected")) {
+    auto const& param = nnvm::get<MKLDNNFCFullParam>(n->attrs.parsed);
+    if (param.default_param.no_bias == false &&
+        n->inputs[2].node->is_variable()) {
+      if (!(param.mkldnn_param.channel_wise_quantize.has_value() &&
+            param.mkldnn_param.channel_wise_quantize.value())) {
+        return true;
+      }
+    }
+  }
+#endif
+  return false;
+}
+
+static inline bool IsQuantize(const ObjectPtr& n) {
+  if (n->op() == Op::Get("_contrib_quantize_v2")) {
+    auto const &param = nnvm::get<QuantizeV2Param>(n->attrs.parsed);
+    if (param.min_calib_range.has_value() &&
+        param.min_calib_range.value() < 0.0f) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Rescales weights, min_weight and max_weight. Returns bias_int32_rescale.
+static inline float RescaleWeights(const Graph &g, const ObjectPtr &fc, NDArray* weight_tensor) {
+  ObjectPtr &quantize = fc->inputs[0].node;
+  auto min_data = std::stof(quantize->attrs.dict.at("min_calib_range"));
+  auto max_data = std::stof(quantize->attrs.dict.at("max_calib_range"));
+
+  float *min_weight = FindInArgByName(g, fc->inputs[5].node->attrs.name)->data().dptr<float>();
+  float *max_weight = FindInArgByName(g, fc->inputs[6].node->attrs.name)->data().dptr<float>();
+  float min_bias = *FindInArgByName(g, fc->inputs[7].node->attrs.name)->data().dptr<float>();
+  float max_bias = *FindInArgByName(g, fc->inputs[8].node->attrs.name)->data().dptr<float>();
+
+  float data_scale_ = kUint8Range / (max_data - min_data);
+  float weight_scale = GetQuantizeScale(kInt8, *min_weight, *max_weight);
+  float bias_scale = GetQuantizeScale(kInt8, min_bias, max_bias);
+  float bias_int32_rescale = data_scale_ * weight_scale / bias_scale;
+
+  // TODO(zhennan): mkldnn has a bug handling INT_MAX in bias, so cap the
+  // maximum value of bias at INT_MAX / 2.
+  float bias_max_rescale = mshadow::red::limits::MaxValue<int32_t>() / 2 /
+                           MaxAbs(min_bias, max_bias) / bias_scale;
+  if (bias_int32_rescale > bias_max_rescale) {
+    LOG(INFO) << "RESCALING WEIGHTS!";
+    // avoid overflow on bias
+    bias_int32_rescale = bias_max_rescale;
+    float weight_rescale =
+        bias_int32_rescale * bias_scale / data_scale_ / weight_scale;
+
+    size_t weight_size = weight_tensor->shape().Size();
+    int8_t *weight_ptr = weight_tensor->data().dptr<int8_t>();
+    for (int32_t i = 0; i < static_cast<int32_t>(weight_size); ++i) {
+      weight_ptr[i] = std::round(weight_ptr[i] * weight_rescale);
+    }
+    *min_weight *= weight_rescale;
+    *max_weight *= weight_rescale;
+  }
+  return bias_int32_rescale;
+}
+
+static inline void ShiftBias(int32_t* bias_ptr_int32, size_t bias_size,
+                             NDArray* weight_tensor, int32_t shift_value) {
+  CHECK_EQ(static_cast<size_t>(weight_tensor->shape()[0]), bias_size);
+  int8_t* weight_ptr = weight_tensor->data().dptr<int8_t>();
+  for (dim_t i = 0; i < weight_tensor->shape()[0]; ++i) {
+    for (dim_t j = 0; j < weight_tensor->shape()[1]; j++) {
+      bias_ptr_int32[i] -= shift_value * (*weight_ptr++);

Review comment:
       calc_index would have to be something like `i*weight_tensor->shape()[1]+j`? I personally preferred the incrementation, although I don't know whether the compiler ends up generating the same code for both anyway.
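
       For illustration, a minimal standalone sketch of the two variants being compared (hypothetical helper names; `rows`/`cols` stand in for `weight_tensor->shape()[0]` and `weight_tensor->shape()[1]`, this is not the PR code):

```c++
#include <cstdint>

// Variant 1: explicit index arithmetic (the calc_index style).
void ShiftBiasIndexed(int32_t* bias, const int8_t* weight,
                      int64_t rows, int64_t cols, int32_t shift_value) {
  for (int64_t i = 0; i < rows; ++i) {
    for (int64_t j = 0; j < cols; ++j) {
      bias[i] -= shift_value * weight[i * cols + j];
    }
  }
}

// Variant 2: pointer incrementation, as in ShiftBias above.
void ShiftBiasIncrement(int32_t* bias, const int8_t* weight,
                        int64_t rows, int64_t cols, int32_t shift_value) {
  const int8_t* w = weight;
  for (int64_t i = 0; i < rows; ++i) {
    for (int64_t j = 0; j < cols; ++j) {
      bias[i] -= shift_value * (*w++);
    }
  }
}
```

       Either way the inner stride is 1, so an optimizing compiler will most likely lower both loops to the same addressing; it mostly comes down to which reads clearer.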




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@mxnet.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org