You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by GitBox <gi...@apache.org> on 2021/11/05 10:41:12 UTC

[GitHub] [tvm] ashutosh-arm opened a new pull request #9456: [6/10] Code generation for fully connected layer via CMSIS-NN

ashutosh-arm opened a new pull request #9456:
URL: https://github.com/apache/tvm/pull/9456


   Support for fully connected layer via CMSIS-NN.
   
   Other PRs that need to be merged before this one:
   #9454 
   #9338 
   #9331 


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@tvm.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9456: [6/10] Code generation for fully connected layer via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9456:
URL: https://github.com/apache/tvm/pull/9456#discussion_r761251456



##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -234,6 +222,114 @@ class RelayToTIRVisitor : public MixedModeMutator {
                             context_buffer_size);
   }
 
+  void EmitFullyConnected(const GlobalVar& global_var, const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* fc_call = nullptr;
+    const CallNode* final_call = expr.as<CallNode>();
+    const OpNode* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    const CallNode* requantize_input = requantize_call->args[0].as<CallNode>();
+    const OpNode* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      fc_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      fc_call = requantize_input;
+    }
+
+    // TIR variables are created in the order they appear in the Relay partitioned function
+    // %1 = qnn.dense(%input, %weight_const_0, input_zero_point_scalar, kernel_zero_point_scalar,
+    //                 %input_scale_scalar, %kernel_scale_scalar)
+    // %2 = nn.bias_add(%1, %bias_const_1, axis=1)
+    // %3 = qnn.requantize(%2, %req_input_scale_scalar, %req_input_zero_point_scalar,
+    //                     %output_scale_scalar, %output_zero_point_scalar)
+    // clip(%3, a_min=%min_scalar, a_max=%max_scalar)
+    tir::Var input("input", DataType::Handle(8));
+    tir::Var filter("filter", DataType::Handle(8));
+    tir::Var bias("bias", DataType::Handle(32));
+    tir::Var output("output", DataType::Handle(8));
+
+    // Individual arguments to the structs arguments of the CMSIS-NN API are filled into call_extern
+    // https://github.com/ARM-software/CMSIS_5/blob/def6f800f95661eb3451d317f7d0dde504f6020d/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c#L50
+
+    // prepare cmsis_nn_fc_params
+    const DenseAttrs* dense_attrs = fc_call->attrs.as<DenseAttrs>();
+    int32_t input_offset = -GetScalarFromConstant<int32_t>(fc_call->args[2]);
+    int32_t filter_offset = -GetScalarFromConstant<int32_t>(fc_call->args[3]);
+    int32_t output_offset = GetScalarFromConstant<int32_t>(requantize_call->args[4]);
+    float input_scale = GetScalarFromConstant<float>(requantize_call->args[1]);
+    float output_scale = GetScalarFromConstant<float>(requantize_call->args[3]);
+    int32_t out_channels = qnn::get_const_int(dense_attrs->units);
+    int32_t clip_min, clip_max;
+    if (clip_call) {
+      const ClipAttrs* clip_attrs = clip_call->attrs.as<ClipAttrs>();
+      clip_min = clip_attrs->a_min;
+      clip_max = clip_attrs->a_max;
+    } else {
+      clip_min = -128;
+      clip_max = 127;
+    }
+
+    double quantized_multiplier =
+        static_cast<double>(input_scale) / static_cast<double>(output_scale);
+    auto mult_shift_pair = tvm::relay::qnn::GetFixedPointMultiplierShift(quantized_multiplier);
+    int32_t multiplier = std::get<0>(mult_shift_pair);
+    int32_t shift = std::get<1>(mult_shift_pair);
+
+    tvm::Array<PrimExpr> scalar_args = {
+        ToArg(input_offset), ToArg(filter_offset), ToArg(output_offset), ToArg(clip_min),
+        ToArg(clip_max),     ToArg(multiplier),    ToArg(shift)};
+
+    // cmsis_nn_dims *input_dims

Review comment:
       But I do see the problem — this comment alone may not suffice.
   




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@tvm.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [tvm] manupa-arm commented on a change in pull request #9456: [6/10] Code generation for fully connected layer via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on a change in pull request #9456:
URL: https://github.com/apache/tvm/pull/9456#discussion_r761229235



##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -234,6 +222,114 @@ class RelayToTIRVisitor : public MixedModeMutator {
                             context_buffer_size);
   }
 
+  void EmitFullyConnected(const GlobalVar& global_var, const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* fc_call = nullptr;
+    const CallNode* final_call = expr.as<CallNode>();
+    const OpNode* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    const CallNode* requantize_input = requantize_call->args[0].as<CallNode>();
+    const OpNode* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      fc_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      fc_call = requantize_input;
+    }
+
+    // TIR variables are created in the order they appear in the Relay partitioned function
+    // %1 = qnn.dense(%input, %weight_const_0, input_zero_point_scalar, kernel_zero_point_scalar,
+    //                 %input_scale_scalar, %kernel_scale_scalar)
+    // %2 = nn.bias_add(%1, %bias_const_1, axis=1)
+    // %3 = qnn.requantize(%2, %req_input_scale_scalar, %req_input_zero_point_scalar,
+    //                     %output_scale_scalar, %output_zero_point_scalar)
+    // clip(%3, a_min=%min_scalar, a_max=%max_scalar)
+    tir::Var input("input", DataType::Handle(8));
+    tir::Var filter("filter", DataType::Handle(8));
+    tir::Var bias("bias", DataType::Handle(32));
+    tir::Var output("output", DataType::Handle(8));
+
+    // Individual arguments to the structs arguments of the CMSIS-NN API are filled into call_extern
+    // https://github.com/ARM-software/CMSIS_5/blob/def6f800f95661eb3451d317f7d0dde504f6020d/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c#L50
+
+    // prepare cmsis_nn_fc_params
+    const DenseAttrs* dense_attrs = fc_call->attrs.as<DenseAttrs>();
+    int32_t input_offset = -GetScalarFromConstant<int32_t>(fc_call->args[2]);
+    int32_t filter_offset = -GetScalarFromConstant<int32_t>(fc_call->args[3]);
+    int32_t output_offset = GetScalarFromConstant<int32_t>(requantize_call->args[4]);
+    float input_scale = GetScalarFromConstant<float>(requantize_call->args[1]);
+    float output_scale = GetScalarFromConstant<float>(requantize_call->args[3]);
+    int32_t out_channels = qnn::get_const_int(dense_attrs->units);
+    int32_t clip_min, clip_max;
+    if (clip_call) {
+      const ClipAttrs* clip_attrs = clip_call->attrs.as<ClipAttrs>();
+      clip_min = clip_attrs->a_min;
+      clip_max = clip_attrs->a_max;
+    } else {
+      clip_min = -128;
+      clip_max = 127;
+    }
+
+    double quantized_multiplier =
+        static_cast<double>(input_scale) / static_cast<double>(output_scale);
+    auto mult_shift_pair = tvm::relay::qnn::GetFixedPointMultiplierShift(quantized_multiplier);
+    int32_t multiplier = std::get<0>(mult_shift_pair);
+    int32_t shift = std::get<1>(mult_shift_pair);
+
+    tvm::Array<PrimExpr> scalar_args = {
+        ToArg(input_offset), ToArg(filter_offset), ToArg(output_offset), ToArg(clip_min),
+        ToArg(clip_max),     ToArg(multiplier),    ToArg(shift)};
+
+    // cmsis_nn_dims *input_dims
+    Array<PrimExpr> input_shape = fc_call->args[0]->type_as<TensorTypeNode>()->shape;
+    int32_t batch_size = qnn::get_const_int(input_shape[0]);
+    int32_t in_channels = qnn::get_const_int(input_shape[1]);
+    Array<PrimExpr> cmsisnn_input_shape{input_shape[0], 1, 1, input_shape[1]};
+
+    // cmsis_nn_dims *filter_dims
+    Array<PrimExpr> cmsisnn_filter_shape{in_channels, 1, 1, out_channels};
+
+    // cmsis_nn_dims *bias_dims
+    Array<PrimExpr> bias_shape{1, 1, 1, out_channels};
+
+    // cmsis_nn_dims *output_dims
+    Array<PrimExpr> cmsisnn_output_shape{batch_size, 1, 1, out_channels};
+
+    std::string cmsisnn_api = "arm_fully_connected_s8";
+    tvm::Array<PrimExpr> call_ext_args = {tir::StringImm(cmsisnn_api), input, filter};
+    if (bias_add_call) {
+      call_ext_args.push_back(bias);
+    }
+    call_ext_args.push_back(output);
+
+    int context_buffer_size = 0;
+    std::string context_buffer_name = "NULL";
+    tvm::Array<PrimExpr> context_buffer_args = {tir::StringImm(context_buffer_name),
+                                                ToArg(context_buffer_size)};
+
+    scalar_args = tvm::runtime::Concat(context_buffer_args, scalar_args);
+    scalar_args = tvm::runtime::Concat(scalar_args, cmsisnn_input_shape);
+    scalar_args = tvm::runtime::Concat(scalar_args, cmsisnn_filter_shape);
+    scalar_args = tvm::runtime::Concat(scalar_args, bias_shape);
+    scalar_args = tvm::runtime::Concat(scalar_args, cmsisnn_output_shape);
+    call_ext_args = tvm::runtime::Concat(call_ext_args, scalar_args);

Review comment:
       nit : Would it be possible to keep the definitions of the sub-categories that get concatenated closer to here, so it is easier to follow?

##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -234,6 +222,114 @@ class RelayToTIRVisitor : public MixedModeMutator {
                             context_buffer_size);
   }
 
+  void EmitFullyConnected(const GlobalVar& global_var, const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* fc_call = nullptr;
+    const CallNode* final_call = expr.as<CallNode>();
+    const OpNode* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    const CallNode* requantize_input = requantize_call->args[0].as<CallNode>();
+    const OpNode* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      fc_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      fc_call = requantize_input;
+    }
+
+    // TIR variables are created in the order they appear in the Relay partitioned function
+    // %1 = qnn.dense(%input, %weight_const_0, input_zero_point_scalar, kernel_zero_point_scalar,
+    //                 %input_scale_scalar, %kernel_scale_scalar)
+    // %2 = nn.bias_add(%1, %bias_const_1, axis=1)
+    // %3 = qnn.requantize(%2, %req_input_scale_scalar, %req_input_zero_point_scalar,
+    //                     %output_scale_scalar, %output_zero_point_scalar)
+    // clip(%3, a_min=%min_scalar, a_max=%max_scalar)
+    tir::Var input("input", DataType::Handle(8));
+    tir::Var filter("filter", DataType::Handle(8));
+    tir::Var bias("bias", DataType::Handle(32));
+    tir::Var output("output", DataType::Handle(8));
+
+    // Individual arguments to the structs arguments of the CMSIS-NN API are filled into call_extern
+    // https://github.com/ARM-software/CMSIS_5/blob/def6f800f95661eb3451d317f7d0dde504f6020d/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c#L50
+
+    // prepare cmsis_nn_fc_params
+    const DenseAttrs* dense_attrs = fc_call->attrs.as<DenseAttrs>();
+    int32_t input_offset = -GetScalarFromConstant<int32_t>(fc_call->args[2]);
+    int32_t filter_offset = -GetScalarFromConstant<int32_t>(fc_call->args[3]);
+    int32_t output_offset = GetScalarFromConstant<int32_t>(requantize_call->args[4]);
+    float input_scale = GetScalarFromConstant<float>(requantize_call->args[1]);
+    float output_scale = GetScalarFromConstant<float>(requantize_call->args[3]);
+    int32_t out_channels = qnn::get_const_int(dense_attrs->units);
+    int32_t clip_min, clip_max;
+    if (clip_call) {
+      const ClipAttrs* clip_attrs = clip_call->attrs.as<ClipAttrs>();
+      clip_min = clip_attrs->a_min;
+      clip_max = clip_attrs->a_max;
+    } else {
+      clip_min = -128;
+      clip_max = 127;
+    }
+
+    double quantized_multiplier =
+        static_cast<double>(input_scale) / static_cast<double>(output_scale);
+    auto mult_shift_pair = tvm::relay::qnn::GetFixedPointMultiplierShift(quantized_multiplier);
+    int32_t multiplier = std::get<0>(mult_shift_pair);
+    int32_t shift = std::get<1>(mult_shift_pair);
+
+    tvm::Array<PrimExpr> scalar_args = {
+        ToArg(input_offset), ToArg(filter_offset), ToArg(output_offset), ToArg(clip_min),
+        ToArg(clip_max),     ToArg(multiplier),    ToArg(shift)};
+
+    // cmsis_nn_dims *input_dims

Review comment:
       What do these comments mean ?

##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -234,6 +222,114 @@ class RelayToTIRVisitor : public MixedModeMutator {
                             context_buffer_size);
   }
 
+  void EmitFullyConnected(const GlobalVar& global_var, const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* fc_call = nullptr;
+    const CallNode* final_call = expr.as<CallNode>();
+    const OpNode* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    const CallNode* requantize_input = requantize_call->args[0].as<CallNode>();
+    const OpNode* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      fc_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      fc_call = requantize_input;
+    }
+
+    // TIR variables are created in the order they appear in the Relay partitioned function
+    // %1 = qnn.dense(%input, %weight_const_0, input_zero_point_scalar, kernel_zero_point_scalar,
+    //                 %input_scale_scalar, %kernel_scale_scalar)
+    // %2 = nn.bias_add(%1, %bias_const_1, axis=1)
+    // %3 = qnn.requantize(%2, %req_input_scale_scalar, %req_input_zero_point_scalar,
+    //                     %output_scale_scalar, %output_zero_point_scalar)
+    // clip(%3, a_min=%min_scalar, a_max=%max_scalar)
+    tir::Var input("input", DataType::Handle(8));
+    tir::Var filter("filter", DataType::Handle(8));
+    tir::Var bias("bias", DataType::Handle(32));
+    tir::Var output("output", DataType::Handle(8));
+
+    // Individual arguments to the structs arguments of the CMSIS-NN API are filled into call_extern
+    // https://github.com/ARM-software/CMSIS_5/blob/def6f800f95661eb3451d317f7d0dde504f6020d/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c#L50
+
+    // prepare cmsis_nn_fc_params
+    const DenseAttrs* dense_attrs = fc_call->attrs.as<DenseAttrs>();
+    int32_t input_offset = -GetScalarFromConstant<int32_t>(fc_call->args[2]);
+    int32_t filter_offset = -GetScalarFromConstant<int32_t>(fc_call->args[3]);
+    int32_t output_offset = GetScalarFromConstant<int32_t>(requantize_call->args[4]);
+    float input_scale = GetScalarFromConstant<float>(requantize_call->args[1]);
+    float output_scale = GetScalarFromConstant<float>(requantize_call->args[3]);
+    int32_t out_channels = qnn::get_const_int(dense_attrs->units);
+    int32_t clip_min, clip_max;
+    if (clip_call) {
+      const ClipAttrs* clip_attrs = clip_call->attrs.as<ClipAttrs>();
+      clip_min = clip_attrs->a_min;
+      clip_max = clip_attrs->a_max;
+    } else {
+      clip_min = -128;
+      clip_max = 127;
+    }
+
+    double quantized_multiplier =
+        static_cast<double>(input_scale) / static_cast<double>(output_scale);
+    auto mult_shift_pair = tvm::relay::qnn::GetFixedPointMultiplierShift(quantized_multiplier);
+    int32_t multiplier = std::get<0>(mult_shift_pair);
+    int32_t shift = std::get<1>(mult_shift_pair);
+
+    tvm::Array<PrimExpr> scalar_args = {
+        ToArg(input_offset), ToArg(filter_offset), ToArg(output_offset), ToArg(clip_min),
+        ToArg(clip_max),     ToArg(multiplier),    ToArg(shift)};
+
+    // cmsis_nn_dims *input_dims
+    Array<PrimExpr> input_shape = fc_call->args[0]->type_as<TensorTypeNode>()->shape;
+    int32_t batch_size = qnn::get_const_int(input_shape[0]);
+    int32_t in_channels = qnn::get_const_int(input_shape[1]);
+    Array<PrimExpr> cmsisnn_input_shape{input_shape[0], 1, 1, input_shape[1]};
+
+    // cmsis_nn_dims *filter_dims
+    Array<PrimExpr> cmsisnn_filter_shape{in_channels, 1, 1, out_channels};
+
+    // cmsis_nn_dims *bias_dims
+    Array<PrimExpr> bias_shape{1, 1, 1, out_channels};
+
+    // cmsis_nn_dims *output_dims
+    Array<PrimExpr> cmsisnn_output_shape{batch_size, 1, 1, out_channels};
+
+    std::string cmsisnn_api = "arm_fully_connected_s8";

Review comment:
       nit : I think we can just use an immediate value in the extern call.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@tvm.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [tvm] manupa-arm commented on a change in pull request #9456: [6/10] Code generation for fully connected layer via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on a change in pull request #9456:
URL: https://github.com/apache/tvm/pull/9456#discussion_r761260496



##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -234,6 +222,114 @@ class RelayToTIRVisitor : public MixedModeMutator {
                             context_buffer_size);
   }
 
+  void EmitFullyConnected(const GlobalVar& global_var, const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* fc_call = nullptr;
+    const CallNode* final_call = expr.as<CallNode>();
+    const OpNode* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    const CallNode* requantize_input = requantize_call->args[0].as<CallNode>();
+    const OpNode* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      fc_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      fc_call = requantize_input;
+    }
+
+    // TIR variables are created in the order they appear in the Relay partitioned function
+    // %1 = qnn.dense(%input, %weight_const_0, input_zero_point_scalar, kernel_zero_point_scalar,
+    //                 %input_scale_scalar, %kernel_scale_scalar)
+    // %2 = nn.bias_add(%1, %bias_const_1, axis=1)
+    // %3 = qnn.requantize(%2, %req_input_scale_scalar, %req_input_zero_point_scalar,
+    //                     %output_scale_scalar, %output_zero_point_scalar)
+    // clip(%3, a_min=%min_scalar, a_max=%max_scalar)
+    tir::Var input("input", DataType::Handle(8));
+    tir::Var filter("filter", DataType::Handle(8));
+    tir::Var bias("bias", DataType::Handle(32));
+    tir::Var output("output", DataType::Handle(8));
+
+    // Individual arguments to the structs arguments of the CMSIS-NN API are filled into call_extern
+    // https://github.com/ARM-software/CMSIS_5/blob/def6f800f95661eb3451d317f7d0dde504f6020d/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c#L50
+
+    // prepare cmsis_nn_fc_params
+    const DenseAttrs* dense_attrs = fc_call->attrs.as<DenseAttrs>();
+    int32_t input_offset = -GetScalarFromConstant<int32_t>(fc_call->args[2]);
+    int32_t filter_offset = -GetScalarFromConstant<int32_t>(fc_call->args[3]);
+    int32_t output_offset = GetScalarFromConstant<int32_t>(requantize_call->args[4]);
+    float input_scale = GetScalarFromConstant<float>(requantize_call->args[1]);
+    float output_scale = GetScalarFromConstant<float>(requantize_call->args[3]);
+    int32_t out_channels = qnn::get_const_int(dense_attrs->units);
+    int32_t clip_min, clip_max;
+    if (clip_call) {
+      const ClipAttrs* clip_attrs = clip_call->attrs.as<ClipAttrs>();
+      clip_min = clip_attrs->a_min;
+      clip_max = clip_attrs->a_max;
+    } else {
+      clip_min = -128;
+      clip_max = 127;
+    }
+
+    double quantized_multiplier =
+        static_cast<double>(input_scale) / static_cast<double>(output_scale);
+    auto mult_shift_pair = tvm::relay::qnn::GetFixedPointMultiplierShift(quantized_multiplier);
+    int32_t multiplier = std::get<0>(mult_shift_pair);
+    int32_t shift = std::get<1>(mult_shift_pair);
+
+    tvm::Array<PrimExpr> scalar_args = {
+        ToArg(input_offset), ToArg(filter_offset), ToArg(output_offset), ToArg(clip_min),
+        ToArg(clip_max),     ToArg(multiplier),    ToArg(shift)};
+
+    // cmsis_nn_dims *input_dims
+    Array<PrimExpr> input_shape = fc_call->args[0]->type_as<TensorTypeNode>()->shape;
+    int32_t batch_size = qnn::get_const_int(input_shape[0]);
+    int32_t in_channels = qnn::get_const_int(input_shape[1]);
+    Array<PrimExpr> cmsisnn_input_shape{input_shape[0], 1, 1, input_shape[1]};
+
+    // cmsis_nn_dims *filter_dims
+    Array<PrimExpr> cmsisnn_filter_shape{in_channels, 1, 1, out_channels};
+
+    // cmsis_nn_dims *bias_dims
+    Array<PrimExpr> bias_shape{1, 1, 1, out_channels};
+
+    // cmsis_nn_dims *output_dims
+    Array<PrimExpr> cmsisnn_output_shape{batch_size, 1, 1, out_channels};
+
+    std::string cmsisnn_api = "arm_fully_connected_s8";
+    tvm::Array<PrimExpr> call_ext_args = {tir::StringImm(cmsisnn_api), input, filter};
+    if (bias_add_call) {
+      call_ext_args.push_back(bias);
+    }
+    call_ext_args.push_back(output);
+
+    int context_buffer_size = 0;
+    std::string context_buffer_name = "NULL";
+    tvm::Array<PrimExpr> context_buffer_args = {tir::StringImm(context_buffer_name),
+                                                ToArg(context_buffer_size)};
+
+    scalar_args = tvm::runtime::Concat(context_buffer_args, scalar_args);
+    scalar_args = tvm::runtime::Concat(scalar_args, cmsisnn_input_shape);
+    scalar_args = tvm::runtime::Concat(scalar_args, cmsisnn_filter_shape);
+    scalar_args = tvm::runtime::Concat(scalar_args, bias_shape);
+    scalar_args = tvm::runtime::Concat(scalar_args, cmsisnn_output_shape);
+    call_ext_args = tvm::runtime::Concat(call_ext_args, scalar_args);

Review comment:
       e.g.
   ```
   Array<PrimExpr> cmsisnn_input_shape{input_shape[0], 1, 1, input_shape[1]};
   scalar_args = tvm::runtime::Concat(scalar_args, cmsisnn_input_shape);
   
   Array<PrimExpr> cmsisnn_filter_shape{in_channels, 1, 1, out_channels};
   scalar_args = tvm::runtime::Concat(scalar_args, cmsisnn_filter_shape);
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@tvm.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9456: [6/10] Code generation for fully connected layer via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9456:
URL: https://github.com/apache/tvm/pull/9456#discussion_r761264345



##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -234,6 +222,114 @@ class RelayToTIRVisitor : public MixedModeMutator {
                             context_buffer_size);
   }
 
+  void EmitFullyConnected(const GlobalVar& global_var, const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* fc_call = nullptr;
+    const CallNode* final_call = expr.as<CallNode>();
+    const OpNode* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    const CallNode* requantize_input = requantize_call->args[0].as<CallNode>();
+    const OpNode* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      fc_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      fc_call = requantize_input;
+    }
+
+    // TIR variables are created in the order they appear in the Relay partitioned function
+    // %1 = qnn.dense(%input, %weight_const_0, input_zero_point_scalar, kernel_zero_point_scalar,
+    //                 %input_scale_scalar, %kernel_scale_scalar)
+    // %2 = nn.bias_add(%1, %bias_const_1, axis=1)
+    // %3 = qnn.requantize(%2, %req_input_scale_scalar, %req_input_zero_point_scalar,
+    //                     %output_scale_scalar, %output_zero_point_scalar)
+    // clip(%3, a_min=%min_scalar, a_max=%max_scalar)
+    tir::Var input("input", DataType::Handle(8));
+    tir::Var filter("filter", DataType::Handle(8));
+    tir::Var bias("bias", DataType::Handle(32));
+    tir::Var output("output", DataType::Handle(8));
+
+    // Individual arguments to the structs arguments of the CMSIS-NN API are filled into call_extern
+    // https://github.com/ARM-software/CMSIS_5/blob/def6f800f95661eb3451d317f7d0dde504f6020d/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c#L50
+
+    // prepare cmsis_nn_fc_params
+    const DenseAttrs* dense_attrs = fc_call->attrs.as<DenseAttrs>();
+    int32_t input_offset = -GetScalarFromConstant<int32_t>(fc_call->args[2]);
+    int32_t filter_offset = -GetScalarFromConstant<int32_t>(fc_call->args[3]);
+    int32_t output_offset = GetScalarFromConstant<int32_t>(requantize_call->args[4]);
+    float input_scale = GetScalarFromConstant<float>(requantize_call->args[1]);
+    float output_scale = GetScalarFromConstant<float>(requantize_call->args[3]);
+    int32_t out_channels = qnn::get_const_int(dense_attrs->units);
+    int32_t clip_min, clip_max;
+    if (clip_call) {
+      const ClipAttrs* clip_attrs = clip_call->attrs.as<ClipAttrs>();
+      clip_min = clip_attrs->a_min;
+      clip_max = clip_attrs->a_max;
+    } else {
+      clip_min = -128;
+      clip_max = 127;
+    }
+
+    double quantized_multiplier =
+        static_cast<double>(input_scale) / static_cast<double>(output_scale);
+    auto mult_shift_pair = tvm::relay::qnn::GetFixedPointMultiplierShift(quantized_multiplier);
+    int32_t multiplier = std::get<0>(mult_shift_pair);
+    int32_t shift = std::get<1>(mult_shift_pair);
+
+    tvm::Array<PrimExpr> scalar_args = {
+        ToArg(input_offset), ToArg(filter_offset), ToArg(output_offset), ToArg(clip_min),
+        ToArg(clip_max),     ToArg(multiplier),    ToArg(shift)};
+
+    // cmsis_nn_dims *input_dims
+    Array<PrimExpr> input_shape = fc_call->args[0]->type_as<TensorTypeNode>()->shape;
+    int32_t batch_size = qnn::get_const_int(input_shape[0]);
+    int32_t in_channels = qnn::get_const_int(input_shape[1]);
+    Array<PrimExpr> cmsisnn_input_shape{input_shape[0], 1, 1, input_shape[1]};
+
+    // cmsis_nn_dims *filter_dims
+    Array<PrimExpr> cmsisnn_filter_shape{in_channels, 1, 1, out_channels};
+
+    // cmsis_nn_dims *bias_dims
+    Array<PrimExpr> bias_shape{1, 1, 1, out_channels};
+
+    // cmsis_nn_dims *output_dims
+    Array<PrimExpr> cmsisnn_output_shape{batch_size, 1, 1, out_channels};
+
+    std::string cmsisnn_api = "arm_fully_connected_s8";
+    tvm::Array<PrimExpr> call_ext_args = {tir::StringImm(cmsisnn_api), input, filter};
+    if (bias_add_call) {
+      call_ext_args.push_back(bias);
+    }
+    call_ext_args.push_back(output);
+
+    int context_buffer_size = 0;
+    std::string context_buffer_name = "NULL";
+    tvm::Array<PrimExpr> context_buffer_args = {tir::StringImm(context_buffer_name),
+                                                ToArg(context_buffer_size)};
+
+    scalar_args = tvm::runtime::Concat(context_buffer_args, scalar_args);
+    scalar_args = tvm::runtime::Concat(scalar_args, cmsisnn_input_shape);
+    scalar_args = tvm::runtime::Concat(scalar_args, cmsisnn_filter_shape);
+    scalar_args = tvm::runtime::Concat(scalar_args, bias_shape);
+    scalar_args = tvm::runtime::Concat(scalar_args, cmsisnn_output_shape);
+    call_ext_args = tvm::runtime::Concat(call_ext_args, scalar_args);

Review comment:
       Can't do because of a cross dependency in this case :confused: 




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@tvm.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [tvm] ashutosh-arm commented on pull request #9456: [6/10] Code generation for fully connected layer via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on pull request #9456:
URL: https://github.com/apache/tvm/pull/9456#issuecomment-961791022


   cc: @Mousius @manupa-arm @leandron @areusch for code reviews.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@tvm.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [tvm] ashutosh-arm commented on pull request #9456: [6/10] Code generation for fully connected layer via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on pull request #9456:
URL: https://github.com/apache/tvm/pull/9456#issuecomment-961791022


   cc: @Mousius @manupa-arm @leandron @areusch for code reviews.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@tvm.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9456: [6/10] Code generation for fully connected layer via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9456:
URL: https://github.com/apache/tvm/pull/9456#discussion_r761254729



##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -234,6 +222,114 @@ class RelayToTIRVisitor : public MixedModeMutator {
                             context_buffer_size);
   }
 
+  void EmitFullyConnected(const GlobalVar& global_var, const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* fc_call = nullptr;
+    const CallNode* final_call = expr.as<CallNode>();
+    const OpNode* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    const CallNode* requantize_input = requantize_call->args[0].as<CallNode>();
+    const OpNode* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      fc_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      fc_call = requantize_input;
+    }
+
+    // TIR variables are created in the order they appear in the Relay partitioned function
+    // %1 = qnn.dense(%input, %weight_const_0, input_zero_point_scalar, kernel_zero_point_scalar,
+    //                 %input_scale_scalar, %kernel_scale_scalar)
+    // %2 = nn.bias_add(%1, %bias_const_1, axis=1)
+    // %3 = qnn.requantize(%2, %req_input_scale_scalar, %req_input_zero_point_scalar,
+    //                     %output_scale_scalar, %output_zero_point_scalar)
+    // clip(%3, a_min=%min_scalar, a_max=%max_scalar)
+    tir::Var input("input", DataType::Handle(8));
+    tir::Var filter("filter", DataType::Handle(8));
+    tir::Var bias("bias", DataType::Handle(32));
+    tir::Var output("output", DataType::Handle(8));
+
+    // Individual arguments to the structs arguments of the CMSIS-NN API are filled into call_extern
+    // https://github.com/ARM-software/CMSIS_5/blob/def6f800f95661eb3451d317f7d0dde504f6020d/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c#L50
+
+    // prepare cmsis_nn_fc_params
+    const DenseAttrs* dense_attrs = fc_call->attrs.as<DenseAttrs>();
+    int32_t input_offset = -GetScalarFromConstant<int32_t>(fc_call->args[2]);
+    int32_t filter_offset = -GetScalarFromConstant<int32_t>(fc_call->args[3]);
+    int32_t output_offset = GetScalarFromConstant<int32_t>(requantize_call->args[4]);
+    float input_scale = GetScalarFromConstant<float>(requantize_call->args[1]);
+    float output_scale = GetScalarFromConstant<float>(requantize_call->args[3]);
+    int32_t out_channels = qnn::get_const_int(dense_attrs->units);
+    int32_t clip_min, clip_max;
+    if (clip_call) {
+      const ClipAttrs* clip_attrs = clip_call->attrs.as<ClipAttrs>();
+      clip_min = clip_attrs->a_min;
+      clip_max = clip_attrs->a_max;
+    } else {
+      clip_min = -128;
+      clip_max = 127;
+    }
+
+    double quantized_multiplier =
+        static_cast<double>(input_scale) / static_cast<double>(output_scale);
+    auto mult_shift_pair = tvm::relay::qnn::GetFixedPointMultiplierShift(quantized_multiplier);
+    int32_t multiplier = std::get<0>(mult_shift_pair);
+    int32_t shift = std::get<1>(mult_shift_pair);
+
+    tvm::Array<PrimExpr> scalar_args = {
+        ToArg(input_offset), ToArg(filter_offset), ToArg(output_offset), ToArg(clip_min),
+        ToArg(clip_max),     ToArg(multiplier),    ToArg(shift)};
+
+    // cmsis_nn_dims *input_dims
+    Array<PrimExpr> input_shape = fc_call->args[0]->type_as<TensorTypeNode>()->shape;
+    int32_t batch_size = qnn::get_const_int(input_shape[0]);
+    int32_t in_channels = qnn::get_const_int(input_shape[1]);
+    Array<PrimExpr> cmsisnn_input_shape{input_shape[0], 1, 1, input_shape[1]};
+
+    // cmsis_nn_dims *filter_dims
+    Array<PrimExpr> cmsisnn_filter_shape{in_channels, 1, 1, out_channels};
+
+    // cmsis_nn_dims *bias_dims
+    Array<PrimExpr> bias_shape{1, 1, 1, out_channels};
+
+    // cmsis_nn_dims *output_dims
+    Array<PrimExpr> cmsisnn_output_shape{batch_size, 1, 1, out_channels};
+
+    std::string cmsisnn_api = "arm_fully_connected_s8";

Review comment:
       Guess, followed suit of the other operators. I will pass it directly now.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@tvm.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9456: [6/10] Code generation for fully connected layer via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9456:
URL: https://github.com/apache/tvm/pull/9456#discussion_r761258403



##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -234,6 +222,114 @@ class RelayToTIRVisitor : public MixedModeMutator {
                             context_buffer_size);
   }
 
+  void EmitFullyConnected(const GlobalVar& global_var, const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* fc_call = nullptr;
+    const CallNode* final_call = expr.as<CallNode>();
+    const OpNode* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    const CallNode* requantize_input = requantize_call->args[0].as<CallNode>();
+    const OpNode* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      fc_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      fc_call = requantize_input;
+    }
+
+    // TIR variables are created in the order they appear in the Relay partitioned function
+    // %1 = qnn.dense(%input, %weight_const_0, input_zero_point_scalar, kernel_zero_point_scalar,
+    //                 %input_scale_scalar, %kernel_scale_scalar)
+    // %2 = nn.bias_add(%1, %bias_const_1, axis=1)
+    // %3 = qnn.requantize(%2, %req_input_scale_scalar, %req_input_zero_point_scalar,
+    //                     %output_scale_scalar, %output_zero_point_scalar)
+    // clip(%3, a_min=%min_scalar, a_max=%max_scalar)
+    tir::Var input("input", DataType::Handle(8));
+    tir::Var filter("filter", DataType::Handle(8));
+    tir::Var bias("bias", DataType::Handle(32));
+    tir::Var output("output", DataType::Handle(8));
+
+    // Individual arguments to the structs arguments of the CMSIS-NN API are filled into call_extern
+    // https://github.com/ARM-software/CMSIS_5/blob/def6f800f95661eb3451d317f7d0dde504f6020d/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c#L50
+
+    // prepare cmsis_nn_fc_params
+    const DenseAttrs* dense_attrs = fc_call->attrs.as<DenseAttrs>();
+    int32_t input_offset = -GetScalarFromConstant<int32_t>(fc_call->args[2]);
+    int32_t filter_offset = -GetScalarFromConstant<int32_t>(fc_call->args[3]);
+    int32_t output_offset = GetScalarFromConstant<int32_t>(requantize_call->args[4]);
+    float input_scale = GetScalarFromConstant<float>(requantize_call->args[1]);
+    float output_scale = GetScalarFromConstant<float>(requantize_call->args[3]);
+    int32_t out_channels = qnn::get_const_int(dense_attrs->units);
+    int32_t clip_min, clip_max;
+    if (clip_call) {
+      const ClipAttrs* clip_attrs = clip_call->attrs.as<ClipAttrs>();
+      clip_min = clip_attrs->a_min;
+      clip_max = clip_attrs->a_max;
+    } else {
+      clip_min = -128;
+      clip_max = 127;
+    }
+
+    double quantized_multiplier =
+        static_cast<double>(input_scale) / static_cast<double>(output_scale);
+    auto mult_shift_pair = tvm::relay::qnn::GetFixedPointMultiplierShift(quantized_multiplier);
+    int32_t multiplier = std::get<0>(mult_shift_pair);
+    int32_t shift = std::get<1>(mult_shift_pair);
+
+    tvm::Array<PrimExpr> scalar_args = {
+        ToArg(input_offset), ToArg(filter_offset), ToArg(output_offset), ToArg(clip_min),
+        ToArg(clip_max),     ToArg(multiplier),    ToArg(shift)};
+
+    // cmsis_nn_dims *input_dims
+    Array<PrimExpr> input_shape = fc_call->args[0]->type_as<TensorTypeNode>()->shape;
+    int32_t batch_size = qnn::get_const_int(input_shape[0]);
+    int32_t in_channels = qnn::get_const_int(input_shape[1]);
+    Array<PrimExpr> cmsisnn_input_shape{input_shape[0], 1, 1, input_shape[1]};
+
+    // cmsis_nn_dims *filter_dims
+    Array<PrimExpr> cmsisnn_filter_shape{in_channels, 1, 1, out_channels};
+
+    // cmsis_nn_dims *bias_dims
+    Array<PrimExpr> bias_shape{1, 1, 1, out_channels};
+
+    // cmsis_nn_dims *output_dims
+    Array<PrimExpr> cmsisnn_output_shape{batch_size, 1, 1, out_channels};
+
+    std::string cmsisnn_api = "arm_fully_connected_s8";
+    tvm::Array<PrimExpr> call_ext_args = {tir::StringImm(cmsisnn_api), input, filter};
+    if (bias_add_call) {
+      call_ext_args.push_back(bias);
+    }
+    call_ext_args.push_back(output);
+
+    int context_buffer_size = 0;
+    std::string context_buffer_name = "NULL";
+    tvm::Array<PrimExpr> context_buffer_args = {tir::StringImm(context_buffer_name),
+                                                ToArg(context_buffer_size)};
+
+    scalar_args = tvm::runtime::Concat(context_buffer_args, scalar_args);
+    scalar_args = tvm::runtime::Concat(scalar_args, cmsisnn_input_shape);
+    scalar_args = tvm::runtime::Concat(scalar_args, cmsisnn_filter_shape);
+    scalar_args = tvm::runtime::Concat(scalar_args, bias_shape);
+    scalar_args = tvm::runtime::Concat(scalar_args, cmsisnn_output_shape);
+    call_ext_args = tvm::runtime::Concat(call_ext_args, scalar_args);

Review comment:
       I didn't get this. Which ones should be close together and are not?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@tvm.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [tvm] ashutosh-arm closed pull request #9456: [6/10] Code generation for fully connected layer via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm closed pull request #9456:
URL: https://github.com/apache/tvm/pull/9456


   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@tvm.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9456: [6/10] Code generation for fully connected layer via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9456:
URL: https://github.com/apache/tvm/pull/9456#discussion_r761250656



##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -234,6 +222,114 @@ class RelayToTIRVisitor : public MixedModeMutator {
                             context_buffer_size);
   }
 
+  void EmitFullyConnected(const GlobalVar& global_var, const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* fc_call = nullptr;
+    const CallNode* final_call = expr.as<CallNode>();
+    const OpNode* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    const CallNode* requantize_input = requantize_call->args[0].as<CallNode>();
+    const OpNode* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      fc_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      fc_call = requantize_input;
+    }
+
+    // TIR variables are created in the order they appear in the Relay partitioned function
+    // %1 = qnn.dense(%input, %weight_const_0, input_zero_point_scalar, kernel_zero_point_scalar,
+    //                 %input_scale_scalar, %kernel_scale_scalar)
+    // %2 = nn.bias_add(%1, %bias_const_1, axis=1)
+    // %3 = qnn.requantize(%2, %req_input_scale_scalar, %req_input_zero_point_scalar,
+    //                     %output_scale_scalar, %output_zero_point_scalar)
+    // clip(%3, a_min=%min_scalar, a_max=%max_scalar)
+    tir::Var input("input", DataType::Handle(8));
+    tir::Var filter("filter", DataType::Handle(8));
+    tir::Var bias("bias", DataType::Handle(32));
+    tir::Var output("output", DataType::Handle(8));
+
+    // Individual arguments to the structs arguments of the CMSIS-NN API are filled into call_extern
+    // https://github.com/ARM-software/CMSIS_5/blob/def6f800f95661eb3451d317f7d0dde504f6020d/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c#L50
+
+    // prepare cmsis_nn_fc_params
+    const DenseAttrs* dense_attrs = fc_call->attrs.as<DenseAttrs>();
+    int32_t input_offset = -GetScalarFromConstant<int32_t>(fc_call->args[2]);
+    int32_t filter_offset = -GetScalarFromConstant<int32_t>(fc_call->args[3]);
+    int32_t output_offset = GetScalarFromConstant<int32_t>(requantize_call->args[4]);
+    float input_scale = GetScalarFromConstant<float>(requantize_call->args[1]);
+    float output_scale = GetScalarFromConstant<float>(requantize_call->args[3]);
+    int32_t out_channels = qnn::get_const_int(dense_attrs->units);
+    int32_t clip_min, clip_max;
+    if (clip_call) {
+      const ClipAttrs* clip_attrs = clip_call->attrs.as<ClipAttrs>();
+      clip_min = clip_attrs->a_min;
+      clip_max = clip_attrs->a_max;
+    } else {
+      clip_min = -128;
+      clip_max = 127;
+    }
+
+    double quantized_multiplier =
+        static_cast<double>(input_scale) / static_cast<double>(output_scale);
+    auto mult_shift_pair = tvm::relay::qnn::GetFixedPointMultiplierShift(quantized_multiplier);
+    int32_t multiplier = std::get<0>(mult_shift_pair);
+    int32_t shift = std::get<1>(mult_shift_pair);
+
+    tvm::Array<PrimExpr> scalar_args = {
+        ToArg(input_offset), ToArg(filter_offset), ToArg(output_offset), ToArg(clip_min),
+        ToArg(clip_max),     ToArg(multiplier),    ToArg(shift)};
+
+    // cmsis_nn_dims *input_dims

Review comment:
       They are the names of the CMSIS-NN structs. Someone developing / maintaining the code can immediately look them up into the CMSIS-NN repo.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@tvm.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [tvm] ashutosh-arm commented on pull request #9456: [6/10] Code generation for fully connected layer via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on pull request #9456:
URL: https://github.com/apache/tvm/pull/9456#issuecomment-961791022


   cc: @Mousius @manupa-arm @leandron @areusch for code reviews.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@tvm.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9456: [6/10] Code generation for fully connected layer via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9456:
URL: https://github.com/apache/tvm/pull/9456#discussion_r761264345



##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -234,6 +222,114 @@ class RelayToTIRVisitor : public MixedModeMutator {
                             context_buffer_size);
   }
 
+  void EmitFullyConnected(const GlobalVar& global_var, const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* fc_call = nullptr;
+    const CallNode* final_call = expr.as<CallNode>();
+    const OpNode* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    const CallNode* requantize_input = requantize_call->args[0].as<CallNode>();
+    const OpNode* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      fc_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      fc_call = requantize_input;
+    }
+
+    // TIR variables are created in the order they appear in the Relay partitioned function
+    // %1 = qnn.dense(%input, %weight_const_0, input_zero_point_scalar, kernel_zero_point_scalar,
+    //                 %input_scale_scalar, %kernel_scale_scalar)
+    // %2 = nn.bias_add(%1, %bias_const_1, axis=1)
+    // %3 = qnn.requantize(%2, %req_input_scale_scalar, %req_input_zero_point_scalar,
+    //                     %output_scale_scalar, %output_zero_point_scalar)
+    // clip(%3, a_min=%min_scalar, a_max=%max_scalar)
+    tir::Var input("input", DataType::Handle(8));
+    tir::Var filter("filter", DataType::Handle(8));
+    tir::Var bias("bias", DataType::Handle(32));
+    tir::Var output("output", DataType::Handle(8));
+
+    // Individual arguments to the structs arguments of the CMSIS-NN API are filled into call_extern
+    // https://github.com/ARM-software/CMSIS_5/blob/def6f800f95661eb3451d317f7d0dde504f6020d/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c#L50
+
+    // prepare cmsis_nn_fc_params
+    const DenseAttrs* dense_attrs = fc_call->attrs.as<DenseAttrs>();
+    int32_t input_offset = -GetScalarFromConstant<int32_t>(fc_call->args[2]);
+    int32_t filter_offset = -GetScalarFromConstant<int32_t>(fc_call->args[3]);
+    int32_t output_offset = GetScalarFromConstant<int32_t>(requantize_call->args[4]);
+    float input_scale = GetScalarFromConstant<float>(requantize_call->args[1]);
+    float output_scale = GetScalarFromConstant<float>(requantize_call->args[3]);
+    int32_t out_channels = qnn::get_const_int(dense_attrs->units);
+    int32_t clip_min, clip_max;
+    if (clip_call) {
+      const ClipAttrs* clip_attrs = clip_call->attrs.as<ClipAttrs>();
+      clip_min = clip_attrs->a_min;
+      clip_max = clip_attrs->a_max;
+    } else {
+      clip_min = -128;
+      clip_max = 127;
+    }
+
+    double quantized_multiplier =
+        static_cast<double>(input_scale) / static_cast<double>(output_scale);
+    auto mult_shift_pair = tvm::relay::qnn::GetFixedPointMultiplierShift(quantized_multiplier);
+    int32_t multiplier = std::get<0>(mult_shift_pair);
+    int32_t shift = std::get<1>(mult_shift_pair);
+
+    tvm::Array<PrimExpr> scalar_args = {
+        ToArg(input_offset), ToArg(filter_offset), ToArg(output_offset), ToArg(clip_min),
+        ToArg(clip_max),     ToArg(multiplier),    ToArg(shift)};
+
+    // cmsis_nn_dims *input_dims
+    Array<PrimExpr> input_shape = fc_call->args[0]->type_as<TensorTypeNode>()->shape;
+    int32_t batch_size = qnn::get_const_int(input_shape[0]);
+    int32_t in_channels = qnn::get_const_int(input_shape[1]);
+    Array<PrimExpr> cmsisnn_input_shape{input_shape[0], 1, 1, input_shape[1]};
+
+    // cmsis_nn_dims *filter_dims
+    Array<PrimExpr> cmsisnn_filter_shape{in_channels, 1, 1, out_channels};
+
+    // cmsis_nn_dims *bias_dims
+    Array<PrimExpr> bias_shape{1, 1, 1, out_channels};
+
+    // cmsis_nn_dims *output_dims
+    Array<PrimExpr> cmsisnn_output_shape{batch_size, 1, 1, out_channels};
+
+    std::string cmsisnn_api = "arm_fully_connected_s8";
+    tvm::Array<PrimExpr> call_ext_args = {tir::StringImm(cmsisnn_api), input, filter};
+    if (bias_add_call) {
+      call_ext_args.push_back(bias);
+    }
+    call_ext_args.push_back(output);
+
+    int context_buffer_size = 0;
+    std::string context_buffer_name = "NULL";
+    tvm::Array<PrimExpr> context_buffer_args = {tir::StringImm(context_buffer_name),
+                                                ToArg(context_buffer_size)};
+
+    scalar_args = tvm::runtime::Concat(context_buffer_args, scalar_args);
+    scalar_args = tvm::runtime::Concat(scalar_args, cmsisnn_input_shape);
+    scalar_args = tvm::runtime::Concat(scalar_args, cmsisnn_filter_shape);
+    scalar_args = tvm::runtime::Concat(scalar_args, bias_shape);
+    scalar_args = tvm::runtime::Concat(scalar_args, cmsisnn_output_shape);
+    call_ext_args = tvm::runtime::Concat(call_ext_args, scalar_args);

Review comment:
       Can't do because of a cross dependency in this case.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@tvm.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9456: [6/10] Code generation for fully connected layer via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9456:
URL: https://github.com/apache/tvm/pull/9456#discussion_r761261906



##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -234,6 +222,114 @@ class RelayToTIRVisitor : public MixedModeMutator {
                             context_buffer_size);
   }
 
+  void EmitFullyConnected(const GlobalVar& global_var, const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* fc_call = nullptr;
+    const CallNode* final_call = expr.as<CallNode>();
+    const OpNode* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    const CallNode* requantize_input = requantize_call->args[0].as<CallNode>();
+    const OpNode* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      fc_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      fc_call = requantize_input;
+    }
+
+    // TIR variables are created in the order they appear in the Relay partitioned function
+    // %1 = qnn.dense(%input, %weight_const_0, input_zero_point_scalar, kernel_zero_point_scalar,
+    //                 %input_scale_scalar, %kernel_scale_scalar)
+    // %2 = nn.bias_add(%1, %bias_const_1, axis=1)
+    // %3 = qnn.requantize(%2, %req_input_scale_scalar, %req_input_zero_point_scalar,
+    //                     %output_scale_scalar, %output_zero_point_scalar)
+    // clip(%3, a_min=%min_scalar, a_max=%max_scalar)
+    tir::Var input("input", DataType::Handle(8));
+    tir::Var filter("filter", DataType::Handle(8));
+    tir::Var bias("bias", DataType::Handle(32));
+    tir::Var output("output", DataType::Handle(8));
+
+    // Individual arguments to the structs arguments of the CMSIS-NN API are filled into call_extern
+    // https://github.com/ARM-software/CMSIS_5/blob/def6f800f95661eb3451d317f7d0dde504f6020d/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c#L50
+
+    // prepare cmsis_nn_fc_params
+    const DenseAttrs* dense_attrs = fc_call->attrs.as<DenseAttrs>();
+    int32_t input_offset = -GetScalarFromConstant<int32_t>(fc_call->args[2]);
+    int32_t filter_offset = -GetScalarFromConstant<int32_t>(fc_call->args[3]);
+    int32_t output_offset = GetScalarFromConstant<int32_t>(requantize_call->args[4]);
+    float input_scale = GetScalarFromConstant<float>(requantize_call->args[1]);
+    float output_scale = GetScalarFromConstant<float>(requantize_call->args[3]);
+    int32_t out_channels = qnn::get_const_int(dense_attrs->units);
+    int32_t clip_min, clip_max;
+    if (clip_call) {
+      const ClipAttrs* clip_attrs = clip_call->attrs.as<ClipAttrs>();
+      clip_min = clip_attrs->a_min;
+      clip_max = clip_attrs->a_max;
+    } else {
+      clip_min = -128;
+      clip_max = 127;
+    }
+
+    double quantized_multiplier =
+        static_cast<double>(input_scale) / static_cast<double>(output_scale);
+    auto mult_shift_pair = tvm::relay::qnn::GetFixedPointMultiplierShift(quantized_multiplier);
+    int32_t multiplier = std::get<0>(mult_shift_pair);
+    int32_t shift = std::get<1>(mult_shift_pair);
+
+    tvm::Array<PrimExpr> scalar_args = {
+        ToArg(input_offset), ToArg(filter_offset), ToArg(output_offset), ToArg(clip_min),
+        ToArg(clip_max),     ToArg(multiplier),    ToArg(shift)};
+
+    // cmsis_nn_dims *input_dims
+    Array<PrimExpr> input_shape = fc_call->args[0]->type_as<TensorTypeNode>()->shape;
+    int32_t batch_size = qnn::get_const_int(input_shape[0]);
+    int32_t in_channels = qnn::get_const_int(input_shape[1]);
+    Array<PrimExpr> cmsisnn_input_shape{input_shape[0], 1, 1, input_shape[1]};
+
+    // cmsis_nn_dims *filter_dims
+    Array<PrimExpr> cmsisnn_filter_shape{in_channels, 1, 1, out_channels};
+
+    // cmsis_nn_dims *bias_dims
+    Array<PrimExpr> bias_shape{1, 1, 1, out_channels};
+
+    // cmsis_nn_dims *output_dims
+    Array<PrimExpr> cmsisnn_output_shape{batch_size, 1, 1, out_channels};
+
+    std::string cmsisnn_api = "arm_fully_connected_s8";
+    tvm::Array<PrimExpr> call_ext_args = {tir::StringImm(cmsisnn_api), input, filter};
+    if (bias_add_call) {
+      call_ext_args.push_back(bias);
+    }
+    call_ext_args.push_back(output);
+
+    int context_buffer_size = 0;
+    std::string context_buffer_name = "NULL";
+    tvm::Array<PrimExpr> context_buffer_args = {tir::StringImm(context_buffer_name),
+                                                ToArg(context_buffer_size)};
+
+    scalar_args = tvm::runtime::Concat(context_buffer_args, scalar_args);
+    scalar_args = tvm::runtime::Concat(scalar_args, cmsisnn_input_shape);
+    scalar_args = tvm::runtime::Concat(scalar_args, cmsisnn_filter_shape);
+    scalar_args = tvm::runtime::Concat(scalar_args, bias_shape);
+    scalar_args = tvm::runtime::Concat(scalar_args, cmsisnn_output_shape);
+    call_ext_args = tvm::runtime::Concat(call_ext_args, scalar_args);

Review comment:
       I see what you mean




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@tvm.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org