Posted to commits@tvm.apache.org by GitBox <gi...@apache.org> on 2021/10/20 16:08:49 UTC

[GitHub] [tvm] ashutosh-arm opened a new pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

ashutosh-arm opened a new pull request #9331:
URL: https://github.com/apache/tvm/pull/9331


   This PR adds support for Conv2D via CMSIS-NN.
   
   Here is the tracking issue: https://github.com/apache/tvm/issues/8646


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@tvm.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r735517548



##########
File path: src/relay/backend/contrib/cmsisnn/generate_constants.cc
##########
@@ -0,0 +1,230 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/attrs/transform.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+Expr MakeTranspose(Expr data, Array<Integer> axes);
+namespace contrib {
+namespace cmsisnn {
+
+class GenerateConstantsMutator : public MixedModeMutator {
+ public:
+  explicit GenerateConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  /*! \brief Converts kernel layout from HWIO to OHWI to align with CMSIS-NN requirements */
+  Expr ConvertKernelLayout(Expr kernel_expr, const Conv2DAttrs* conv2d_attrs, Attrs* new_attrs) {
+    auto attrs = make_object<Conv2DAttrs>();
+    attrs->strides = std::move(conv2d_attrs->strides);
+    attrs->padding = std::move(conv2d_attrs->padding);
+    attrs->dilation = std::move(conv2d_attrs->dilation);
+    attrs->groups = conv2d_attrs->groups;
+    attrs->channels = std::move(conv2d_attrs->channels);
+    attrs->kernel_size = std::move(conv2d_attrs->kernel_size);
+    attrs->data_layout = std::move(conv2d_attrs->data_layout);
+    attrs->kernel_layout = runtime::String("OHWI");
+    attrs->out_layout = std::move(conv2d_attrs->out_layout);
+    attrs->out_dtype = std::move(conv2d_attrs->out_dtype);
+    *new_attrs = tvm::Attrs{attrs};
+
+    IRModule kernel_module;
+    auto func_body = MakeTranspose(kernel_expr, {Integer(3), Integer(0), Integer(1), Integer(2)});
+    auto kernel_func =
+        Function(FreeVars(func_body), func_body, Type(), FreeTypeVars(func_body, kernel_module));
+    GlobalVar kernel_var("main");
+    kernel_module->Add(kernel_var, kernel_func);
+    kernel_module = relay::transform::FoldConstant()(kernel_module);
+    kernel_func = Downcast<Function>(kernel_module->Lookup("main"));
+    return kernel_func->body;
+  }
+
+  /*! \brief Performs weight transpose and substitutes existing constants in the composite
+   *         function for Conv2D with CMSIS-NN Requantize constants */
+  Expr GenerateConv2dRequantConstants(const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* conv2d_call = nullptr;
+    auto* final_call = expr.as<CallNode>();
+    auto* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    auto* requantize_input = requantize_call->args[0].as<CallNode>();
+    auto* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      conv2d_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      conv2d_call = requantize_input;
+    }
+
+    // Transpose weights: HWIO -> OHWI
+    auto* conv2d_attrs = conv2d_call->attrs.as<Conv2DAttrs>();
+    tvm::Attrs new_conv2d_attrs;
+    Expr transposed_kernel =
+        ConvertKernelLayout(conv2d_call->args[1], conv2d_attrs, &new_conv2d_attrs);
+
+    // Obtain input and output scales from Relay's Requantization
+    int64_t out_channels = conv2d_attrs->channels.as<IntImmNode>()->value;
+    float output_scale = GetScalarFromConstant<float>(requantize_call->args[3]);
+    auto input_scales = tvm::relay::qnn::GetFloatVectorFromConstant(requantize_call->args[1]);
+    ICHECK(input_scales.size() == static_cast<size_t>(out_channels));
+
+    // Calculate requantization multiplier and shift
+    Device dev{DLDeviceType::kDLCPU, 0};
+    runtime::NDArray multiplier_nda =
+        runtime::NDArray::Empty({out_channels}, DataType::Int(32), dev);
+    runtime::NDArray shift_nda = runtime::NDArray::Empty({out_channels}, DataType::Int(32), dev);
+    int32_t* multiplier = static_cast<int32_t*>(multiplier_nda->data);
+    int32_t* shift = static_cast<int32_t*>(shift_nda->data);
+    for (int i = 0; i < out_channels; ++i) {
+      double effective_output_scale =
+          static_cast<double>(input_scales[i]) / static_cast<double>(output_scale);
+      std::tie(*(multiplier + i), *(shift + i)) =
+          tvm::relay::qnn::GetFixedPointMultiplierShift(effective_output_scale);
+    }
+
+    // Create constants from requantization multiplier and shift
+    Constant multiplier_const(multiplier_nda);
+    Constant shift_const(shift_nda);
+
+    // Convert scale scalars into Constants
+    // Scales are expected as Constants by following passes
+    Expr weight_scale = conv2d_call->args[5];
+    Expr req_inp_scale = requantize_call->args[1];
+    if (out_channels == 1) {
+      runtime::NDArray weight_scale_nda =
+          runtime::NDArray::Empty({out_channels}, DataType::Float(32), dev);
+      float* weight_scale_p = static_cast<float*>(weight_scale_nda->data);
+      *weight_scale_p = GetScalarFromConstant<float>(weight_scale);
+      weight_scale = Constant(weight_scale_nda);
+
+      runtime::NDArray req_inp_scale_nda =
+          runtime::NDArray::Empty({out_channels}, DataType::Float(32), dev);
+      float* req_inp_scale_p = static_cast<float*>(req_inp_scale_nda->data);
+      *req_inp_scale_p = GetScalarFromConstant<float>(req_inp_scale);
+      req_inp_scale = Constant(req_inp_scale_nda);
+    }
+
+    // Replace existing weights (HWIO) with the transposed ones (OHWI)
+    // Substitute Conv2D weight_zero_point with the CMSIS-NN multiplier
+    // Substitute Requantize input_zero_point with CMSIS-NN shift
+    // Conv2D arguments: data, weight, input_zp, weight_zp, input_sc, weight_sc
+    Array<Expr> conv2d_args = {conv2d_call->args[0], transposed_kernel,    conv2d_call->args[2],

Review comment:
       yes :(







[GitHub] [tvm] Mousius commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
Mousius commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r734670353



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}

Review comment:
       Doesn't this have to modify the IRModule to update the Global Vars so the rest of the passes see it correctly?
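    For reference, a minimal sketch of that write-back (it mirrors the Rewrite_
    in this PR's generate_constants.cc; the enclosing MixedModeMutator and its
    IRModule member mod_ are assumed): after rewriting a partitioned function,
    the mutator updates the module under the same GlobalVar so that downstream
    passes resolve calls to the new body.

        Expr Rewrite_(const CallNode* call, const Expr& post) final {
          if (auto* glob_var_node = call->op.as<GlobalVarNode>()) {
            GlobalVar glob_var = GetRef<GlobalVar>(glob_var_node);
            Function func = Downcast<Function>(mod_->Lookup(glob_var));
            Expr new_body = VisitExpr(func->body);
            if (!new_body.same_as(func->body)) {
              Function new_func = Function(FreeVars(new_body), new_body, func->ret_type,
                                           FreeTypeVars(new_body, mod_), func->attrs);
              // Write the rewritten function back so the rest of the passes see it
              mod_->Update(glob_var, new_func);
            }
          }
          return post;
        }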







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r733875751



##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -43,23 +44,153 @@ class RelayToTIRVisitor : public MixedModeVisitor {
   inline IntImm ToArg(int32_t value) { return IntImm(DataType::Int(32), value); }
 
   void CreatePrimFuncForExtern(Array<tir::Var> func_signature,
-                               tvm::Array<PrimExpr> call_extern_args) {
+                               tvm::Array<PrimExpr> call_extern_args, int context_buffer_size = 0) {
     Map<String, ObjectRef> dict_attrs;
     dict_attrs.Set("global_symbol", func_name_);
     dict_attrs.Set("tir.noalias", Bool(true));
 
     tir::Stmt body = tir::Evaluate(
         tvm::tir::Call(DataType::Int(8), tir::builtin::call_extern(), call_extern_args));
 
+    if (context_buffer_size) {
+      // TODO(@ashutosh-arm) while supporting MVE, we need to move allocation through TVMBAW
+      tir::Var buffer_var("context_buffer", PointerType(PrimType(DataType::Int(8)), "global"));
+      body = tir::Allocate(buffer_var, DataType::Int(8), {context_buffer_size}, tir::const_true(),
+                           body);
+    }
+
     primfunc_ = tir::PrimFunc(func_signature, body, VoidType(), Map<tir::Var, tir::Buffer>(),
                               DictAttrs(dict_attrs));
   }
 
+  void EmitConv2D(const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* conv2d_call = nullptr;
+    auto* final_call = expr.as<CallNode>();
+    auto* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    auto* requantize_input = requantize_call->args[0].as<CallNode>();
+    auto* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      conv2d_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      conv2d_call = requantize_input;
+    }
+
+    // TIR variables are created in the order they appear in the Relay partitioned function
+    // %1 = qnn.conv2d(%input, %weight_const_0, input_zero_point_scalar,
+    //                 %cmsisnn_multiplier_const_1, %input_scale_scalar, %weight_scale_const_2)
+    // %2 = nn.bias_add(%1, %bias_const_3, axis=3)
+    // %3 = qnn.requantize(%2, %input_scale_const_4, %cmsisnn_shift_const_5,
+    //                     %output_scale_scalar, %output_zero_point_scalar)
+    // clip(%3, a_min=%min_scalar, a_max=%max_scalar)
+    auto in_var = tir::Var("input", DataType::Handle(8));
+    auto const_var0 = tir::Var("filter", DataType::Handle(8));         // weight
+    auto const_var1 = tir::Var("multiplier", DataType::Handle(32));    // quant multiplier
+    auto const_var2 = tir::Var("filter_scale", DataType::Handle(32));  // weight scale
+    auto const_var3 = tir::Var("bias", DataType::Handle(32));          // bias
+    auto const_var4 = tir::Var("input_scale", DataType::Handle(32));   // input_scale * weight_scale
+    auto const_var5 = tir::Var("shift", DataType::Handle(32));         // quant shift
+    auto out_var = tir::Var("output", DataType::Handle(8));
+
+    // Individual arguments to the structs arguments of the CMSIS-NN API are filled into call_extern
+    // https://github.com/ARM-software/CMSIS_5/blob/def6f800f95661eb3451d317f7d0dde504f6020d/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c#L50
+
+    // prepare cmsis_nn_conv_params
+    auto* conv2d_attrs = conv2d_call->attrs.as<Conv2DAttrs>();
+    int32_t input_offset = -GetScalarFromConstant<int32_t>(conv2d_call->args[2]);
+    int32_t output_offset = GetScalarFromConstant<int32_t>(requantize_call->args[4]);
+    int32_t stride_w = qnn::get_const_int(conv2d_attrs->strides[1]);
+    int32_t stride_h = qnn::get_const_int(conv2d_attrs->strides[0]);
+    int32_t padding_w = qnn::get_const_int(conv2d_attrs->padding[1]);
+    int32_t padding_h = qnn::get_const_int(conv2d_attrs->padding[0]);
+    int32_t dilation_w = qnn::get_const_int(conv2d_attrs->dilation[1]);
+    int32_t dilation_h = qnn::get_const_int(conv2d_attrs->dilation[0]);
+    int32_t clip_min, clip_max;
+    if (clip_call) {
+      auto* clip_attrs = clip_call->attrs.as<ClipAttrs>();
+      clip_min = clip_attrs->a_min;
+      clip_max = clip_attrs->a_max;
+    } else {
+      clip_min = -128;
+      clip_max = 127;
+    }
+
+    // cmsis_nn_dims *input_dims
+    auto input_shape = conv2d_call->args[0]->type_as<TensorTypeNode>()->shape;
+    int32_t input_n = qnn::get_const_int(input_shape[0]);
+    int32_t input_h = qnn::get_const_int(input_shape[1]);
+    int32_t input_w = qnn::get_const_int(input_shape[2]);
+    int32_t input_c = qnn::get_const_int(input_shape[3]);
+
+    // cmsis_nn_dims *filter_dims (OHWI)
+    auto filter_shape = conv2d_call->args[1]->type_as<TensorTypeNode>()->shape;
+    int32_t filter_n = qnn::get_const_int(filter_shape[0]);
+    int32_t filter_h = qnn::get_const_int(filter_shape[1]);
+    int32_t filter_w = qnn::get_const_int(filter_shape[2]);
+    int32_t filter_c = qnn::get_const_int(filter_shape[3]);
+
+    // cmsis_nn_dims *bias_dims
+    int32_t bias_n = 1;
+    int32_t bias_h = 1;
+    int32_t bias_w = 1;
+    int32_t bias_c = qnn::get_const_int(filter_shape[0]);
+
+    // cmsis_nn_dims *output_dims
+    auto output_shape = conv2d_call->type_as<TensorTypeNode>()->shape;

Review comment:
       ACK

##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -43,23 +44,153 @@ class RelayToTIRVisitor : public MixedModeVisitor {
   inline IntImm ToArg(int32_t value) { return IntImm(DataType::Int(32), value); }
 
   void CreatePrimFuncForExtern(Array<tir::Var> func_signature,
-                               tvm::Array<PrimExpr> call_extern_args) {
+                               tvm::Array<PrimExpr> call_extern_args, int context_buffer_size = 0) {
     Map<String, ObjectRef> dict_attrs;
     dict_attrs.Set("global_symbol", func_name_);
     dict_attrs.Set("tir.noalias", Bool(true));
 
     tir::Stmt body = tir::Evaluate(
         tvm::tir::Call(DataType::Int(8), tir::builtin::call_extern(), call_extern_args));
 
+    if (context_buffer_size) {
+      // TODO(@ashutosh-arm) while supporting MVE, we need to move allocation through TVMBAW
+      tir::Var buffer_var("context_buffer", PointerType(PrimType(DataType::Int(8)), "global"));
+      body = tir::Allocate(buffer_var, DataType::Int(8), {context_buffer_size}, tir::const_true(),
+                           body);
+    }
+
     primfunc_ = tir::PrimFunc(func_signature, body, VoidType(), Map<tir::Var, tir::Buffer>(),
                               DictAttrs(dict_attrs));
   }
 
+  void EmitConv2D(const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* conv2d_call = nullptr;
+    auto* final_call = expr.as<CallNode>();
+    auto* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    auto* requantize_input = requantize_call->args[0].as<CallNode>();
+    auto* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      conv2d_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      conv2d_call = requantize_input;
+    }
+
+    // TIR variables are created in the order they appear in the Relay partitioned function
+    // %1 = qnn.conv2d(%input, %weight_const_0, input_zero_point_scalar,
+    //                 %cmsisnn_multiplier_const_1, %input_scale_scalar, %weight_scale_const_2)
+    // %2 = nn.bias_add(%1, %bias_const_3, axis=3)
+    // %3 = qnn.requantize(%2, %input_scale_const_4, %cmsisnn_shift_const_5,
+    //                     %output_scale_scalar, %output_zero_point_scalar)
+    // clip(%3, a_min=%min_scalar, a_max=%max_scalar)
+    auto in_var = tir::Var("input", DataType::Handle(8));
+    auto const_var0 = tir::Var("filter", DataType::Handle(8));         // weight
+    auto const_var1 = tir::Var("multiplier", DataType::Handle(32));    // quant multiplier
+    auto const_var2 = tir::Var("filter_scale", DataType::Handle(32));  // weight scale
+    auto const_var3 = tir::Var("bias", DataType::Handle(32));          // bias
+    auto const_var4 = tir::Var("input_scale", DataType::Handle(32));   // input_scale * weight_scale
+    auto const_var5 = tir::Var("shift", DataType::Handle(32));         // quant shift
+    auto out_var = tir::Var("output", DataType::Handle(8));
+
+    // Individual arguments to the structs arguments of the CMSIS-NN API are filled into call_extern
+    // https://github.com/ARM-software/CMSIS_5/blob/def6f800f95661eb3451d317f7d0dde504f6020d/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c#L50
+
+    // prepare cmsis_nn_conv_params
+    auto* conv2d_attrs = conv2d_call->attrs.as<Conv2DAttrs>();
+    int32_t input_offset = -GetScalarFromConstant<int32_t>(conv2d_call->args[2]);
+    int32_t output_offset = GetScalarFromConstant<int32_t>(requantize_call->args[4]);
+    int32_t stride_w = qnn::get_const_int(conv2d_attrs->strides[1]);
+    int32_t stride_h = qnn::get_const_int(conv2d_attrs->strides[0]);
+    int32_t padding_w = qnn::get_const_int(conv2d_attrs->padding[1]);
+    int32_t padding_h = qnn::get_const_int(conv2d_attrs->padding[0]);
+    int32_t dilation_w = qnn::get_const_int(conv2d_attrs->dilation[1]);
+    int32_t dilation_h = qnn::get_const_int(conv2d_attrs->dilation[0]);
+    int32_t clip_min, clip_max;
+    if (clip_call) {
+      auto* clip_attrs = clip_call->attrs.as<ClipAttrs>();
+      clip_min = clip_attrs->a_min;
+      clip_max = clip_attrs->a_max;
+    } else {
+      clip_min = -128;
+      clip_max = 127;
+    }
+
+    // cmsis_nn_dims *input_dims
+    auto input_shape = conv2d_call->args[0]->type_as<TensorTypeNode>()->shape;
+    int32_t input_n = qnn::get_const_int(input_shape[0]);
+    int32_t input_h = qnn::get_const_int(input_shape[1]);
+    int32_t input_w = qnn::get_const_int(input_shape[2]);
+    int32_t input_c = qnn::get_const_int(input_shape[3]);
+
+    // cmsis_nn_dims *filter_dims (OHWI)
+    auto filter_shape = conv2d_call->args[1]->type_as<TensorTypeNode>()->shape;

Review comment:
       ACK

##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -43,23 +44,153 @@ class RelayToTIRVisitor : public MixedModeVisitor {
   inline IntImm ToArg(int32_t value) { return IntImm(DataType::Int(32), value); }
 
   void CreatePrimFuncForExtern(Array<tir::Var> func_signature,
-                               tvm::Array<PrimExpr> call_extern_args) {
+                               tvm::Array<PrimExpr> call_extern_args, int context_buffer_size = 0) {
     Map<String, ObjectRef> dict_attrs;
     dict_attrs.Set("global_symbol", func_name_);
     dict_attrs.Set("tir.noalias", Bool(true));
 
     tir::Stmt body = tir::Evaluate(
         tvm::tir::Call(DataType::Int(8), tir::builtin::call_extern(), call_extern_args));
 
+    if (context_buffer_size) {
+      // TODO(@ashutosh-arm) while supporting MVE, we need to move allocation through TVMBAW
+      tir::Var buffer_var("context_buffer", PointerType(PrimType(DataType::Int(8)), "global"));
+      body = tir::Allocate(buffer_var, DataType::Int(8), {context_buffer_size}, tir::const_true(),
+                           body);
+    }
+
     primfunc_ = tir::PrimFunc(func_signature, body, VoidType(), Map<tir::Var, tir::Buffer>(),
                               DictAttrs(dict_attrs));
   }
 
+  void EmitConv2D(const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* conv2d_call = nullptr;
+    auto* final_call = expr.as<CallNode>();
+    auto* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    auto* requantize_input = requantize_call->args[0].as<CallNode>();
+    auto* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      conv2d_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      conv2d_call = requantize_input;
+    }
+
+    // TIR variables are created in the order they appear in the Relay partitioned function
+    // %1 = qnn.conv2d(%input, %weight_const_0, input_zero_point_scalar,
+    //                 %cmsisnn_multiplier_const_1, %input_scale_scalar, %weight_scale_const_2)
+    // %2 = nn.bias_add(%1, %bias_const_3, axis=3)
+    // %3 = qnn.requantize(%2, %input_scale_const_4, %cmsisnn_shift_const_5,
+    //                     %output_scale_scalar, %output_zero_point_scalar)
+    // clip(%3, a_min=%min_scalar, a_max=%max_scalar)
+    auto in_var = tir::Var("input", DataType::Handle(8));
+    auto const_var0 = tir::Var("filter", DataType::Handle(8));         // weight
+    auto const_var1 = tir::Var("multiplier", DataType::Handle(32));    // quant multiplier
+    auto const_var2 = tir::Var("filter_scale", DataType::Handle(32));  // weight scale
+    auto const_var3 = tir::Var("bias", DataType::Handle(32));          // bias
+    auto const_var4 = tir::Var("input_scale", DataType::Handle(32));   // input_scale * weight_scale
+    auto const_var5 = tir::Var("shift", DataType::Handle(32));         // quant shift
+    auto out_var = tir::Var("output", DataType::Handle(8));
+
+    // Individual arguments to the structs arguments of the CMSIS-NN API are filled into call_extern
+    // https://github.com/ARM-software/CMSIS_5/blob/def6f800f95661eb3451d317f7d0dde504f6020d/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c#L50
+
+    // prepare cmsis_nn_conv_params
+    auto* conv2d_attrs = conv2d_call->attrs.as<Conv2DAttrs>();
+    int32_t input_offset = -GetScalarFromConstant<int32_t>(conv2d_call->args[2]);
+    int32_t output_offset = GetScalarFromConstant<int32_t>(requantize_call->args[4]);
+    int32_t stride_w = qnn::get_const_int(conv2d_attrs->strides[1]);
+    int32_t stride_h = qnn::get_const_int(conv2d_attrs->strides[0]);
+    int32_t padding_w = qnn::get_const_int(conv2d_attrs->padding[1]);
+    int32_t padding_h = qnn::get_const_int(conv2d_attrs->padding[0]);
+    int32_t dilation_w = qnn::get_const_int(conv2d_attrs->dilation[1]);
+    int32_t dilation_h = qnn::get_const_int(conv2d_attrs->dilation[0]);
+    int32_t clip_min, clip_max;
+    if (clip_call) {
+      auto* clip_attrs = clip_call->attrs.as<ClipAttrs>();

Review comment:
       ACK

##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -43,23 +44,153 @@ class RelayToTIRVisitor : public MixedModeVisitor {
   inline IntImm ToArg(int32_t value) { return IntImm(DataType::Int(32), value); }
 
   void CreatePrimFuncForExtern(Array<tir::Var> func_signature,
-                               tvm::Array<PrimExpr> call_extern_args) {
+                               tvm::Array<PrimExpr> call_extern_args, int context_buffer_size = 0) {
     Map<String, ObjectRef> dict_attrs;
     dict_attrs.Set("global_symbol", func_name_);
     dict_attrs.Set("tir.noalias", Bool(true));
 
     tir::Stmt body = tir::Evaluate(
         tvm::tir::Call(DataType::Int(8), tir::builtin::call_extern(), call_extern_args));
 
+    if (context_buffer_size) {
+      // TODO(@ashutosh-arm) while supporting MVE, we need to move allocation through TVMBAW
+      tir::Var buffer_var("context_buffer", PointerType(PrimType(DataType::Int(8)), "global"));
+      body = tir::Allocate(buffer_var, DataType::Int(8), {context_buffer_size}, tir::const_true(),
+                           body);
+    }
+
     primfunc_ = tir::PrimFunc(func_signature, body, VoidType(), Map<tir::Var, tir::Buffer>(),
                               DictAttrs(dict_attrs));
   }
 
+  void EmitConv2D(const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* conv2d_call = nullptr;
+    auto* final_call = expr.as<CallNode>();
+    auto* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    auto* requantize_input = requantize_call->args[0].as<CallNode>();
+    auto* requantize_input_op = requantize_input->op.as<OpNode>();

Review comment:
       ACK







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r733886390



##########
File path: src/relay/backend/contrib/cmsisnn/tir_to_runtime.cc
##########
@@ -69,6 +225,10 @@ class CodeGenCMSISNN : public CodeGenC {
     ss << "}\n";
     ss << "#endif\n";
   }
+
+ private:

Review comment:
       Feels cleaner to tag members separately.
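    For context, C++ allows an access specifier to appear more than once, so a
    minimal sketch of the layout being suggested (an illustrative class, not
    the PR's exact members):

        class CodeGenExample {
         public:
          void Init() {}

         private:  // member functions grouped under one specifier
          void EmitHelper() {}

         private:  // data members tagged separately
          int indent_ = 0;
        };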







[GitHub] [tvm] Mousius commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
Mousius commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r735385521



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    // Replace Constant arguments with Vars for ML Operators
+    // Perform this for non-main Call Nodes only
+    if (func_nesting_level_ && call->op.as<OpNode>()) {
+      Array<Expr> new_args;
+      for (auto& arg : post_call->args) {
+        auto* const_arg = arg.as<ConstantNode>();
+        if (const_arg && !const_arg->is_scalar()) {
+          Var var_arg = Var(gen_var_name(), const_arg->tensor_type());
+          new_args.push_back(var_arg);
+          constants_within_function_.push_back(GetRef<Constant>(const_arg));
+        } else {
+          new_args.push_back(arg);
+        }
+      }
+      final_call = Call(call->op, new_args, call->attrs, {});
+    }
+
+    // Since the constants are kicked out of partitioned functions
+    // a new call to global function is needed
+    if (auto* glob_var_node = post_call->op.as<GlobalVarNode>()) {
+      auto glob_var = GetRef<GlobalVar>(glob_var_node);
+      auto glob_func = Downcast<Function>(mod_->Lookup(glob_var));
+      auto new_glob_func = VisitExpr(glob_func);
+      if (!new_glob_func.same_as(glob_func)) {
+        mod_->Update(glob_var, Downcast<Function>(new_glob_func));
+        Array<Expr> new_args = post_call->args;
+        ICHECK(function_to_constants_.find(glob_func) != function_to_constants_.end());

Review comment:
       It might be better to stick with `Array`, as it's more commonly used, than to introduce a more specific data structure with the tree overhead. wdyt @manupa-arm?







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r733992868



##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -43,23 +44,153 @@ class RelayToTIRVisitor : public MixedModeVisitor {
   inline IntImm ToArg(int32_t value) { return IntImm(DataType::Int(32), value); }
 
   void CreatePrimFuncForExtern(Array<tir::Var> func_signature,
-                               tvm::Array<PrimExpr> call_extern_args) {
+                               tvm::Array<PrimExpr> call_extern_args, int context_buffer_size = 0) {
     Map<String, ObjectRef> dict_attrs;
     dict_attrs.Set("global_symbol", func_name_);
     dict_attrs.Set("tir.noalias", Bool(true));
 
     tir::Stmt body = tir::Evaluate(
         tvm::tir::Call(DataType::Int(8), tir::builtin::call_extern(), call_extern_args));
 
+    if (context_buffer_size) {
+      // TODO(@ashutosh-arm) while supporting MVE, we need to move allocation through TVMBAW

Review comment:
       It asserts scope == "global" here: https://github.com/apache/tvm/blob/e62075df1f6a2926aebf9b3655ba1284a2c1d8d2/src/target/source/codegen_c.cc#L359
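    As a rough sketch of what the TODO means by moving the allocation through
    TVMBAW once MVE support needs it (the signatures are the real ones from
    tvm/runtime/c_backend_api.h; the function name, device arguments, and
    buffer usage here are illustrative, not the PR's generated code):

        #include <tvm/runtime/c_backend_api.h>

        int32_t conv2d_with_workspace(uint64_t context_buffer_size) {
          // device_type=1 (kDLCPU), device_id=0, int8 workspace hints
          void* context_buffer = TVMBackendAllocWorkspace(
              1, 0, context_buffer_size, /*dtype_code_hint=*/0, /*dtype_bits_hint=*/8);
          if (context_buffer == NULL) {
            return -1;
          }
          /* ... point the CMSIS-NN ctx buffer at context_buffer and call the kernel ... */
          return TVMBackendFreeWorkspace(1, 0, context_buffer);
        }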







[GitHub] [tvm] manupa-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r735463335



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    // Replace Constant arguments with Vars for ML Operators
+    // Perform this for non-main Call Nodes only
+    if (func_nesting_level_ && call->op.as<OpNode>()) {
+      Array<Expr> new_args;
+      for (auto& arg : post_call->args) {
+        auto* const_arg = arg.as<ConstantNode>();
+        if (const_arg && !const_arg->is_scalar()) {
+          Var var_arg = Var(gen_var_name(), const_arg->tensor_type());
+          new_args.push_back(var_arg);
+          constants_within_function_.push_back(GetRef<Constant>(const_arg));
+        } else {
+          new_args.push_back(arg);
+        }
+      }
+      final_call = Call(call->op, new_args, call->attrs, {});
+    }
+
+    // Since the constants are kicked out of partitioned functions
+    // a new call to global function is needed
+    if (auto* glob_var_node = post_call->op.as<GlobalVarNode>()) {
+      auto glob_var = GetRef<GlobalVar>(glob_var_node);
+      auto glob_func = Downcast<Function>(mod_->Lookup(glob_var));
+      auto new_glob_func = VisitExpr(glob_func);
+      if (!new_glob_func.same_as(glob_func)) {
+        mod_->Update(glob_var, Downcast<Function>(new_glob_func));
+        Array<Expr> new_args = post_call->args;
+        ICHECK(function_to_constants_.find(glob_func) != function_to_constants_.end());

Review comment:
       Oops, it seems we are discussing a Map rather than an Array.
   Can't we use unordered_map then?
   
   I'm not sure Array/Map is commonly used like that, especially for an internal data structure that is not exposed to Python and that we need to call .find(..) on.
   
   https://github.com/apache/tvm/blob/df2bcfbd239c486fbad1b2ddf335389cb6c35fb7/src/tir/transforms/loop_partition.cc#L331
   
   https://github.com/apache/tvm/blob/df2bcfbd239c486fbad1b2ddf335389cb6c35fb7/src/relay/transforms/fuse_ops.cc#L854
   
   https://github.com/apache/tvm/blob/df2bcfbd239c486fbad1b2ddf335389cb6c35fb7/src/relay/transforms/fuse_ops.cc#L1008
   
   So, two questions here from a code quality perspective:
   
    @ashutosh-arm, if the number of partitioned functions is bounded (preferably by a constant), then it might not have an effect. Is that the case -- say, if we think of a large network with many operators? If it is, it may be fine to overlook this.
   
    @Mousius, hmmm, the tree is used to make the .find(..) faster, right?
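    A minimal sketch of the unordered_map alternative being suggested, keyed by
    object identity via the ObjectPtrHash/ObjectPtrEqual functors TVM provides
    (the member name matches the PR; the usage comments are illustrative):

        #include <unordered_map>
        #include <tvm/relay/expr.h>

        // Hash and compare Function keys by object pointer identity
        std::unordered_map<Function, Array<Constant>, ObjectPtrHash, ObjectPtrEqual>
            function_to_constants_;

        // Usage stays close to the Map-based version:
        //   function_to_constants_[func] = constants_within_function_;
        //   auto it = function_to_constants_.find(func);
        //   ICHECK(it != function_to_constants_.end());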







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r743811104



##########
File path: python/tvm/relay/op/contrib/cmsisnn.py
##########
@@ -52,37 +54,85 @@ def partition_for_cmsisnn(mod, params=None, **opts):
             transform.InferType(),
             transform.MergeComposite(pattern_table()),
             transform.AnnotateTarget("cmsisnn"),
-            transform.MergeCompilerRegions(),

Review comment:
       Although enabling this has no impact on the generated primfunc, we wanted to keep it simple: a 1-1 mapping from a single external Relay function to a single primfunc.

##########
File path: python/tvm/relay/op/contrib/cmsisnn.py
##########
@@ -52,37 +54,85 @@ def partition_for_cmsisnn(mod, params=None, **opts):
             transform.InferType(),
             transform.MergeComposite(pattern_table()),
             transform.AnnotateTarget("cmsisnn"),
-            transform.MergeCompilerRegions(),

Review comment:
       Is there a problem with the first representation?

##########
File path: python/tvm/relay/op/contrib/cmsisnn.py
##########
@@ -52,37 +54,85 @@ def partition_for_cmsisnn(mod, params=None, **opts):
             transform.InferType(),
             transform.MergeComposite(pattern_table()),
             transform.AnnotateTarget("cmsisnn"),
-            transform.MergeCompilerRegions(),

Review comment:
       Enabling this pass gets us the latter representation. Irrespective of the Relay we get post-partitioning, RelayToTIR works on composite functions, each of which is translated into a separate primfunc. So, independent of this pass, at the moment we end up producing multiple '.c' files.
   
   We made the choice of a separate primfunc per operator to avoid the bookkeeping required in RelayToTIR, at the cost of having to invoke different C functions at runtime.
   
   @Mousius, do you want to add anything else?
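    A hypothetical illustration of that trade-off (the names are made up, not
    the symbols TVM emits): with one primfunc per composite function, the host
    code invokes one external C function per operator.

        #include <stdint.h>

        /* One generated entry point per partitioned operator */
        int32_t cmsisnn_conv2d_0(int8_t* input, int8_t* output);
        int32_t cmsisnn_conv2d_1(int8_t* input, int8_t* output);

        int32_t run_network(int8_t* in, int8_t* scratch, int8_t* out) {
          if (cmsisnn_conv2d_0(in, scratch) != 0) return -1;  /* operator 1 */
          if (cmsisnn_conv2d_1(scratch, out) != 0) return -1; /* operator 2 */
          return 0;
        }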
   







[GitHub] [tvm] ashutosh-arm commented on pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#issuecomment-947818276


   Adding @Mousius @manupa-arm @grant-arm @areusch for review.





[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r733875657



##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -43,23 +44,153 @@ class RelayToTIRVisitor : public MixedModeVisitor {
   inline IntImm ToArg(int32_t value) { return IntImm(DataType::Int(32), value); }
 
   void CreatePrimFuncForExtern(Array<tir::Var> func_signature,
-                               tvm::Array<PrimExpr> call_extern_args) {
+                               tvm::Array<PrimExpr> call_extern_args, int context_buffer_size = 0) {
     Map<String, ObjectRef> dict_attrs;
     dict_attrs.Set("global_symbol", func_name_);
     dict_attrs.Set("tir.noalias", Bool(true));
 
     tir::Stmt body = tir::Evaluate(
         tvm::tir::Call(DataType::Int(8), tir::builtin::call_extern(), call_extern_args));
 
+    if (context_buffer_size) {
+      // TODO(@ashutosh-arm) while supporting MVE, we need to move allocation through TVMBAW
+      tir::Var buffer_var("context_buffer", PointerType(PrimType(DataType::Int(8)), "global"));
+      body = tir::Allocate(buffer_var, DataType::Int(8), {context_buffer_size}, tir::const_true(),
+                           body);
+    }
+
     primfunc_ = tir::PrimFunc(func_signature, body, VoidType(), Map<tir::Var, tir::Buffer>(),
                               DictAttrs(dict_attrs));
   }
 
+  void EmitConv2D(const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* conv2d_call = nullptr;
+    auto* final_call = expr.as<CallNode>();
+    auto* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    auto* requantize_input = requantize_call->args[0].as<CallNode>();
+    auto* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      conv2d_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      conv2d_call = requantize_input;
+    }
+
+    // TIR variables are created in the order they appear in the Relay partitioned function
+    // %1 = qnn.conv2d(%input, %weight_const_0, input_zero_point_scalar,
+    //                 %cmsisnn_multiplier_const_1, %input_scale_scalar, %weight_scale_const_2)
+    // %2 = nn.bias_add(%1, %bias_const_3, axis=3)
+    // %3 = qnn.requantize(%2, %input_scale_const_4, %cmsisnn_shift_const_5,
+    //                     %output_scale_scalar, %output_zero_point_scalar)
+    // clip(%3, a_min=%min_scalar, a_max=%max_scalar)
+    auto in_var = tir::Var("input", DataType::Handle(8));
+    auto const_var0 = tir::Var("filter", DataType::Handle(8));         // weight
+    auto const_var1 = tir::Var("multiplier", DataType::Handle(32));    // quant multiplier
+    auto const_var2 = tir::Var("filter_scale", DataType::Handle(32));  // weight scale
+    auto const_var3 = tir::Var("bias", DataType::Handle(32));          // bias
+    auto const_var4 = tir::Var("input_scale", DataType::Handle(32));   // input_scale * weight_scale
+    auto const_var5 = tir::Var("shift", DataType::Handle(32));         // quant shift
+    auto out_var = tir::Var("output", DataType::Handle(8));
+
+    // Individual arguments to the structs arguments of the CMSIS-NN API are filled into call_extern
+    // https://github.com/ARM-software/CMSIS_5/blob/def6f800f95661eb3451d317f7d0dde504f6020d/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c#L50
+
+    // prepare cmsis_nn_conv_params
+    auto* conv2d_attrs = conv2d_call->attrs.as<Conv2DAttrs>();
+    int32_t input_offset = -GetScalarFromConstant<int32_t>(conv2d_call->args[2]);
+    int32_t output_offset = GetScalarFromConstant<int32_t>(requantize_call->args[4]);
+    int32_t stride_w = qnn::get_const_int(conv2d_attrs->strides[1]);
+    int32_t stride_h = qnn::get_const_int(conv2d_attrs->strides[0]);
+    int32_t padding_w = qnn::get_const_int(conv2d_attrs->padding[1]);
+    int32_t padding_h = qnn::get_const_int(conv2d_attrs->padding[0]);
+    int32_t dilation_w = qnn::get_const_int(conv2d_attrs->dilation[1]);
+    int32_t dilation_h = qnn::get_const_int(conv2d_attrs->dilation[0]);
+    int32_t clip_min, clip_max;
+    if (clip_call) {
+      auto* clip_attrs = clip_call->attrs.as<ClipAttrs>();
+      clip_min = clip_attrs->a_min;
+      clip_max = clip_attrs->a_max;
+    } else {
+      clip_min = -128;
+      clip_max = 127;
+    }
+
+    // cmsis_nn_dims *input_dims
+    auto input_shape = conv2d_call->args[0]->type_as<TensorTypeNode>()->shape;

Review comment:
       Agreed.







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r735831612



##########
File path: src/relay/backend/contrib/cmsisnn/generate_constants.cc
##########
@@ -0,0 +1,230 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/attrs/transform.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+Expr MakeTranspose(Expr data, Array<Integer> axes);
+namespace contrib {
+namespace cmsisnn {
+
+class GenerateConstantsMutator : public MixedModeMutator {
+ public:
+  explicit GenerateConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  /*! \brief Converts kernel layout from HWIO to OHWI to align with CMSIS-NN requirements */
+  Expr ConvertKernelLayout(Expr kernel_expr, const Conv2DAttrs* conv2d_attrs, Attrs* new_attrs) {
+    auto attrs = make_object<Conv2DAttrs>();
+    attrs->strides = std::move(conv2d_attrs->strides);
+    attrs->padding = std::move(conv2d_attrs->padding);
+    attrs->dilation = std::move(conv2d_attrs->dilation);
+    attrs->groups = conv2d_attrs->groups;
+    attrs->channels = std::move(conv2d_attrs->channels);
+    attrs->kernel_size = std::move(conv2d_attrs->kernel_size);
+    attrs->data_layout = std::move(conv2d_attrs->data_layout);
+    attrs->kernel_layout = runtime::String("OHWI");
+    attrs->out_layout = std::move(conv2d_attrs->out_layout);
+    attrs->out_dtype = std::move(conv2d_attrs->out_dtype);
+    *new_attrs = tvm::Attrs{attrs};
+
+    IRModule kernel_module;
+    auto func_body = MakeTranspose(kernel_expr, {Integer(3), Integer(0), Integer(1), Integer(2)});
+    auto kernel_func =
+        Function(FreeVars(func_body), func_body, Type(), FreeTypeVars(func_body, kernel_module));
+    GlobalVar kernel_var("main");
+    kernel_module->Add(kernel_var, kernel_func);
+    kernel_module = relay::transform::FoldConstant()(kernel_module);
+    kernel_func = Downcast<Function>(kernel_module->Lookup("main"));
+    return kernel_func->body;
+  }
+
+  /*! \brief Performs weight transpose and substitutes existing constants in the composite
+   *         function for Conv2D with CMSIS-NN Requantize constants */
+  Expr GenerateConv2dRequantConstants(const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* conv2d_call = nullptr;
+    auto* final_call = expr.as<CallNode>();
+    auto* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    auto* requantize_input = requantize_call->args[0].as<CallNode>();
+    auto* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      conv2d_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      conv2d_call = requantize_input;
+    }
+
+    // Transpose weights: HWIO -> OHWI
+    auto* conv2d_attrs = conv2d_call->attrs.as<Conv2DAttrs>();
+    tvm::Attrs new_conv2d_attrs;
+    Expr transposed_kernel =
+        ConvertKernelLayout(conv2d_call->args[1], conv2d_attrs, &new_conv2d_attrs);
+
+    // Obtain input and output scales from Relay's Requantization
+    int64_t out_channels = conv2d_attrs->channels.as<IntImmNode>()->value;
+    float output_scale = GetScalarFromConstant<float>(requantize_call->args[3]);
+    auto input_scales = tvm::relay::qnn::GetFloatVectorFromConstant(requantize_call->args[1]);
+    ICHECK(input_scales.size() == static_cast<size_t>(out_channels));
+
+    // Calculate requantization multiplier and shift
+    Device dev{DLDeviceType::kDLCPU, 0};
+    runtime::NDArray multiplier_nda =
+        runtime::NDArray::Empty({out_channels}, DataType::Int(32), dev);
+    runtime::NDArray shift_nda = runtime::NDArray::Empty({out_channels}, DataType::Int(32), dev);
+    int32_t* multiplier = static_cast<int32_t*>(multiplier_nda->data);
+    int32_t* shift = static_cast<int32_t*>(shift_nda->data);
+    for (int i = 0; i < out_channels; ++i) {
+      double effective_output_scale =
+          static_cast<double>(input_scales[i]) / static_cast<double>(output_scale);
+      std::tie(*(multiplier + i), *(shift + i)) =
+          tvm::relay::qnn::GetFixedPointMultiplierShift(effective_output_scale);
+    }
+
+    // Create constants from requantization multiplier and shift
+    Constant multiplier_const(multiplier_nda);
+    Constant shift_const(shift_nda);
+
+    // Convert scale scalars into Constants
+    // Scales are expected as Constants by following passes
+    Expr weight_scale = conv2d_call->args[5];
+    Expr req_inp_scale = requantize_call->args[1];
+    if (out_channels == 1) {
+      runtime::NDArray weight_scale_nda =
+          runtime::NDArray::Empty({out_channels}, DataType::Float(32), dev);
+      float* weight_scale_p = static_cast<float*>(weight_scale_nda->data);
+      *weight_scale_p = GetScalarFromConstant<float>(weight_scale);
+      weight_scale = Constant(weight_scale_nda);
+
+      runtime::NDArray req_inp_scale_nda =
+          runtime::NDArray::Empty({out_channels}, DataType::Float(32), dev);
+      float* req_inp_scale_p = static_cast<float*>(req_inp_scale_nda->data);
+      *req_inp_scale_p = GetScalarFromConstant<float>(req_inp_scale);
+      req_inp_scale = Constant(req_inp_scale_nda);
+    }
+
+    // Replace existing weights (HWIO) with the transposed ones (OHWI)
+    // Substitute Conv2D weight_zero_point with the CMSIS-NN multiplier
+    // Substitute Requantize input_zero_point with CMSIS-NN shift
+    // Conv2D arguments: data, weight, input_zp, weight_zp, input_sc, weight_sc
+    Array<Expr> conv2d_args = {conv2d_call->args[0], transposed_kernel,    conv2d_call->args[2],
+                               multiplier_const,     conv2d_call->args[4], weight_scale};
+    Call ret_call = Call(conv2d_call->op, conv2d_args, new_conv2d_attrs, {});
+    if (bias_add_call) {
+      ret_call =
+          Call(bias_add_call->op, {ret_call, bias_add_call->args[1]}, bias_add_call->attrs, {});
+    }
+    Array<Expr> requantize_args = {ret_call, req_inp_scale, shift_const, requantize_call->args[3],
+                                   requantize_call->args[4]};
+    ret_call = Call(requantize_call->op, requantize_args, requantize_call->attrs, {});
+    if (clip_call) {
+      ret_call = Call(clip_call->op, {ret_call}, clip_call->attrs, {});
+    }
+    return ret_call;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    auto* global_var = call->op.as<GlobalVarNode>();
+    if (global_var) {
+      // Update to global function call needed because the body changes while
+      // generating new constants
+      Function func = Downcast<Function>(mod_->Lookup(global_var->name_hint));
+      Expr new_body = VisitExpr(func->body);
+      if (!new_body.same_as(func->body)) {
+        Function new_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                                     FreeTypeVars(new_body, mod_), func->attrs);
+        mod_->Update(GetRef<GlobalVar>(global_var), new_func);
+        final_call = Call(GetRef<GlobalVar>(global_var), post_call->args);
+      }
+    }
+
+    // Recreate composite function and corresponding call
+    // Updated composite function contains CMSIS-NN quantized multiplier and shift constants
+    if (call->op.as<FunctionNode>()) {
+      auto* func = call->op.as<FunctionNode>();
+      auto func_name = func->GetAttr<String>(attr::kComposite);
+      if (func_name.defined() && func_name == "cmsisnn.qnn_conv2d") {
+        Expr new_body = GenerateConv2dRequantConstants(func->body);
+        Function new_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                                     FreeTypeVars(new_body, mod_), func->attrs);
+        final_call = Call(new_func, post_call->args);
+      }
+    }
+
+    return final_call;
+  }
+
+ private:
+  IRModule mod_;
+};
+
+IRModule GenerateConstants(IRModule mod) {

Review comment:
       ACK
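
   For the per-channel loop in the hunk above: GetFixedPointMultiplierShift decomposes each effective_output_scale into a Q31 fixed-point multiplier and a power-of-two shift, so requantization needs no floating point at runtime. A self-contained sketch of the usual frexp-based scheme follows; the function name here is illustrative, the real helper lives in src/relay/qnn/utils.h:

       #include <cmath>
       #include <cstdint>
       #include <utility>

       // Decompose a positive scale into {q31_multiplier, shift} such that
       // scale ~= q31_multiplier * 2^(shift - 31).
       std::pair<int32_t, int32_t> FixedPointMultiplierShift(double scale) {
         if (scale == 0.0) return {0, 0};
         int shift = 0;
         // frexp returns a significand in [0.5, 1) and the matching exponent.
         const double significand = std::frexp(scale, &shift);
         int64_t q31 = static_cast<int64_t>(std::round(significand * (1LL << 31)));
         if (q31 == (1LL << 31)) {  // rounding pushed it to 1.0: renormalize
           q31 /= 2;
           ++shift;
         }
         return {static_cast<int32_t>(q31), shift};
       }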







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r735832178



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>

Review comment:
       Thanks Manupa. I've added all those test combinations for both the passes.







[GitHub] [tvm] manupa-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r735463335



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    // Replace Constant arguments with Vars for ML Operators
+    // Perform this for non-main Call Nodes only
+    if (func_nesting_level_ && call->op.as<OpNode>()) {
+      Array<Expr> new_args;
+      for (auto& arg : post_call->args) {
+        auto* const_arg = arg.as<ConstantNode>();
+        if (const_arg && !const_arg->is_scalar()) {
+          Var var_arg = Var(gen_var_name(), const_arg->tensor_type());
+          new_args.push_back(var_arg);
+          constants_within_function_.push_back(GetRef<Constant>(const_arg));
+        } else {
+          new_args.push_back(arg);
+        }
+      }
+      final_call = Call(call->op, new_args, call->attrs, {});
+    }
+
+    // Since the constants are kicked out of partitioned functions
+    // a new call to global function is needed
+    if (auto* glob_var_node = post_call->op.as<GlobalVarNode>()) {
+      auto glob_var = GetRef<GlobalVar>(glob_var_node);
+      auto glob_func = Downcast<Function>(mod_->Lookup(glob_var));
+      auto new_glob_func = VisitExpr(glob_func);
+      if (!new_glob_func.same_as(glob_func)) {
+        mod_->Update(glob_var, Downcast<Function>(new_glob_func));
+        Array<Expr> new_args = post_call->args;
+        ICHECK(function_to_constants_.find(glob_func) != function_to_constants_.end());

Review comment:
       Oops, it seems like we are discussing a Map rather than an Array.
   Can't we use unordered_map then?
   
   I'm not sure Array/Map is commonly used like that, especially when it is an internal data structure that is not exposed to Python.
   
   https://github.com/apache/tvm/blob/df2bcfbd239c486fbad1b2ddf335389cb6c35fb7/src/tir/transforms/loop_partition.cc#L331
   
   https://github.com/apache/tvm/blob/df2bcfbd239c486fbad1b2ddf335389cb6c35fb7/src/relay/transforms/fuse_ops.cc#L854
   
   https://github.com/apache/tvm/blob/df2bcfbd239c486fbad1b2ddf335389cb6c35fb7/src/relay/transforms/fuse_ops.cc#L1008
   
   So, two questions here from a code quality perspective:
   
    @ashutosh-arm, if the number of partitioned functions is bounded (preferably by a constant), then it might not have an effect. Is that the case? -- say, if we think of a large network with many operators. If that is the case, it may be fine to overlook this.
   
   @Mousius, hmmm, the tree is used to make the .find(..) faster, right?
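
   To make the suggestion concrete: keying a plain std::unordered_map on a Relay Function needs TVM's pointer-based hash and equality functors. A minimal sketch of the alternative member, assuming tvm::runtime::ObjectPtrHash and ObjectPtrEqual; the alias name is illustrative, not from the PR:

       #include <unordered_map>

       #include <tvm/relay/expr.h>
       #include <tvm/runtime/object.h>

       // Average O(1) lookup keyed on the underlying object pointer, i.e.
       // reference equality of the Function, instead of tvm::Map's search.
       using FunctionConstantsMap =
           std::unordered_map<tvm::relay::Function, tvm::Array<tvm::relay::Constant>,
                              tvm::runtime::ObjectPtrHash, tvm::runtime::ObjectPtrEqual>;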








[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r734682910



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {

Review comment:
       Agreed, we should rather check the number of constants that were kicked out.
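
   A sketch of that alternative, as a hypothetical rewrite of the VisitExpr_ hunk quoted above (not code from the PR): rebuild the function only when the visit actually extracted constants, rather than relying on same_as.

       Expr VisitExpr_(const FunctionNode* func) final {
         ++func_nesting_level_;
         auto new_body = VisitExpr(func->body);
         --func_nesting_level_;
         // Nothing was kicked out of this body: keep the original function.
         if (constants_within_function_.empty()) {
           return GetRef<Function>(func);
         }
         Function new_func = Function(FreeVars(new_body), new_body, func->ret_type,
                                      FreeTypeVars(new_body, mod_), func->attrs);
         function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
         constants_within_function_.clear();
         return new_func;
       }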







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r734711642



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {

Review comment:
       When the last stmt in a partitioned fn is a call to a local fn, the body does not change. This is the case where we can retain the original fn.







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r734712228



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}

Review comment:
       So, create an output IRModule and return that - just want to confirm?







[GitHub] [tvm] manupa-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r734707993



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}

Review comment:
       Why does it have to modify the input IRModule?







[GitHub] [tvm] manupa-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r734793206



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    // Replace Constant arguments with Vars for ML Operators
+    // Perform this for non-main Call Nodes only
+    if (func_nesting_level_ && call->op.as<OpNode>()) {
+      Array<Expr> new_args;
+      for (auto& arg : post_call->args) {
+        auto* const_arg = arg.as<ConstantNode>();
+        if (const_arg && !const_arg->is_scalar()) {
+          Var var_arg = Var(gen_var_name(), const_arg->tensor_type());
+          new_args.push_back(var_arg);
+          constants_within_function_.push_back(GetRef<Constant>(const_arg));
+        } else {
+          new_args.push_back(arg);
+        }
+      }
+      final_call = Call(call->op, new_args, call->attrs, {});
+    }
+
+    // Since the constants are kicked out of partitioned functions
+    // a new call to global function is needed
+    if (auto* glob_var_node = post_call->op.as<GlobalVarNode>()) {
+      auto glob_var = GetRef<GlobalVar>(glob_var_node);
+      auto glob_func = Downcast<Function>(mod_->Lookup(glob_var));
+      auto new_glob_func = VisitExpr(glob_func);
+      if (!new_glob_func.same_as(glob_func)) {
+        mod_->Update(glob_var, Downcast<Function>(new_glob_func));
+        Array<Expr> new_args = post_call->args;
+        ICHECK(function_to_constants_.find(glob_func) != function_to_constants_.end());

Review comment:
       How is it better to not use a constant-time access data structure?
   
   What is the cap on the number of functions?







[GitHub] [tvm] ashutosh-arm commented on pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#issuecomment-966291635


   I am working on resolving the conflicts.





[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r735365616



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    // Replace Constant arguments with Vars for ML Operators
+    // Perform this for non-main Call Nodes only
+    if (func_nesting_level_ && call->op.as<OpNode>()) {
+      Array<Expr> new_args;
+      for (auto& arg : post_call->args) {
+        auto* const_arg = arg.as<ConstantNode>();
+        if (const_arg && !const_arg->is_scalar()) {
+          Var var_arg = Var(gen_var_name(), const_arg->tensor_type());
+          new_args.push_back(var_arg);
+          constants_within_function_.push_back(GetRef<Constant>(const_arg));
+        } else {
+          new_args.push_back(arg);
+        }
+      }
+      final_call = Call(call->op, new_args, call->attrs, {});
+    }
+
+    // Since the constants are kicked out of partitioned functions
+    // a new call to global function is needed
+    if (auto* glob_var_node = post_call->op.as<GlobalVarNode>()) {
+      auto glob_var = GetRef<GlobalVar>(glob_var_node);
+      auto glob_func = Downcast<Function>(mod_->Lookup(glob_var));
+      auto new_glob_func = VisitExpr(glob_func);
+      if (!new_glob_func.same_as(glob_func)) {
+        mod_->Update(glob_var, Downcast<Function>(new_glob_func));
+        Array<Expr> new_args = post_call->args;
+        ICHECK(function_to_constants_.find(glob_func) != function_to_constants_.end());

Review comment:
       The number of partitioned functions is too small to have any real impact here.







[GitHub] [tvm] manupa-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r736331766



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    // Replace Constant arguments with Vars for ML Operators
+    // Perform this for non-main Call Nodes only
+    if (func_nesting_level_ && call->op.as<OpNode>()) {
+      Array<Expr> new_args;
+      for (auto& arg : post_call->args) {
+        auto* const_arg = arg.as<ConstantNode>();
+        if (const_arg && !const_arg->is_scalar()) {
+          Var var_arg = Var(gen_var_name(), const_arg->tensor_type());
+          new_args.push_back(var_arg);
+          constants_within_function_.push_back(GetRef<Constant>(const_arg));
+        } else {
+          new_args.push_back(arg);
+        }
+      }
+      final_call = Call(call->op, new_args, call->attrs, {});
+    }
+
+    // Since the constants are kicked out of partitioned functions
+    // a new call to global function is needed
+    if (auto* glob_var_node = post_call->op.as<GlobalVarNode>()) {
+      auto glob_var = GetRef<GlobalVar>(glob_var_node);
+      auto glob_func = Downcast<Function>(mod_->Lookup(glob_var));
+      auto new_glob_func = VisitExpr(glob_func);
+      if (!new_glob_func.same_as(glob_func)) {
+        mod_->Update(glob_var, Downcast<Function>(new_glob_func));
+        Array<Expr> new_args = post_call->args;
+        ICHECK(function_to_constants_.find(glob_func) != function_to_constants_.end());

Review comment:
       I can't immediately think why, say, a NAS-optimized network with a higher number of operators would not fit on an embedded device just because it has a higher number of operators.







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r746607740



##########
File path: src/relay/backend/contrib/cmsisnn/tir_to_runtime.cc
##########
@@ -54,6 +54,162 @@ class CodeGenCMSISNN : public CodeGenC {
   }
 
  private:
+  /*!  * \brief Emit the CMSIS-NN context buffer */
+  void VisitStmt_(const AllocateNode* op) {
+    context_buffer_name_ = op->buffer_var->name_hint;
+    context_buffer_size_ = op->constant_allocation_size();
+    CodeGenC::VisitStmt_(op);
+  }
+
+  /*!  * \brief Emits CMSIS-NN APIs for every call_extern */
+  void VisitExpr_(const CallNode* op, std::ostream& os) {  // NOLINT(*)
+    if (!op->op.same_as(builtin::call_extern())) {
+      return;
+    }
+    std::string cmsis_func_name = op->args[0].as<StringImmNode>()->value;
+    if (cmsis_func_name == "arm_softmax_s8" || cmsis_func_name == "arm_elementwise_mul_s8" ||
+        cmsis_func_name == "arm_elementwise_add_s8") {
+      CodeGenC::VisitExpr_(op, os);
+    } else if (cmsis_func_name == "arm_convolve_wrapper_s8") {
+      EmitConv2D(op);
+    }
+    return;
+  }
+
+  /*!  * \brief Emits cmsis_nn_context struct */
+  std::string EmitCMSISNNContext(std::ostream& os, std::string buf_name, int buf_size) {
+    std::string struct_name = "context";
+    PrintIndent();
+    os << "cmsis_nn_context " << struct_name << "= {" << buf_name << "," << buf_size << "};\n";
+    return struct_name;
+  }
+
+  /*!  * \brief Emits cmsis_nn_conv_params struct */
+  std::string EmitCMSISNNConvParams(std::ostream& os, int32_t input_offset, int32_t output_offset,
+                                    int32_t stride_w, int32_t stride_h, int32_t padding_w,
+                                    int32_t padding_h, int32_t dilation_w, int32_t dilation_h,
+                                    int32_t clip_min, int32_t clip_max) {
+    std::string struct_name = "conv_params";
+    PrintIndent();
+    os << "cmsis_nn_tile stride = {" << stride_w << "," << stride_h << "};\n";
+    PrintIndent();
+    os << "cmsis_nn_tile padding = {" << padding_w << "," << padding_h << "};\n";
+    PrintIndent();
+    os << "cmsis_nn_tile dilation = {" << dilation_w << "," << dilation_h << "};\n";
+    PrintIndent();
+    os << "cmsis_nn_activation activation = {" << clip_min << "," << clip_max << "};\n";
+    PrintIndent();
+    os << "cmsis_nn_conv_params " << struct_name << " = {" << input_offset << ", " << output_offset
+       << ", stride, padding, dilation, activation};\n";
+    return struct_name;
+  }
+
+  /*!  * \brief Emits cmsis_nn_per_channel_quant_params struct */
+  std::string EmitCMSISNNPerChannelQuantParams(std::ostream& os, std::string multiplier,
+                                               std::string shift) {
+    std::string struct_name = "quant_params";
+    PrintIndent();
+    os << "cmsis_nn_per_channel_quant_params " << struct_name << " = {" << multiplier << ", "
+       << shift << "};\n";
+    return struct_name;
+  }
+
+  /*!  * \brief Emits cmsis_nn_dims struct */
+  std::string EmitCMSISNNDims(std::ostream& os, std::string tensor_type, int32_t n, int32_t h,
+                              int32_t w, int32_t c) {
+    std::string struct_name = tensor_type + "_dims";
+    PrintIndent();
+    os << "cmsis_nn_dims " << struct_name << " = {" << n << "," << h << "," << w << "," << c
+       << "};\n";
+    return struct_name;
+  }
+
+  /*!  * \brief Emits CMSIS-NN APIs for every call_extern */
+  void EmitConv2D(const CallNode* op) {
+    static const int max_num_args = 33;
+    std::string cmsis_func_name = op->args[0].as<StringImmNode>()->value;
+
+    bool bias_enabled = false;
+    if (op->args.size() == max_num_args) {
+      bias_enabled = true;
+    }
+
+    auto get_var_name = [](const CallNode* op, int id) {
+      return op->args[id].as<VarNode>()->name_hint.c_str();
+    };
+    auto get_arg_value = [](const CallNode* op, int id) {
+      return op->args[id].as<IntImmNode>()->value;
+    };
+    int arg_id = 0;
+    std::string input_data = get_var_name(op, ++arg_id);
+    std::string filter_data = get_var_name(op, ++arg_id);
+    std::string multiplier = get_var_name(op, ++arg_id);
+    std::string bias_data("0x0");
+    if (bias_enabled) {
+      bias_data = get_var_name(op, ++arg_id);
+    }
+    std::string shift = get_var_name(op, ++arg_id);
+    std::string output_data = get_var_name(op, ++arg_id);
+
+    int input_offset = get_arg_value(op, ++arg_id);

Review comment:
       I am not sure what you mean
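
   For orientation, the struct emitters quoted above stitch together plain C in the generated source. Under the assumptions in this hunk (bias present, each emitter called once), the emitted body would look roughly like the sketch below. The quote cuts off before the final call emission, so the wrapper invocation here is a plausible reconstruction of the CMSIS-NN arm_convolve_wrapper_s8 signature, and all numeric values are illustrative:

       /* Sketch of the C emitted by EmitConv2D (names and values illustrative). */
       cmsis_nn_context context = {context_buffer, 2048};
       cmsis_nn_tile stride = {1, 1};
       cmsis_nn_tile padding = {1, 1};
       cmsis_nn_tile dilation = {1, 1};
       cmsis_nn_activation activation = {-128, 127};
       cmsis_nn_conv_params conv_params = {128, -128, stride, padding, dilation, activation};
       cmsis_nn_per_channel_quant_params quant_params = {multiplier, shift};
       cmsis_nn_dims input_dims = {1, 224, 224, 3};
       cmsis_nn_dims filter_dims = {32, 3, 3, 3};
       cmsis_nn_dims bias_dims = {1, 1, 1, 32};
       cmsis_nn_dims output_dims = {1, 112, 112, 32};
       arm_convolve_wrapper_s8(&context, &conv_params, &quant_params,
                               &input_dims, input, &filter_dims, filter,
                               &bias_dims, bias, &output_dims, output);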







[GitHub] [tvm] manupa-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r743822140



##########
File path: python/tvm/relay/op/contrib/cmsisnn.py
##########
@@ -52,37 +54,85 @@ def partition_for_cmsisnn(mod, params=None, **opts):
             transform.InferType(),
             transform.MergeComposite(pattern_table()),
             transform.AnnotateTarget("cmsisnn"),
-            transform.MergeCompilerRegions(),

Review comment:
       Code size? And more translation units produced? Each ext function will become its own .c file, right?







[GitHub] [tvm] manupa-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r734422621



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;

Review comment:
       A better design suggestion (feel free to disagree :)): we could use a stack of functions that gets pushed and popped as the visitor goes in and out of scope. That way we don't need the stateful clearing of the constants_within_function_ array and the assignment performed here.
   
   Instead, as and when the visitor encounters a constant, it could be inserted into a Map<Function, Array<Constant>> keyed by the function on top of the stack, as sketched below.
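
   A minimal sketch of that shape, assuming the mutator keeps a function stack next to the map; RecordConstant is a hypothetical helper, not code from the PR:

       // Hypothetical restructuring: track the enclosing function explicitly
       // so a constant is recorded against it the moment it is encountered.
       std::vector<const FunctionNode*> function_stack_;
       Map<Function, Array<Constant>> function_to_constants_;

       Expr VisitExpr_(const FunctionNode* func) final {
         function_stack_.push_back(func);  // entering this function's scope
         auto new_body = VisitExpr(func->body);
         function_stack_.pop_back();       // leaving the scope
         if (new_body.same_as(func->body)) {
           return GetRef<Function>(func);
         }
         return Function(FreeVars(new_body), new_body, func->ret_type,
                         FreeTypeVars(new_body, mod_), func->attrs);
       }

       // Called wherever the rewrite swaps a Constant argument for a Var.
       void RecordConstant(const Constant& constant) {
         Function current = GetRef<Function>(function_stack_.back());
         Array<Constant> consts;
         auto it = function_to_constants_.find(current);
         if (it != function_to_constants_.end()) {
           consts = (*it).second;
         }
         consts.push_back(constant);
         function_to_constants_.Set(current, consts);
       }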







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r733863577



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }

Review comment:
       It has crossed my mind, but I wanted to see where this pass ends up eventually.







[GitHub] [tvm] Mousius commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
Mousius commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r734802676



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    // Replace Constant arguments with Vars for ML Operators
+    // Perform this for non-main Call Nodes only
+    if (func_nesting_level_ && call->op.as<OpNode>()) {
+      Array<Expr> new_args;
+      for (auto& arg : post_call->args) {
+        auto* const_arg = arg.as<ConstantNode>();
+        if (const_arg && !const_arg->is_scalar()) {
+          Var var_arg = Var(gen_var_name(), const_arg->tensor_type());
+          new_args.push_back(var_arg);
+          constants_within_function_.push_back(GetRef<Constant>(const_arg));
+        } else {
+          new_args.push_back(arg);
+        }
+      }
+      final_call = Call(call->op, new_args, call->attrs, {});
+    }
+
+    // Since the constants are kicked out of partitioned functions
+    // a new call to global function is needed
+    if (auto* glob_var_node = post_call->op.as<GlobalVarNode>()) {
+      auto glob_var = GetRef<GlobalVar>(glob_var_node);
+      auto glob_func = Downcast<Function>(mod_->Lookup(glob_var));
+      auto new_glob_func = VisitExpr(glob_func);
+      if (!new_glob_func.same_as(glob_func)) {
+        mod_->Update(glob_var, Downcast<Function>(new_glob_func));
+        Array<Expr> new_args = post_call->args;
+        ICHECK(function_to_constants_.find(glob_func) != function_to_constants_.end());

Review comment:
       I'm not suggesting it's better, I'm just curious whether it'll have a real impact - `Array` is used heavily throughout the code base.







[GitHub] [tvm] ashutosh-arm commented on pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#issuecomment-966222157


   In my view, I don't wish to remove the assert, as this is code that could be reused and should fail if the caller does not meet the asserted criterion. At the same time, testing all internal asserts should not be made into a policy. I say this because, at times, we know that the previous flow, before this stage is reached, assumes certain things and has its own set of asserts.
   
   On another note, this cannot be tested in the existing CMSIS-NN testing infrastructure, as there is no direct interface to access it. How do we test such APIs currently? Can someone please point me to an example?





[GitHub] [tvm] ekalda commented on pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ekalda commented on pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#issuecomment-966241622


   > > if you genuinely disagree feel free to merge without test coverage
   > 
   > My position is to keep the assert (as originally added by @ashutosh-arm -- not a request for a change). I only oppose removing it on the basis that it is not tested. I'm happy to do it in a follow-up if that's what @ashutosh-arm wants to do here?
   > 
   > > This has happened on other PRs I've reviewed.
   > 
   > Hence the questioning here, because this exact point is questioned elsewhere but not here. In other places we committed to follow up if that is the direction we want to take. cc: @ekalda @mbaret. I'm pretty sure if we agree on this point we will do the necessary changes.
   > 
   > > working on the assumption we want the code to continue working for the foreseeable future
   > 
   > I'm pretty sure we all want this -- just being lenient on internal asserts -- that is all.
   
   Yes, I think we agreed to add the tests in a follow-up PR if we decide to test the internal asserts in the future, so it would make sense for this PR to go in as it is as well.
   
   My (unchanged) view on the topic is that I agree with @manupa-arm that we should test the user-facing asserts, but not the simple developer-facing asserts. While it is good practice to test all the functionality, I think we should always evaluate these kinds of guidelines in the context of the specific project if we want to avoid getting stuck in dogmas. It is a valid point that perhaps we should remove the unreachable asserts then, but TVM is very much still a project in progress and I have found the developer-facing asserts very useful, so I don't think they do any harm. In general, the NPU side of the codebase is in constant flux at the moment, so a lot of tests means a lot of extra code to maintain and makes changes harder. So at this stage I think there is more harm in testing ICHECKs, from a maintenance overhead, than there is benefit.





[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r733875506



##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -43,23 +44,153 @@ class RelayToTIRVisitor : public MixedModeVisitor {
   inline IntImm ToArg(int32_t value) { return IntImm(DataType::Int(32), value); }
 
   void CreatePrimFuncForExtern(Array<tir::Var> func_signature,
-                               tvm::Array<PrimExpr> call_extern_args) {
+                               tvm::Array<PrimExpr> call_extern_args, int context_buffer_size = 0) {
     Map<String, ObjectRef> dict_attrs;
     dict_attrs.Set("global_symbol", func_name_);
     dict_attrs.Set("tir.noalias", Bool(true));
 
     tir::Stmt body = tir::Evaluate(
         tvm::tir::Call(DataType::Int(8), tir::builtin::call_extern(), call_extern_args));
 
+    if (context_buffer_size) {
+      // TODO(@ashutosh-arm) while supporting MVE, we need to move allocation through TVMBAW
+      tir::Var buffer_var("context_buffer", PointerType(PrimType(DataType::Int(8)), "global"));
+      body = tir::Allocate(buffer_var, DataType::Int(8), {context_buffer_size}, tir::const_true(),
+                           body);
+    }
+
     primfunc_ = tir::PrimFunc(func_signature, body, VoidType(), Map<tir::Var, tir::Buffer>(),
                               DictAttrs(dict_attrs));
   }
 
+  void EmitConv2D(const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* conv2d_call = nullptr;
+    auto* final_call = expr.as<CallNode>();
+    auto* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    auto* requantize_input = requantize_call->args[0].as<CallNode>();
+    auto* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      conv2d_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      conv2d_call = requantize_input;
+    }
+
+    // TIR variables are created in the order they appear in the Relay partitioned function
+    // %1 = qnn.conv2d(%input, %weight_const_0, input_zero_point_scalar,
+    //                 %cmsisnn_multiplier_const_1, %input_scale_scalar, %weight_scale_const_2)
+    // %2 = nn.bias_add(%1, %bias_const_3, axis=3)
+    // %3 = qnn.requantize(%2, %input_scale_const_4, %cmsisnn_shift_const_5,
+    //                     %output_scale_scalar, %output_zero_point_scalar)
+    // clip(%3, a_min=%min_scalar, a_max=%max_scalar)
+    auto in_var = tir::Var("input", DataType::Handle(8));
+    auto const_var0 = tir::Var("filter", DataType::Handle(8));         // weight
+    auto const_var1 = tir::Var("multiplier", DataType::Handle(32));    // quant multiplier
+    auto const_var2 = tir::Var("filter_scale", DataType::Handle(32));  // weight scale
+    auto const_var3 = tir::Var("bias", DataType::Handle(32));          // bias
+    auto const_var4 = tir::Var("input_scale", DataType::Handle(32));   // input_scale * weight_scale
+    auto const_var5 = tir::Var("shift", DataType::Handle(32));         // quant shift
+    auto out_var = tir::Var("output", DataType::Handle(8));
+
+    // Individual arguments to the structs arguments of the CMSIS-NN API are filled into call_extern
+    // https://github.com/ARM-software/CMSIS_5/blob/def6f800f95661eb3451d317f7d0dde504f6020d/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c#L50
+
+    // prepare cmsis_nn_conv_params
+    auto* conv2d_attrs = conv2d_call->attrs.as<Conv2DAttrs>();

Review comment:
       ACK







[GitHub] [tvm] Mousius commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
Mousius commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r733881775



##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -43,23 +44,153 @@ class RelayToTIRVisitor : public MixedModeVisitor {
   inline IntImm ToArg(int32_t value) { return IntImm(DataType::Int(32), value); }
 
   void CreatePrimFuncForExtern(Array<tir::Var> func_signature,
-                               tvm::Array<PrimExpr> call_extern_args) {
+                               tvm::Array<PrimExpr> call_extern_args, int context_buffer_size = 0) {
     Map<String, ObjectRef> dict_attrs;
     dict_attrs.Set("global_symbol", func_name_);
     dict_attrs.Set("tir.noalias", Bool(true));
 
     tir::Stmt body = tir::Evaluate(
         tvm::tir::Call(DataType::Int(8), tir::builtin::call_extern(), call_extern_args));
 
+    if (context_buffer_size) {
+      // TODO(@ashutosh-arm) while supporting MVE, we need to move allocation through TVMBAW

Review comment:
       It seems to get generated on the stack anyway due to the heuristic so far, but I didn't try `global.workspace` - why didn't that work here?
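
       For reference, a minimal Python sketch of how a storage scope is attached to the allocated pointer in TIR; the buffer name and size are illustrative, and the placeholder body stands in for the call_extern body built above:

       ```python
       import tvm
       from tvm import tir

       # Pointer typed with the "global.workspace" scope instead of plain "global".
       ptr_type = tvm.ir.PointerType(tvm.ir.PrimType("int8"), "global.workspace")
       buffer_var = tir.Var("context_buffer", ptr_type)

       # Wrap a placeholder body in a 2048-byte int8 allocation.
       body = tir.Allocate(
           buffer_var,
           "int8",
           [2048],
           tvm.tir.IntImm("bool", 1),  # condition: always true
           tir.Evaluate(tvm.tir.IntImm("int32", 0)),
       )
       ```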







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r733884454



##########
File path: src/relay/backend/contrib/cmsisnn/tir_to_runtime.cc
##########
@@ -54,6 +54,162 @@ class CodeGenCMSISNN : public CodeGenC {
   }
 
  private:
+  /*!  * \brief Emit the CMSIS-NN context buffer */
+  void VisitStmt_(const AllocateNode* op) {
+    context_buffer_name_ = op->buffer_var->name_hint;
+    context_buffer_size_ = op->constant_allocation_size();
+    CodeGenC::VisitStmt_(op);
+  }
+
+  /*!  * \brief Emits CMSIS-NN APIs for every call_extern */
+  void VisitExpr_(const CallNode* op, std::ostream& os) {  // NOLINT(*)
+    if (!op->op.same_as(builtin::call_extern())) {
+      return;
+    }
+    std::string cmsis_func_name = op->args[0].as<StringImmNode>()->value;
+    if (cmsis_func_name == "arm_softmax_s8" || cmsis_func_name == "arm_elementwise_mul_s8" ||
+        cmsis_func_name == "arm_elementwise_add_s8") {
+      CodeGenC::VisitExpr_(op, os);
+    } else if (cmsis_func_name == "arm_convolve_wrapper_s8") {
+      EmitConv2D(op);
+    }
+    return;
+  }
+
+  /*!  * \brief Emits cmsis_nn_context struct */
+  std::string EmitCMSISNNContext(std::ostream& os, std::string buf_name, int buf_size) {
+    std::string struct_name = "context";
+    PrintIndent();
+    os << "cmsis_nn_context " << struct_name << "= {" << buf_name << "," << buf_size << "};\n";
+    return struct_name;
+  }
+
+  /*!  * \brief Emits cmsis_nn_conv_params struct */
+  std::string EmitCMSISNNConvParams(std::ostream& os, int32_t input_offset, int32_t output_offset,
+                                    int32_t stride_w, int32_t stride_h, int32_t padding_w,
+                                    int32_t padding_h, int32_t dilation_w, int32_t dilation_h,
+                                    int32_t clip_min, int32_t clip_max) {
+    std::string struct_name = "conv_params";
+    PrintIndent();
+    os << "cmsis_nn_tile stride = {" << stride_w << "," << stride_h << "};\n";
+    PrintIndent();
+    os << "cmsis_nn_tile padding = {" << padding_w << "," << padding_h << "};\n";
+    PrintIndent();
+    os << "cmsis_nn_tile dilation = {" << dilation_w << "," << dilation_h << "};\n";
+    PrintIndent();
+    os << "cmsis_nn_activation activation = {" << clip_min << "," << clip_max << "};\n";
+    PrintIndent();
+    os << "cmsis_nn_conv_params " << struct_name << " = {" << input_offset << ", " << output_offset
+       << ", stride, padding, dilation, activation};\n";
+    return struct_name;
+  }
+
+  /*!  * \brief Emits cmsis_nn_per_channel_quant_params struct */
+  std::string EmitCMSISNNPerChannelQuantParams(std::ostream& os, std::string multiplier,
+                                               std::string shift) {
+    std::string struct_name = "quant_params";
+    PrintIndent();
+    os << "cmsis_nn_per_channel_quant_params " << struct_name << " = {" << multiplier << ", "
+       << shift << "};\n";
+    return struct_name;
+  }
+
+  /*!  * \brief Emits cmsis_nn_dims struct */
+  std::string EmitCMSISNNDims(std::ostream& os, std::string tensor_type, int32_t n, int32_t h,
+                              int32_t w, int32_t c) {
+    std::string struct_name = tensor_type + "_dims";
+    PrintIndent();
+    os << "cmsis_nn_dims " << struct_name << " = {" << n << "," << h << "," << w << "," << c
+       << "};\n";
+    return struct_name;
+  }
+
+  /*!  * \brief Emits CMSIS-NN APIs for every call_extern */
+  void EmitConv2D(const CallNode* op) {
+    static const int max_num_args = 33;
+    std::string cmsis_func_name = op->args[0].as<StringImmNode>()->value;
+
+    bool bias_enabled = false;
+    if (op->args.size() == max_num_args) {
+      bias_enabled = true;
+    }
+
+    auto get_var_name = [](const CallNode* op, int id) {
+      return op->args[id].as<VarNode>()->name_hint.c_str();
+    };
+    auto get_arg_value = [](const CallNode* op, int id) {
+      return op->args[id].as<IntImmNode>()->value;
+    };
+    int arg_id = 0;
+    std::string input_data = get_var_name(op, ++arg_id);
+    std::string filter_data = get_var_name(op, ++arg_id);
+    std::string multiplier = get_var_name(op, ++arg_id);
+    std::string bias_data("0x0");
+    if (bias_enabled) {
+      bias_data = get_var_name(op, ++arg_id);
+    }
+    std::string shift = get_var_name(op, ++arg_id);
+    std::string output_data = get_var_name(op, ++arg_id);
+
+    int input_offset = get_arg_value(op, ++arg_id);
+    int output_offset = get_arg_value(op, ++arg_id);
+    int stride_w = get_arg_value(op, ++arg_id);
+    int stride_h = get_arg_value(op, ++arg_id);
+    int padding_w = get_arg_value(op, ++arg_id);
+    int padding_h = get_arg_value(op, ++arg_id);
+    int dilation_w = get_arg_value(op, ++arg_id);
+    int dilation_h = get_arg_value(op, ++arg_id);
+    int clip_min = get_arg_value(op, ++arg_id);
+    int clip_max = get_arg_value(op, ++arg_id);
+    int input_n = get_arg_value(op, ++arg_id);
+    int input_h = get_arg_value(op, ++arg_id);
+    int input_w = get_arg_value(op, ++arg_id);
+    int input_c = get_arg_value(op, ++arg_id);
+    int filter_n = get_arg_value(op, ++arg_id);
+    int filter_h = get_arg_value(op, ++arg_id);
+    int filter_w = get_arg_value(op, ++arg_id);
+    int filter_c = get_arg_value(op, ++arg_id);
+    int bias_n = get_arg_value(op, ++arg_id);
+    int bias_h = get_arg_value(op, ++arg_id);
+    int bias_w = get_arg_value(op, ++arg_id);
+    int bias_c = get_arg_value(op, ++arg_id);
+    int output_n = get_arg_value(op, ++arg_id);
+    int output_h = get_arg_value(op, ++arg_id);
+    int output_w = get_arg_value(op, ++arg_id);
+    int output_c = get_arg_value(op, ++arg_id);
+
+    // TODO(ashutosh-arm) for mve code, need to look for tir allocate
+    std::string context = EmitCMSISNNContext(stream, context_buffer_name_, context_buffer_size_);
+    std::string conv_params =
+        EmitCMSISNNConvParams(stream, input_offset, output_offset, stride_w, stride_h, padding_w,
+                              padding_h, dilation_w, dilation_h, clip_min, clip_max);
+    std::string quant_params = EmitCMSISNNPerChannelQuantParams(stream, multiplier, shift);
+    std::string input_dim = EmitCMSISNNDims(stream, "input", input_n, input_h, input_w, input_c);
+    std::string filter_dim =
+        EmitCMSISNNDims(stream, "filter", filter_n, filter_h, filter_w, filter_c);
+    std::string bias_dim = EmitCMSISNNDims(stream, "bias", bias_n, bias_h, bias_w, bias_c);
+    std::string output_dim =
+        EmitCMSISNNDims(stream, "output", output_n, output_h, output_w, output_c);
+
+    PrintIndent();
+    stream << "arm_status status = ";
+    stream << cmsis_func_name << "(";
+    stream << "&" << context << ", ";
+    stream << "&" << conv_params << ", ";
+    stream << "&" << quant_params << ", ";
+    stream << "&" << input_dim << ", " << input_data << ", ";
+    stream << "&" << filter_dim << ", " << filter_data << ", ";
+    stream << "&" << bias_dim << ", " << bias_data << ", ";
+    stream << "&" << output_dim << ", " << output_data << ");\n";
+    PrintIndent();
+    stream << "if (status != ARM_MATH_SUCCESS) {\n";
+    PrintIndent();
+    PrintIndent();
+    stream << "printf(\"Failed during execution of " << cmsis_func_name << "().\");\n";

Review comment:
       As discussed earlier today, we can't. I have removed the printf in the meantime. This should get resolved when we address the issue of propagating the return code up.







[GitHub] [tvm] Mousius commented on pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
Mousius commented on pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#issuecomment-966193782


   > **If it means we need to add a borderline meaningful test that uses an assertion to see if the assertion inside the code is triggered, let's do that then**.
   
   It doesn't. You're the committer responsible for this PR; if you genuinely disagree, feel free to merge without test coverage - this has happened on other PRs I've reviewed.
   
   > I guess what you are saying is we need a test for the above ICHECK in this PR?
   
   Yip, I would advocate for full test coverage in line with [the TVM Code Review Guidelines](https://github.com/apache/tvm/blob/main/docs/contribute/code_review.rst#factors-to-consider-about-code-quality), working on the assumption that we want the code to continue working for the foreseeable future. Though I don't think we should turn this PR into a debate on software testing - that's been going on for a long time now :smile_cat: .





[GitHub] [tvm] manupa-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r736408874



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    // Replace Constant arguments with Vars for ML Operators
+    // Perform this for non-main Call Nodes only
+    if (func_nesting_level_ && call->op.as<OpNode>()) {
+      Array<Expr> new_args;
+      for (auto& arg : post_call->args) {
+        auto* const_arg = arg.as<ConstantNode>();
+        if (const_arg && !const_arg->is_scalar()) {
+          Var var_arg = Var(gen_var_name(), const_arg->tensor_type());
+          new_args.push_back(var_arg);
+          constants_within_function_.push_back(GetRef<Constant>(const_arg));
+        } else {
+          new_args.push_back(arg);
+        }
+      }
+      final_call = Call(call->op, new_args, call->attrs, {});
+    }
+
+    // Since the constants are kicked out of partitioned functions
+    // a new call to global function is needed
+    if (auto* glob_var_node = post_call->op.as<GlobalVarNode>()) {
+      auto glob_var = GetRef<GlobalVar>(glob_var_node);
+      auto glob_func = Downcast<Function>(mod_->Lookup(glob_var));
+      auto new_glob_func = VisitExpr(glob_func);
+      if (!new_glob_func.same_as(glob_func)) {
+        mod_->Update(glob_var, Downcast<Function>(new_glob_func));
+        Array<Expr> new_args = post_call->args;
+        ICHECK(function_to_constants_.find(glob_func) != function_to_constants_.end());

Review comment:
       This is resolved now as "Map uses unordered_map" -- thanks @Mousius for pointing this out.







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r734672888



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>

Review comment:
       It depends on whether this pass will go away entirely after we move to constants in TIR.







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r734656979



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>

Review comment:
       Agreed. Could you please suggest what kind of checks?







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r735556419



##########
File path: src/relay/backend/contrib/cmsisnn/generate_constants.cc
##########
@@ -0,0 +1,230 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/attrs/transform.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+Expr MakeTranspose(Expr data, Array<Integer> axes);
+namespace contrib {
+namespace cmsisnn {
+
+class GenerateConstantsMutator : public MixedModeMutator {
+ public:
+  explicit GenerateConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  /*!  * \brief Converts Kernel layout from HWIO to OHWI to align to CMSIS-NN requirements */
+  Expr ConvertKernelLayout(Expr kernel_expr, const Conv2DAttrs* conv2d_attrs, Attrs* new_attrs) {
+    auto attrs = make_object<Conv2DAttrs>();
+    attrs->strides = std::move(conv2d_attrs->strides);
+    attrs->padding = std::move(conv2d_attrs->padding);
+    attrs->dilation = std::move(conv2d_attrs->dilation);
+    attrs->groups = conv2d_attrs->groups;
+    attrs->channels = std::move(conv2d_attrs->channels);
+    attrs->kernel_size = std::move(conv2d_attrs->kernel_size);
+    attrs->data_layout = std::move(conv2d_attrs->data_layout);
+    attrs->kernel_layout = runtime::String("OHWI");
+    attrs->out_layout = std::move(conv2d_attrs->out_layout);
+    attrs->out_dtype = std::move(conv2d_attrs->out_dtype);
+    *new_attrs = tvm::Attrs{attrs};
+
+    IRModule kernel_module;
+    auto func_body = MakeTranspose(kernel_expr, {Integer(3), Integer(0), Integer(1), Integer(2)});
+    auto kernel_func =
+        Function(FreeVars(func_body), func_body, Type(), FreeTypeVars(func_body, kernel_module));
+    GlobalVar kernel_var("main");
+    kernel_module->Add(kernel_var, kernel_func);
+    kernel_module = relay::transform::FoldConstant()(kernel_module);
+    kernel_func = Downcast<Function>(kernel_module->Lookup("main"));
+    return kernel_func->body;
+  }
+
+  /*!  * \brief Performs weight transpose and substitutes existing constants in the composite
+   *            function for Conv2D with CMSIS-NN Requantize constants */
+  Expr GenerateConv2dRequantConstants(const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* conv2d_call = nullptr;
+    auto* final_call = expr.as<CallNode>();
+    auto* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    auto* requantize_input = requantize_call->args[0].as<CallNode>();
+    auto* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      conv2d_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      conv2d_call = requantize_input;
+    }
+
+    // Transpose weights: HWIO -> OHWI
+    auto* conv2d_attrs = conv2d_call->attrs.as<Conv2DAttrs>();
+    tvm::Attrs new_conv2d_attrs;
+    Expr transposed_kernel =
+        ConvertKernelLayout(conv2d_call->args[1], conv2d_attrs, &new_conv2d_attrs);
+
+    // Obtain input and output scales from Relay's Requantization
+    int64_t out_channels = conv2d_attrs->channels.as<IntImmNode>()->value;
+    float output_scale = GetScalarFromConstant<float>(requantize_call->args[3]);
+    auto input_scales = tvm::relay::qnn::GetFloatVectorFromConstant(requantize_call->args[1]);
+    ICHECK(input_scales.size() == static_cast<size_t>(out_channels));
+
+    // Calculate requantization multiplier and shift
+    Device dev{DLDeviceType::kDLCPU, 0};
+    runtime::NDArray multiplier_nda =
+        runtime::NDArray::Empty({out_channels}, DataType::Int(32), dev);
+    runtime::NDArray shift_nda = runtime::NDArray::Empty({out_channels}, DataType::Int(32), dev);
+    int32_t* multiplier = static_cast<int32_t*>(multiplier_nda->data);
+    int32_t* shift = static_cast<int32_t*>(shift_nda->data);
+    for (int i = 0; i < out_channels; ++i) {
+      double effective_output_scale =
+          static_cast<double>(input_scales[i]) / static_cast<double>(output_scale);
+      std::tie(*(multiplier + i), *(shift + i)) =
+          tvm::relay::qnn::GetFixedPointMultiplierShift(effective_output_scale);
+    }
+
+    // Create constants from requantization multiplier and shift
+    Constant multiplier_const(multiplier_nda);
+    Constant shift_const(shift_nda);
+
+    // Convert scale scalars into Constants
+    // Scales are expected as Constants by following passes
+    Expr weight_scale = conv2d_call->args[5];
+    Expr req_inp_scale = requantize_call->args[1];
+    if (out_channels == 1) {
+      runtime::NDArray weight_scale_nda =
+          runtime::NDArray::Empty({out_channels}, DataType::Float(32), dev);
+      float* weight_scale_p = static_cast<float*>(weight_scale_nda->data);
+      *weight_scale_p = GetScalarFromConstant<float>(weight_scale);
+      weight_scale = Constant(weight_scale_nda);
+
+      runtime::NDArray req_inp_scale_nda =
+          runtime::NDArray::Empty({out_channels}, DataType::Float(32), dev);
+      float* req_inp_scale_p = static_cast<float*>(req_inp_scale_nda->data);
+      *req_inp_scale_p = GetScalarFromConstant<float>(req_inp_scale);
+      req_inp_scale = Constant(req_inp_scale_nda);
+    }
+
+    // Replace existing weights (HWIO) with the transposed ones (OHWI)
+    // Substitute Conv2D weight_zero_point with the CMSIS-NN multiplier
+    // Substitute Requantize input_zero_point with CMSIS-NN shift
+    // Conv2D arguments: data, weight, input_zp, weight_zp, input_sc, weight_sc
+    Array<Expr> conv2d_args = {conv2d_call->args[0], transposed_kernel,    conv2d_call->args[2],

Review comment:
       Yes, it does look weird from a readability point of view.







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r735551325



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}

Review comment:
       Done!







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r733917252



##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -43,23 +44,153 @@ class RelayToTIRVisitor : public MixedModeVisitor {
   inline IntImm ToArg(int32_t value) { return IntImm(DataType::Int(32), value); }
 
   void CreatePrimFuncForExtern(Array<tir::Var> func_signature,
-                               tvm::Array<PrimExpr> call_extern_args) {
+                               tvm::Array<PrimExpr> call_extern_args, int context_buffer_size = 0) {
     Map<String, ObjectRef> dict_attrs;
     dict_attrs.Set("global_symbol", func_name_);
     dict_attrs.Set("tir.noalias", Bool(true));
 
     tir::Stmt body = tir::Evaluate(
         tvm::tir::Call(DataType::Int(8), tir::builtin::call_extern(), call_extern_args));
 
+    if (context_buffer_size) {
+      // TODO(@ashutosh-arm) while supporting MVE, we need to move allocation through TVMBAW
+      tir::Var buffer_var("context_buffer", PointerType(PrimType(DataType::Int(8)), "global"));
+      body = tir::Allocate(buffer_var, DataType::Int(8), {context_buffer_size}, tir::const_true(),
+                           body);
+    }
+
     primfunc_ = tir::PrimFunc(func_signature, body, VoidType(), Map<tir::Var, tir::Buffer>(),
                               DictAttrs(dict_attrs));
   }
 
+  void EmitConv2D(const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* conv2d_call = nullptr;
+    auto* final_call = expr.as<CallNode>();
+    auto* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    auto* requantize_input = requantize_call->args[0].as<CallNode>();
+    auto* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      conv2d_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      conv2d_call = requantize_input;
+    }
+
+    // TIR variables are created in the order they appear in the Relay partitioned function
+    // %1 = qnn.conv2d(%input, %weight_const_0, input_zero_point_scalar,
+    //                 %cmsisnn_multiplier_const_1, %input_scale_scalar, %weight_scale_const_2)
+    // %2 = nn.bias_add(%1, %bias_const_3, axis=3)
+    // %3 = qnn.requantize(%2, %input_scale_const_4, %cmsisnn_shift_const_5,
+    //                     %output_scale_scalar, %output_zero_point_scalar)
+    // clip(%3, a_min=%min_scalar, a_max=%max_scalar)
+    auto in_var = tir::Var("input", DataType::Handle(8));
+    auto const_var0 = tir::Var("filter", DataType::Handle(8));         // weight
+    auto const_var1 = tir::Var("multiplier", DataType::Handle(32));    // quant multiplier
+    auto const_var2 = tir::Var("filter_scale", DataType::Handle(32));  // weight scale
+    auto const_var3 = tir::Var("bias", DataType::Handle(32));          // bias
+    auto const_var4 = tir::Var("input_scale", DataType::Handle(32));   // input_scale * weight_scale
+    auto const_var5 = tir::Var("shift", DataType::Handle(32));         // quant shift
+    auto out_var = tir::Var("output", DataType::Handle(8));
+
+    // Individual arguments to the structs arguments of the CMSIS-NN API are filled into call_extern
+    // https://github.com/ARM-software/CMSIS_5/blob/def6f800f95661eb3451d317f7d0dde504f6020d/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c#L50
+
+    // prepare cmsis_nn_conv_params
+    auto* conv2d_attrs = conv2d_call->attrs.as<Conv2DAttrs>();
+    int32_t input_offset = -GetScalarFromConstant<int32_t>(conv2d_call->args[2]);
+    int32_t output_offset = GetScalarFromConstant<int32_t>(requantize_call->args[4]);
+    int32_t stride_w = qnn::get_const_int(conv2d_attrs->strides[1]);
+    int32_t stride_h = qnn::get_const_int(conv2d_attrs->strides[0]);
+    int32_t padding_w = qnn::get_const_int(conv2d_attrs->padding[1]);
+    int32_t padding_h = qnn::get_const_int(conv2d_attrs->padding[0]);
+    int32_t dilation_w = qnn::get_const_int(conv2d_attrs->dilation[1]);
+    int32_t dilation_h = qnn::get_const_int(conv2d_attrs->dilation[0]);
+    int32_t clip_min, clip_max;
+    if (clip_call) {
+      auto* clip_attrs = clip_call->attrs.as<ClipAttrs>();
+      clip_min = clip_attrs->a_min;
+      clip_max = clip_attrs->a_max;
+    } else {
+      clip_min = -128;
+      clip_max = 127;
+    }
+
+    // cmsis_nn_dims *input_dims

Review comment:
       That looks more readable. Done!







[GitHub] [tvm] areusch commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
areusch commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r743906641



##########
File path: tests/python/contrib/test_cmsisnn/test_extract_constants.py
##########
@@ -0,0 +1,175 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""CMSIS-NN integration tests: extract_constants pass"""
+import itertools
+import math
+import numpy as np
+import pytest
+import tvm
+from tvm import relay
+
+from utils import (
+    make_module,
+    count_num_calls,
+    get_range_for_dtype_str,
+    get_same_padding,
+    get_conv2d_qnn_params,
+    make_qnn_relu,
+)
+
+tvm._ffi._init_api("relay.ext.cmsisnn.transform", __name__)
+
+
+class CheckFunctionsForConstants(tvm.relay.ExprVisitor):
+    def __init__(self):
+        super().__init__()
+        self.num_constants_ = 0
+
+    def visit_call(self, call):
+        super().visit_call(call)
+        for arg in call.args:
+            if isinstance(arg, relay.Constant) and arg.data.numpy().ndim > 0:
+                self.num_constants_ += 1
+
+    def visit_function(self, func):
+        super().visit_function(func)
+        assert self.num_constants_ == 0, "Functions should not have constant arguments in Calls"
+
+
+def set_external_func_attr(func, compiler, ext_symbol):
+    func = func.with_attr("Primitive", tvm.tir.IntImm("int32", 1))
+    func = func.with_attr("Compiler", compiler)
+    func = func.with_attr("global_symbol", ext_symbol)
+    return func
+
+
+def test_external_function():
+    y0_data = np.random.uniform(0, 1, (8, 8)).astype("float32")
+    x0 = relay.var("x0", shape=(8, 8))
+    y0_const = relay.const(y0_data, "float32")
+    z0 = x0 + y0_const
+    ef = relay.Function([x0], z0, relay.TensorType((8, 8), "float32"))
+    ev = relay.GlobalVar("external_function")
+    ef = set_external_func_attr(ef, "external_compiler", ev.name_hint)
+
+    x = relay.var("x", shape=(8, 8))
+    c = relay.Call(ev, [x])
+    mf = relay.Function([x], c, relay.TensorType((8, 8), "float32"))
+    mv = relay.GlobalVar("main")
+
+    mod = tvm.IRModule()
+    mod[ev] = ef
+    mod[mv] = mf
+
+    mod = ExtractConstantsFromPartitionedFunction()(mod)
+    CheckFunctionsForConstants().visit_function(mod[ev])
+    relay.transform.InferType()(mod)
+
+
+def test_nested_function():
+    y1_data = np.random.uniform(0, 1, (8, 8)).astype("float32")
+    x1 = relay.var("x1", shape=(8, 8))
+    y1_const = relay.const(y1_data, "float32")
+    z1 = x1 + y1_const
+    w1 = z1 * relay.const(5.0, "float32")
+    lf = relay.Function([x1], w1, relay.TensorType((8, 8), "float32"))
+
+    x0 = relay.var("x0", shape=(8, 8))
+    c0 = relay.Call(lf, [x0])
+    ef = relay.Function([x0], c0, relay.TensorType((8, 8), "float32"))
+
+    x = relay.var("x", shape=(8, 8))
+    ev = relay.GlobalVar("external_function")
+    ef = set_external_func_attr(ef, "external_compiler", ev.name_hint)
+    c = relay.Call(ev, [x])
+    mf = relay.Function([x], c, relay.TensorType((8, 8), "float32"))
+    mv = relay.GlobalVar("main")
+
+    mod = tvm.IRModule()
+    mod[ev] = ef
+    mod[mv] = mf
+
+    mod = ExtractConstantsFromPartitionedFunction()(mod)
+    CheckFunctionsForConstants().visit_function(mod[ev])
+    relay.transform.InferType()(mod)
+
+
+def test_multiple_functions():
+    y20_data = np.random.uniform(0, 1, (8, 8)).astype("float32")
+    x20 = relay.var("x20", shape=(8, 8))
+    y20_const = relay.const(y20_data, "float32")
+    z20 = x20 + y20_const
+    f20 = relay.Function([x20], z20, relay.TensorType((8, 8), "float32"))
+
+    y21_data = np.random.uniform(0, 1, (8, 8)).astype("float32")
+    x21 = relay.var("x21", shape=(8, 8))
+    y21_const = relay.const(y21_data, "float32")
+    z21 = x21 + y21_const
+    f21 = relay.Function([x21], z21, relay.TensorType((8, 8), "float32"))
+
+    x10 = relay.var("x10", shape=(8, 8))
+    c10 = relay.Call(f20, [x10])
+    c11 = relay.Call(f21, [c10])
+    ef = relay.Function([x10], c11, relay.TensorType((8, 8), "float32"))
+
+    x0 = relay.var("x0", shape=(8, 8))
+    ev = relay.GlobalVar("external_function")
+    ef = set_external_func_attr(ef, "external_compiler", ev.name_hint)
+    c = relay.Call(ev, [x0])
+    mf = relay.Function([x0], c, relay.TensorType((8, 8), "float32"))
+    mv = relay.GlobalVar("main")
+
+    mod = tvm.IRModule()
+    mod[ev] = ef
+    mod[mv] = mf
+
+    mod = ExtractConstantsFromPartitionedFunction()(mod)
+    CheckFunctionsForConstants().visit_function(mod[ev])
+    relay.transform.InferType()(mod)

Review comment:
       Should we assert anything here? Same question above.
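
       One option, sketched under the assumption that the pass appends the extracted constants as new arguments at the call site in `main` (as `Rewrite_` above suggests); the exact expected argument count is hypothetical and would need checking against the actual output:

       ```python
       mod = ExtractConstantsFromPartitionedFunction()(mod)
       CheckFunctionsForConstants().visit_function(mod[ev])
       mod = relay.transform.InferType()(mod)

       # After extraction, the call in main should carry the lifted constants
       # alongside the original input.
       main_call = mod["main"].body
       assert len(main_call.args) > 1
       ```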

##########
File path: src/relay/backend/contrib/cmsisnn/tir_to_runtime.cc
##########
@@ -54,6 +54,162 @@ class CodeGenCMSISNN : public CodeGenC {
   }
 
  private:
+  /*!  * \brief Emit the CMSIS-NN context buffer */
+  void VisitStmt_(const AllocateNode* op) {
+    context_buffer_name_ = op->buffer_var->name_hint;
+    context_buffer_size_ = op->constant_allocation_size();
+    CodeGenC::VisitStmt_(op);
+  }
+
+  /*!  * \brief Emits CMSIS-NN APIs for every call_extern */
+  void VisitExpr_(const CallNode* op, std::ostream& os) {  // NOLINT(*)
+    if (!op->op.same_as(builtin::call_extern())) {
+      return;
+    }
+    std::string cmsis_func_name = op->args[0].as<StringImmNode>()->value;
+    if (cmsis_func_name == "arm_softmax_s8" || cmsis_func_name == "arm_elementwise_mul_s8" ||
+        cmsis_func_name == "arm_elementwise_add_s8") {
+      CodeGenC::VisitExpr_(op, os);
+    } else if (cmsis_func_name == "arm_convolve_wrapper_s8") {
+      EmitConv2D(op);
+    }
+    return;
+  }
+
+  /*!  * \brief Emits cmsis_nn_context struct */
+  std::string EmitCMSISNNContext(std::ostream& os, std::string buf_name, int buf_size) {
+    std::string struct_name = "context";
+    PrintIndent();
+    os << "cmsis_nn_context " << struct_name << "= {" << buf_name << "," << buf_size << "};\n";
+    return struct_name;
+  }
+
+  /*!  * \brief Emits cmsis_nn_conv_params struct */
+  std::string EmitCMSISNNConvParams(std::ostream& os, int32_t input_offset, int32_t output_offset,
+                                    int32_t stride_w, int32_t stride_h, int32_t padding_w,
+                                    int32_t padding_h, int32_t dilation_w, int32_t dilation_h,
+                                    int32_t clip_min, int32_t clip_max) {
+    std::string struct_name = "conv_params";
+    PrintIndent();
+    os << "cmsis_nn_tile stride = {" << stride_w << "," << stride_h << "};\n";
+    PrintIndent();
+    os << "cmsis_nn_tile padding = {" << padding_w << "," << padding_h << "};\n";
+    PrintIndent();
+    os << "cmsis_nn_tile dilation = {" << dilation_w << "," << dilation_h << "};\n";
+    PrintIndent();
+    os << "cmsis_nn_activation activation = {" << clip_min << "," << clip_max << "};\n";
+    PrintIndent();
+    os << "cmsis_nn_conv_params " << struct_name << " = {" << input_offset << ", " << output_offset
+       << ", stride, padding, dilation, activation};\n";
+    return struct_name;
+  }
+
+  /*!  * \brief Emits cmsis_nn_per_channel_quant_params struct */
+  std::string EmitCMSISNNPerChannelQuantParams(std::ostream& os, std::string multiplier,
+                                               std::string shift) {
+    std::string struct_name = "quant_params";
+    PrintIndent();
+    os << "cmsis_nn_per_channel_quant_params " << struct_name << " = {" << multiplier << ", "
+       << shift << "};\n";
+    return struct_name;
+  }
+
+  /*!  * \brief Emits cmsis_nn_dims struct */
+  std::string EmitCMSISNNDims(std::ostream& os, std::string tensor_type, int32_t n, int32_t h,
+                              int32_t w, int32_t c) {
+    std::string struct_name = tensor_type + "_dims";
+    PrintIndent();
+    os << "cmsis_nn_dims " << struct_name << " = {" << n << "," << h << "," << w << "," << c
+       << "};\n";
+    return struct_name;
+  }
+
+  /*!  * \brief Emits CMSIS-NN APIs for every call_extern */
+  void EmitConv2D(const CallNode* op) {
+    static const int max_num_args = 33;
+    std::string cmsis_func_name = op->args[0].as<StringImmNode>()->value;
+
+    bool bias_enabled = false;
+    if (op->args.size() == max_num_args) {
+      bias_enabled = true;
+    }
+
+    auto get_var_name = [](const CallNode* op, int id) {
+      return op->args[id].as<VarNode>()->name_hint.c_str();
+    };
+    auto get_arg_value = [](const CallNode* op, int id) {
+      return op->args[id].as<IntImmNode>()->value;
+    };
+    int arg_id = 0;
+    std::string input_data = get_var_name(op, ++arg_id);
+    std::string filter_data = get_var_name(op, ++arg_id);
+    std::string multiplier = get_var_name(op, ++arg_id);
+    std::string bias_data("0x0");
+    if (bias_enabled) {
+      bias_data = get_var_name(op, ++arg_id);
+    }
+    std::string shift = get_var_name(op, ++arg_id);
+    std::string output_data = get_var_name(op, ++arg_id);
+
+    int input_offset = get_arg_value(op, ++arg_id);

Review comment:
       is there a way to use the encoding here?







[GitHub] [tvm] manupa-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r743822140



##########
File path: python/tvm/relay/op/contrib/cmsisnn.py
##########
@@ -52,37 +54,85 @@ def partition_for_cmsisnn(mod, params=None, **opts):
             transform.InferType(),
             transform.MergeComposite(pattern_table()),
             transform.AnnotateTarget("cmsisnn"),
-            transform.MergeCompilerRegions(),

Review comment:
       Code size -- due to the additional external functions? And more translation units being produced? Each external function will become its own .c file, right?







[GitHub] [tvm] Mousius commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
Mousius commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r743859261



##########
File path: python/tvm/relay/op/contrib/cmsisnn.py
##########
@@ -52,37 +54,85 @@ def partition_for_cmsisnn(mod, params=None, **opts):
             transform.InferType(),
             transform.MergeComposite(pattern_table()),
             transform.AnnotateTarget("cmsisnn"),
-            transform.MergeCompilerRegions(),

Review comment:
       With https://github.com/apache/tvm/pull/9397 the `Target` will produce a single `IRModule` into a `runtime::Module` rather than creating many. After that, the functions are just wrappers around the CMSIS-NN calls: they essentially create the CMSIS-NN structures and invoke them. That seems the minimal amount of code for them, short of packing multiple CMSIS-NN API calls into a single PrimFunc, which we didn't intend to do.
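
       For context, the partitioning sequence from the diff above (without `MergeCompilerRegions`) looks roughly like the sketch below, so each composite match becomes its own partitioned function. `pattern_table()` is the CMSIS-NN pattern table defined in this file, and `PartitionGraph` is assumed to be the final step:

       ```python
       import tvm
       from tvm import relay

       seq = tvm.transform.Sequential(
           [
               relay.transform.InferType(),
               relay.transform.MergeComposite(pattern_table()),
               relay.transform.AnnotateTarget("cmsisnn"),
               # MergeCompilerRegions() removed: one external function per region
               relay.transform.PartitionGraph(),
           ]
       )
       partitioned_mod = seq(mod)
       ```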







[GitHub] [tvm] manupa-arm commented on pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#issuecomment-962006927


   @ashutosh-arm could you retrigger this one? The Docker image is updated now.





[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r750487733



##########
File path: src/relay/backend/contrib/cmsisnn/tir_to_runtime.cc
##########
@@ -54,6 +54,162 @@ class CodeGenCMSISNN : public CodeGenC {
   }
 
  private:
+  /*!  * \brief Emit the CMSIS-NN context buffer */
+  void VisitStmt_(const AllocateNode* op) {
+    context_buffer_name_ = op->buffer_var->name_hint;
+    context_buffer_size_ = op->constant_allocation_size();
+    CodeGenC::VisitStmt_(op);
+  }
+
+  /*!  * \brief Emits CMSIS-NN APIs for every call_extern */
+  void VisitExpr_(const CallNode* op, std::ostream& os) {  // NOLINT(*)
+    if (!op->op.same_as(builtin::call_extern())) {
+      return;
+    }
+    std::string cmsis_func_name = op->args[0].as<StringImmNode>()->value;
+    if (cmsis_func_name == "arm_softmax_s8" || cmsis_func_name == "arm_elementwise_mul_s8" ||
+        cmsis_func_name == "arm_elementwise_add_s8") {
+      CodeGenC::VisitExpr_(op, os);
+    } else if (cmsis_func_name == "arm_convolve_wrapper_s8") {
+      EmitConv2D(op);
+    }
+    return;
+  }
+
+  /*!  * \brief Emits cmsis_nn_context struct */
+  std::string EmitCMSISNNContext(std::ostream& os, std::string buf_name, int buf_size) {
+    std::string struct_name = "context";
+    PrintIndent();
+    os << "cmsis_nn_context " << struct_name << "= {" << buf_name << "," << buf_size << "};\n";
+    return struct_name;
+  }
+
+  /*!  * \brief Emits cmsis_nn_conv_params struct */
+  std::string EmitCMSISNNConvParams(std::ostream& os, int32_t input_offset, int32_t output_offset,
+                                    int32_t stride_w, int32_t stride_h, int32_t padding_w,
+                                    int32_t padding_h, int32_t dilation_w, int32_t dilation_h,
+                                    int32_t clip_min, int32_t clip_max) {
+    std::string struct_name = "conv_params";
+    PrintIndent();
+    os << "cmsis_nn_tile stride = {" << stride_w << "," << stride_h << "};\n";
+    PrintIndent();
+    os << "cmsis_nn_tile padding = {" << padding_w << "," << padding_h << "};\n";
+    PrintIndent();
+    os << "cmsis_nn_tile dilation = {" << dilation_w << "," << dilation_h << "};\n";
+    PrintIndent();
+    os << "cmsis_nn_activation activation = {" << clip_min << "," << clip_max << "};\n";
+    PrintIndent();
+    os << "cmsis_nn_conv_params " << struct_name << " = {" << input_offset << ", " << output_offset
+       << ", stride, padding, dilation, activation};\n";
+    return struct_name;
+  }
+
+  /*!  * \brief Emits cmsis_nn_per_channel_quant_params struct */
+  std::string EmitCMSISNNPerChannelQuantParams(std::ostream& os, std::string multiplier,
+                                               std::string shift) {
+    std::string struct_name = "quant_params";
+    PrintIndent();
+    os << "cmsis_nn_per_channel_quant_params " << struct_name << " = {" << multiplier << ", "
+       << shift << "};\n";
+    return struct_name;
+  }
+
+  /*!  * \brief Emits cmsis_nn_dims struct */
+  std::string EmitCMSISNNDims(std::ostream& os, std::string tensor_type, int32_t n, int32_t h,
+                              int32_t w, int32_t c) {
+    std::string struct_name = tensor_type + "_dims";
+    PrintIndent();
+    os << "cmsis_nn_dims " << struct_name << " = {" << n << "," << h << "," << w << "," << c
+       << "};\n";
+    return struct_name;
+  }
+
+  /*!  * \brief Emits CMSIS-NN APIs for every call_extern */
+  void EmitConv2D(const CallNode* op) {
+    static const int max_num_args = 33;
+    std::string cmsis_func_name = op->args[0].as<StringImmNode>()->value;
+
+    bool bias_enabled = false;
+    if (op->args.size() == max_num_args) {
+      bias_enabled = true;
+    }
+
+    auto get_var_name = [](const CallNode* op, int id) {
+      return op->args[id].as<VarNode>()->name_hint.c_str();
+    };
+    auto get_arg_value = [](const CallNode* op, int id) {
+      return op->args[id].as<IntImmNode>()->value;
+    };
+    int arg_id = 0;
+    std::string input_data = get_var_name(op, ++arg_id);
+    std::string filter_data = get_var_name(op, ++arg_id);
+    std::string multiplier = get_var_name(op, ++arg_id);
+    std::string bias_data("0x0");
+    if (bias_enabled) {
+      bias_data = get_var_name(op, ++arg_id);
+    }
+    std::string shift = get_var_name(op, ++arg_id);
+    std::string output_data = get_var_name(op, ++arg_id);
+
+    int input_offset = get_arg_value(op, ++arg_id);

Review comment:
       As discussed offline, I will add deserialization using the API we use for serializing dimensions in relay_to_tir. Is it okay to provide this in a follow-up? This PR is blocking other changes now. @areusch 
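
       For illustration, the idea behind that follow-up as a Python sketch (the helper names are hypothetical; the real code is C++ on both sides): dims are appended as four int32 arguments in a fixed order and read back by offset, so both ends share a single encoding.

       ```python
       # Hypothetical mirror of the relay_to_tir dimension serialization.
       def serialize_dims(args, n, h, w, c):
           args.extend([n, h, w, c])

       def deserialize_dims(args, offset):
           n, h, w, c = args[offset:offset + 4]
           return (n, h, w, c), offset + 4
       ```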







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r733887138



##########
File path: tests/python/contrib/test_cmsisnn/test_conv2d.py
##########
@@ -0,0 +1,303 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""CMSIS-NN integration tests: Conv2D"""
+import itertools
+import numpy as np
+import pytest
+import tvm
+from tvm import relay
+from tvm.relay.op.contrib import cmsisnn
+
+
+from tests.python.relay.aot.aot_test_utils import (
+    AOTTestModel,
+    AOT_CORSTONE300_RUNNER,
+    AOT_DEFAULT_RUNNER,
+    generate_ref_data,
+    compile_and_run,
+)
+from utils import (
+    skip_if_no_reference_system,
+    make_module,
+    count_num_calls,
+    get_range_for_dtype_str,
+    get_same_padding,
+    get_conv2d_qnn_params,
+    make_qnn_relu,
+)
+
+
+def make_model(
+    shape,
+    kernel_shape,
+    input_zp,
+    input_sc,
+    kernel_zp,
+    kernel_sc,
+    output_zp,
+    output_sc,
+    padding,
+    strides,
+    dilation,
+    groups,
+    dtype,
+    kernel_dtype,
+    out_channels,
+    weight_format,
+    enable_bias,
+    relu_type,
+):
+    """Return a model and any parameters it may have"""
+    h_index = weight_format.index("H")
+    w_index = weight_format.index("W")
+    kernel_h = kernel_shape[h_index]
+    kernel_w = kernel_shape[w_index]
+    a = relay.var("in0", shape=shape, dtype=dtype)

Review comment:
       ACK







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r734675178



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    // Replace Constant arguments with Vars for ML Operators
+    // Perform this for non-main Call Nodes only
+    if (func_nesting_level_ && call->op.as<OpNode>()) {
+      Array<Expr> new_args;
+      for (auto& arg : post_call->args) {
+        auto* const_arg = arg.as<ConstantNode>();
+        if (const_arg && !const_arg->is_scalar()) {
+          Var var_arg = Var(gen_var_name(), const_arg->tensor_type());
+          new_args.push_back(var_arg);
+          constants_within_function_.push_back(GetRef<Constant>(const_arg));
+        } else {
+          new_args.push_back(arg);
+        }
+      }
+      final_call = Call(call->op, new_args, call->attrs, {});
+    }
+
+    // Since the constants are kicked out of partitioned functions
+    // a new call to global function is needed
+    if (auto* glob_var_node = post_call->op.as<GlobalVarNode>()) {
+      auto glob_var = GetRef<GlobalVar>(glob_var_node);
+      auto glob_func = Downcast<Function>(mod_->Lookup(glob_var));
+      auto new_glob_func = VisitExpr(glob_func);
+      if (!new_glob_func.same_as(glob_func)) {
+        mod_->Update(glob_var, Downcast<Function>(new_glob_func));
+        Array<Expr> new_args = post_call->args;
+        ICHECK(function_to_constants_.find(glob_func) != function_to_constants_.end());
+        for (auto constant : function_to_constants_.at(glob_func)) {
+          new_args.push_back(constant);
+        }
+        final_call = Call(glob_var, new_args);
+      }
+    }
+
+    // Since the constants are kicked out of the local partitioned functions

Review comment:
    We are essentially extracting constants, so none of them seems to match this action :pensive:







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r733863577



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }

Review comment:
    It has crossed my mind, but I wanted to see where this pass ends up eventually.

##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    // Replace Constant arguments with Vars for ML Operators
+    // Perform this for non-main Call Nodes only
+    if (func_nesting_level_ && call->op.as<OpNode>()) {
+      Array<Expr> new_args;
+      for (auto& arg : post_call->args) {
+        auto* const_arg = arg.as<ConstantNode>();
+        if (const_arg && !const_arg->is_scalar()) {
+          Var var_arg = Var(gen_var_name(), const_arg->tensor_type());
+          new_args.push_back(var_arg);
+          constants_within_function_.push_back(GetRef<Constant>(const_arg));
+        } else {
+          new_args.push_back(arg);
+        }
+      }
+      final_call = Call(call->op, new_args, call->attrs, {});
+    }
+
+    // Since the constants are kicked out of partitioned functions
+    // a new call to global function is needed
+    if (auto* glob_var_node = post_call->op.as<GlobalVarNode>()) {
+      auto glob_var = GetRef<GlobalVar>(glob_var_node);
+      auto glob_func = Downcast<Function>(mod_->Lookup(glob_var));
+      auto new_glob_func = VisitExpr(glob_func);
+      if (!new_glob_func.same_as(glob_func)) {
+        mod_->Update(glob_var, Downcast<Function>(new_glob_func));
+        Array<Expr> new_args = post_call->args;
+        ICHECK(function_to_constants_.find(glob_func) != function_to_constants_.end());
+        for (auto constant : function_to_constants_.at(glob_func)) {
+          new_args.push_back(constant);
+        }
+        final_call = Call(glob_var, new_args);
+      }
+    }
+
+    // Since the constants are kicked out of the local partitioned functions

Review comment:
       I don't like the name extract_constant either. Can you suggest something?

##########
File path: src/relay/backend/contrib/cmsisnn/generate_constants.cc
##########
@@ -0,0 +1,230 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/attrs/transform.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+Expr MakeTranspose(Expr data, Array<Integer> axes);

Review comment:
       Didn't know that. Thanks!

##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -43,23 +44,153 @@ class RelayToTIRVisitor : public MixedModeVisitor {
   inline IntImm ToArg(int32_t value) { return IntImm(DataType::Int(32), value); }
 
   void CreatePrimFuncForExtern(Array<tir::Var> func_signature,
-                               tvm::Array<PrimExpr> call_extern_args) {
+                               tvm::Array<PrimExpr> call_extern_args, int context_buffer_size = 0) {
     Map<String, ObjectRef> dict_attrs;
     dict_attrs.Set("global_symbol", func_name_);
     dict_attrs.Set("tir.noalias", Bool(true));
 
     tir::Stmt body = tir::Evaluate(
         tvm::tir::Call(DataType::Int(8), tir::builtin::call_extern(), call_extern_args));
 
+    if (context_buffer_size) {
+      // TODO(@ashutosh-arm) while supporting MVE, we need to move allocation through TVMBAW

Review comment:
    In this version, I am using the scope "global" instead of "global.workspace". I was hoping this would get resolved in the following PR (for 4a) that fixes the buffer sizes, so this was just a temporary change to fix the FVP hang seen when the context buffer is needed.
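    For illustration, a minimal sketch of that temporary allocation (assumed variable names, not the exact PR code; `body` and `context_buffer_size` come from CreatePrimFuncForExtern in the diff above):

        // Emit the context buffer as a tir::Allocate with "global" scope;
        // a follow-up PR could route this through TVMBackendAllocWorkspace.
        tvm::tir::Var context_buffer(
            "context_buffer",
            tvm::PointerType(tvm::PrimType(tvm::DataType::Int(8)), "global"));
        body = tvm::tir::Allocate(
            context_buffer, tvm::DataType::Int(8),
            {tvm::IntImm(tvm::DataType::Int(32), context_buffer_size)},
            tvm::const_true(), body);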







[GitHub] [tvm] Mousius commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
Mousius commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r734665254



##########
File path: src/relay/backend/contrib/cmsisnn/tir_to_runtime.cc
##########
@@ -69,6 +225,10 @@ class CodeGenCMSISNN : public CodeGenC {
     ss << "}\n";
     ss << "#endif\n";
   }
+
+ private:

Review comment:
    Although it's not explicitly stated, I think the expectation is that by grouping them you shouldn't repeat the access specifier (https://google.github.io/styleguide/cppguide.html#Declaration_Order) - but as I say, there's nothing explicit.
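    For reference, the grouping the style guide implies (illustrative class, not from the PR):

        class CodeGenExample {
         public:
          void Run();

         private:
          // Methods first, then data members, all under a single "private:".
          void EmitPreamble();
          int indent_ = 0;
        };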







[GitHub] [tvm] manupa-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r734709297



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {

Review comment:
    VisitExpr will create a new Expr, right? I'm trying to understand whether there is a case where this `if` is not executed.
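    For context, a hypothetical illustration of when the `if` is skipped: ExprMutator-style visitors return the original reference when no child changed, so object identity survives the visit (`Identity` below is a stand-in for VisitExpr on an unchanged body):

        tvm::relay::Expr Identity(const tvm::relay::Expr& e) { return e; }

        void Demo(const tvm::relay::Function& func) {
          tvm::relay::Expr new_body = Identity(func->body);
          // Identity preserved: same_as is true, so no new Function is built.
          ICHECK(new_body.same_as(func->body));
        }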







[GitHub] [tvm] manupa-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r735466171



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {

Review comment:
       Ack.







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r735555477



##########
File path: src/relay/backend/contrib/cmsisnn/generate_constants.cc
##########
@@ -0,0 +1,230 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/attrs/transform.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+Expr MakeTranspose(Expr data, Array<Integer> axes);
+namespace contrib {
+namespace cmsisnn {
+
+class GenerateConstantsMutator : public MixedModeMutator {
+ public:
+  explicit GenerateConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  /*!  * \brief Converts Kernel layout from HWIO to OHWI to align to CMSIS-NN requirements */
+  Expr ConvertKernelLayout(Expr kernel_expr, const Conv2DAttrs* conv2d_attrs, Attrs* new_attrs) {
+    auto attrs = make_object<Conv2DAttrs>();
+    attrs->strides = std::move(conv2d_attrs->strides);
+    attrs->padding = std::move(conv2d_attrs->padding);
+    attrs->dilation = std::move(conv2d_attrs->dilation);
+    attrs->groups = conv2d_attrs->groups;
+    attrs->channels = std::move(conv2d_attrs->channels);
+    attrs->kernel_size = std::move(conv2d_attrs->kernel_size);
+    attrs->data_layout = std::move(conv2d_attrs->data_layout);
+    attrs->kernel_layout = runtime::String("OHWI");
+    attrs->out_layout = std::move(conv2d_attrs->out_layout);
+    attrs->out_dtype = std::move(conv2d_attrs->out_dtype);
+    *new_attrs = tvm::Attrs{attrs};
+
+    IRModule kernel_module;
+    auto func_body = MakeTranspose(kernel_expr, {Integer(3), Integer(0), Integer(1), Integer(2)});
+    auto kernel_func =
+        Function(FreeVars(func_body), func_body, Type(), FreeTypeVars(func_body, kernel_module));
+    GlobalVar kernel_var("main");
+    kernel_module->Add(kernel_var, kernel_func);
+    kernel_module = relay::transform::FoldConstant()(kernel_module);
+    kernel_func = Downcast<Function>(kernel_module->Lookup("main"));
+    return kernel_func->body;
+  }
+
+  /*!  * \brief Performs weight transpose and substitutes existing constants in the composite
+   *            function for Conv2D with CMSIS-NN Requantize constants */
+  Expr GenerateConv2dRequantConstants(const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* conv2d_call = nullptr;
+    auto* final_call = expr.as<CallNode>();
+    auto* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();

Review comment:
    Any reason why we prefer one method over the other? Just for my understanding.







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r735380063



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    // Replace Constant arguments with Vars for ML Operators
+    // Perform this for non-main Call Nodes only
+    if (func_nesting_level_ && call->op.as<OpNode>()) {
+      Array<Expr> new_args;
+      for (auto& arg : post_call->args) {
+        auto* const_arg = arg.as<ConstantNode>();
+        if (const_arg && !const_arg->is_scalar()) {
+          Var var_arg = Var(gen_var_name(), const_arg->tensor_type());
+          new_args.push_back(var_arg);
+          constants_within_function_.push_back(GetRef<Constant>(const_arg));

Review comment:
       Agreed







[GitHub] [tvm] manupa-arm commented on pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#issuecomment-962006927


   @ashutosh-arm could you retrigger this one? The Docker image is updated now.





[GitHub] [tvm] Mousius commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
Mousius commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r734787732



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    // Replace Constant arguments with Vars for ML Operators
+    // Perform this for non-main Call Nodes only
+    if (func_nesting_level_ && call->op.as<OpNode>()) {
+      Array<Expr> new_args;
+      for (auto& arg : post_call->args) {
+        auto* const_arg = arg.as<ConstantNode>();
+        if (const_arg && !const_arg->is_scalar()) {
+          Var var_arg = Var(gen_var_name(), const_arg->tensor_type());
+          new_args.push_back(var_arg);
+          constants_within_function_.push_back(GetRef<Constant>(const_arg));
+        } else {
+          new_args.push_back(arg);
+        }
+      }
+      final_call = Call(call->op, new_args, call->attrs, {});
+    }
+
+    // Since the constants are kicked out of partitioned functions
+    // a new call to global function is needed
+    if (auto* glob_var_node = post_call->op.as<GlobalVarNode>()) {
+      auto glob_var = GetRef<GlobalVar>(glob_var_node);
+      auto glob_func = Downcast<Function>(mod_->Lookup(glob_var));
+      auto new_glob_func = VisitExpr(glob_func);
+      if (!new_glob_func.same_as(glob_func)) {
+        mod_->Update(glob_var, Downcast<Function>(new_glob_func));
+        Array<Expr> new_args = post_call->args;
+        ICHECK(function_to_constants_.find(glob_func) != function_to_constants_.end());

Review comment:
    Given this is capped at the number of functions, do we know whether this meaningfully affects performance?







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r743820728



##########
File path: python/tvm/relay/op/contrib/cmsisnn.py
##########
@@ -52,37 +54,85 @@ def partition_for_cmsisnn(mod, params=None, **opts):
             transform.InferType(),
             transform.MergeComposite(pattern_table()),
             transform.AnnotateTarget("cmsisnn"),
-            transform.MergeCompilerRegions(),

Review comment:
       Is there a problem with the first representation?







[GitHub] [tvm] Mousius commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
Mousius commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r743859261



##########
File path: python/tvm/relay/op/contrib/cmsisnn.py
##########
@@ -52,37 +54,85 @@ def partition_for_cmsisnn(mod, params=None, **opts):
             transform.InferType(),
             transform.MergeComposite(pattern_table()),
             transform.AnnotateTarget("cmsisnn"),
-            transform.MergeCompilerRegions(),

Review comment:
    With https://github.com/apache/tvm/pull/9397 the `Target` will produce a single `IRModule` into the `runtime::Module` rather than creating many. After that, the functions are just wrappers around the CMSIS-NN calls: they essentially create the CMSIS-NN structures and invoke them. That seems the minimal amount of code for them, short of packing multiple CMSIS-NN API calls into a single PrimFunc, which we didn't intend to do.
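    As a sketch of the intended shape (assumed symbol names; the real code is emitted by the codegen, and the CMSIS-NN call is shown only schematically in comments):

        // One thin wrapper per PrimFunc: build the CMSIS-NN structures,
        // then make a single library call such as arm_convolve_wrapper_s8.
        extern "C" int32_t conv2d_wrapper(const int8_t* input,
                                          const int8_t* filter,
                                          const int32_t* bias,
                                          int8_t* output) {
          // cmsis_nn_context ctx = ...;             // context buffer, if any
          // cmsis_nn_conv_params conv_params = ...; // zero points, strides
          // return arm_convolve_wrapper_s8(&ctx, &conv_params, ..., output);
          return 0;  // placeholder so the sketch stands alone
        }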







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r743830843



##########
File path: python/tvm/relay/op/contrib/cmsisnn.py
##########
@@ -52,37 +54,85 @@ def partition_for_cmsisnn(mod, params=None, **opts):
             transform.InferType(),
             transform.MergeComposite(pattern_table()),
             transform.AnnotateTarget("cmsisnn"),
-            transform.MergeCompilerRegions(),

Review comment:
    Enabling this pass gets us the latter one. Irrespective of the Relay that we get post partitioning, RelayToTIR works on composite functions, each of which is translated into a separate PrimFunc. So, independent of the pass, we currently end up producing multiple '.c' files.

    We made the choice of having a separate PrimFunc per operator to do away with the bookkeeping required in RelayToTIR, at the cost of having to invoke different C functions at runtime.

    @Mousius do you want to add anything else?
   







[GitHub] [tvm] manupa-arm commented on pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#issuecomment-971414337


   Thanks @ashutosh-arm @Mousius @areusch!





[GitHub] [tvm] manupa-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r734707993



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}

Review comment:
    Why does it have to modify the input IRModule?
    (Because the transform pass returns an IRModule, right?)







[GitHub] [tvm] Mousius commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
Mousius commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r734786945



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}

Review comment:
    I think the idea is just to not mutate the `IRModule`, right @manupa-arm? Creating a new `IRModule` would add more overhead, I suspect.
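    For completeness, a minimal sketch of the pass plumbing under discussion (assumed pass name): the pass function receives an IRModule handle and returns one, so reusing the same handle avoids building a second module.

        tvm::transform::Pass ExtractConstantsSketch() {
          tvm::runtime::TypedPackedFunc<tvm::IRModule(tvm::IRModule,
                                                      tvm::transform::PassContext)>
              pass_func = [](tvm::IRModule mod, tvm::transform::PassContext) {
                // ... run the mutator; mod->Update(gv, new_func) as needed ...
                return mod;  // same handle, no extra IRModule created
              };
          return tvm::transform::CreateModulePass(pass_func, 0,
                                                  "ExtractConstantsSketch", {});
        }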







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r733889168



##########
File path: tests/python/contrib/test_cmsisnn/test_conv2d.py
##########
@@ -0,0 +1,303 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""CMSIS-NN integration tests: Conv2D"""
+import itertools
+import numpy as np
+import pytest
+import tvm
+from tvm import relay
+from tvm.relay.op.contrib import cmsisnn
+
+
+from tests.python.relay.aot.aot_test_utils import (
+    AOTTestModel,
+    AOT_CORSTONE300_RUNNER,
+    AOT_DEFAULT_RUNNER,
+    generate_ref_data,
+    compile_and_run,
+)
+from utils import (
+    skip_if_no_reference_system,
+    make_module,
+    count_num_calls,
+    get_range_for_dtype_str,
+    get_same_padding,
+    get_conv2d_qnn_params,
+    make_qnn_relu,
+)
+
+
+def make_model(
+    shape,
+    kernel_shape,
+    input_zp,
+    input_sc,
+    kernel_zp,
+    kernel_sc,
+    output_zp,
+    output_sc,
+    padding,
+    strides,
+    dilation,
+    groups,
+    dtype,
+    kernel_dtype,
+    out_channels,
+    weight_format,
+    enable_bias,
+    relu_type,
+):
+    """Return a model and any parameters it may have"""
+    h_index = weight_format.index("H")
+    w_index = weight_format.index("W")
+    kernel_h = kernel_shape[h_index]
+    kernel_w = kernel_shape[w_index]
+    a = relay.var("in0", shape=shape, dtype=dtype)
+    p = (0, 0, 0, 0)
+    if padding == "SAME":
+        p = get_same_padding((shape[1], shape[2]), (kernel_h, kernel_w), dilation, strides)
+        a = relay.nn.pad(
+            a,
+            pad_width=[(0, 0), (p[0], p[2]), (p[1], p[3]), (0, 0)],
+            pad_value=input_zp,
+            pad_mode="constant",
+        )
+        shape = (shape[0], shape[1] + p[0] + p[2], shape[2] + p[1] + p[3], shape[3])
+
+    weight_shape = (kernel_h, kernel_w, shape[3] // groups, out_channels)
+    w = tvm.nd.array(
+        np.random.randint(
+            np.iinfo(kernel_dtype).min,
+            high=np.iinfo(kernel_dtype).max,
+            size=weight_shape,
+            dtype=kernel_dtype,
+        )
+    )
+    weights = relay.const(w, kernel_dtype)
+    conv = relay.qnn.op.conv2d(
+        a,
+        weights,
+        input_zero_point=relay.const(input_zp, "int32"),
+        kernel_zero_point=relay.const(kernel_zp, "int32"),
+        input_scale=relay.const(input_sc, "float32"),
+        kernel_scale=relay.const(kernel_sc, "float32"),
+        kernel_size=(kernel_h, kernel_w),
+        data_layout="NHWC",
+        kernel_layout=weight_format,
+        dilation=dilation,
+        strides=strides,
+        groups=groups,
+        channels=out_channels,
+        padding=p,
+        out_dtype="int32",
+    )
+    b = tvm.nd.array(np.random.randint(0, high=10, size=(out_channels,), dtype="int32"))
+    bc = relay.const(b, "int32")
+    bias = conv
+    if enable_bias:
+        bias = relay.nn.bias_add(conv, bc, axis=3)

Review comment:
       ACK







[GitHub] [tvm] Mousius commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
Mousius commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r734669302



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>

Review comment:
    Do we need to add unit tests for this, given the reference system tests? This will eventually be replaced by TIR constants, right?







[GitHub] [tvm] manupa-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r734368762



##########
File path: python/tvm/relay/op/contrib/cmsisnn.py
##########
@@ -47,42 +47,93 @@ def partition_for_cmsisnn(mod, params=None, **opts):
     if params:
         mod["main"] = bind_params_by_name(mod["main"], params)
 
+    tvm._ffi._init_api("relay.ext.cmsisnn.transform", __name__)
+
     seq = tvm.transform.Sequential(
         [
             transform.InferType(),
             transform.MergeComposite(pattern_table()),
             transform.AnnotateTarget("cmsisnn"),
-            transform.MergeCompilerRegions(),
             transform.PartitionGraph(),
+            GenerateCMSISNNConstants(),
+            ExtractConstantsFromPartitionedFunction(),
+            transform.InferType(),
         ]
     )
-
     return seq(mod)
 
 
 @register_pattern_table("cmsisnn")
 def pattern_table():
     """Get the cmsisnn compiler pattern table."""
 
-    def softmax_pattern():
+    def qnn_softmax_pattern():
+        """Create pattern for quantized softmax"""
         pattern = is_op("qnn.dequantize")(wildcard(), is_constant(), is_constant())
         pattern = is_op("nn.softmax")(pattern)
         pattern = is_op("qnn.quantize")(pattern, is_constant(), is_constant())
         return pattern
 
-    def check_quantized_softmax(extract):
+    def check_qnn_softmax(pattern):
         """Check if softmax is supported by CMSIS-NN."""
-        dequantize_call = extract.args[0].args[0]
-        scale = extract.args[1].data.numpy().item(0)
-        zero_point = extract.args[2].data.numpy().item(0)
+        dequantize_call = pattern.args[0].args[0]
+        scale = pattern.args[1].data.numpy().item(0)
+        zero_point = pattern.args[2].data.numpy().item(0)
 
         # check for dtypes of quantize and dequantize
         return (
             (scale == 1.0 / 256 and zero_point == -128)
-            and extract.attrs.out_dtype == "int8"
+            and pattern.attrs.out_dtype == "int8"
             and dequantize_call.args[0].checked_type.dtype == "int8"
         )
 
+    def qnn_conv2d_pattern():
+        """Create pattern for qnn.conv2D with optional fused relu."""
+        qnn_conv2d = is_op("qnn.conv2d")(
+            wildcard(), is_constant(), is_constant(), is_constant(), is_constant(), is_constant()
+        ).has_attr({"kernel_layout": "HWIO"})
+        bias_add = is_op("nn.bias_add")(qnn_conv2d, is_constant())
+        req = is_op("qnn.requantize")(
+            qnn_conv2d | bias_add, is_constant(), is_constant(), is_constant(), is_constant()
+        )
+        clip_or_req = req.optional(is_op("clip"))
+        return clip_or_req
+
+    def check_qnn_conv2d(pattern):
+        """Check if the Conv2D is supported by CMSIS-NN."""
+        if str(pattern.op.name) == "clip":
+            relu = pattern
+            requantize = relu.args[0]
+        else:
+            requantize = pattern
+        requantize_input = requantize.args[0]
+        bias_add = None
+        bias_dtype = "int32"
+        if str(requantize_input.op.name) == "nn.bias_add":
+            bias_add = requantize_input
+            conv2d = bias_add.args[0]
+            bias_dtype = bias_add.args[1].checked_type.dtype
+        else:
+            conv2d = requantize_input
+        conv2d_input = conv2d.args[0]
+        conv2d_weight = conv2d.args[1]
+
+        # kernel zero_point should be 0
+        kernel_zp = conv2d.args[3].data.numpy()
+        kernel_zp = [kernel_zp] if kernel_zp.ndim == 0 else kernel_zp
+
+        return (
+            conv2d.attrs.kernel_layout == "HWIO"

Review comment:
    Why can't we offload any other kernel layout? We are converting to OHWI anyway, right?
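    To make the question concrete: the permutation to OHWI could be derived from any 4-character source layout instead of hard-coding one (hypothetical helper, not in the PR):

        #include <string>

        tvm::Array<tvm::Integer> PermutationToOHWI(const std::string& src) {
          tvm::Array<tvm::Integer> axes;
          for (char c : std::string("OHWI")) {
            // Assumes src is a valid 4-char layout containing O, H, W, I.
            axes.push_back(tvm::Integer(static_cast<int>(src.find(c))));
          }
          return axes;  // e.g. "HWIO" -> {3, 0, 1, 2}
        }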

##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {

Review comment:
       I think this condition is not useful.

##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    // Replace Constant arguments with Vars for ML Operators
+    // Perform this for non-main Call Nodes only
+    if (func_nesting_level_ && call->op.as<OpNode>()) {
+      Array<Expr> new_args;
+      for (auto& arg : post_call->args) {

Review comment:
    nit: I think you can just do `Downcast<Call>` to specialize the Expr to be a Call Expr and access args.
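    That is, a sketch of the suggested simplification:

        // Downcast once, then use the strongly typed handle directly.
        tvm::relay::Call post_call = tvm::runtime::Downcast<tvm::relay::Call>(post);
        for (const tvm::relay::Expr& arg : post_call->args) {
          // ... inspect arg ...
        }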

##########
File path: src/relay/backend/contrib/cmsisnn/generate_constants.cc
##########
@@ -0,0 +1,230 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */

Review comment:
    Can we add documentation as to what this pass does?

    Also, we would need unit testing for this pass.

##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    // Replace Constant arguments with Vars for ML Operators
+    // Perform this for non-main Call Nodes only
+    if (func_nesting_level_ && call->op.as<OpNode>()) {
+      Array<Expr> new_args;
+      for (auto& arg : post_call->args) {
+        auto* const_arg = arg.as<ConstantNode>();
+        if (const_arg && !const_arg->is_scalar()) {
+          Var var_arg = Var(gen_var_name(), const_arg->tensor_type());
+          new_args.push_back(var_arg);
+          constants_within_function_.push_back(GetRef<Constant>(const_arg));
+        } else {
+          new_args.push_back(arg);
+        }
+      }
+      final_call = Call(call->op, new_args, call->attrs, {});
+    }
+
+    // Since the constants are kicked out of partitioned functions
+    // a new call to global function is needed
+    if (auto* glob_var_node = post_call->op.as<GlobalVarNode>()) {
+      auto glob_var = GetRef<GlobalVar>(glob_var_node);
+      auto glob_func = Downcast<Function>(mod_->Lookup(glob_var));
+      auto new_glob_func = VisitExpr(glob_func);
+      if (!new_glob_func.same_as(glob_func)) {
+        mod_->Update(glob_var, Downcast<Function>(new_glob_func));
+        Array<Expr> new_args = post_call->args;
+        ICHECK(function_to_constants_.find(glob_func) != function_to_constants_.end());
+        for (auto constant : function_to_constants_.at(glob_func)) {
+          new_args.push_back(constant);
+        }
+        final_call = Call(glob_var, new_args);
+      }
+    }
+
+    // Since the constants are kicked out of the local partitioned functions
+    // a new call to local function is needed
+    if (auto* func_node = call->op.as<FunctionNode>()) {
+      Function func = GetRef<Function>(func_node);
+      auto new_func = VisitExpr(func);
+      if (!new_func.same_as(func)) {
+        Array<Expr> new_args = post_call->args;
+        ICHECK(function_to_constants_.find(func) != function_to_constants_.end());
+        for (auto constant : function_to_constants_.at(func)) {
+          constants_within_function_.push_back(constant);
+          Var var_arg = Var(gen_var_name(), constant->tensor_type());
+          new_args.push_back(var_arg);
+        }
+        final_call = Call(new_func, new_args);
+      }
+    }
+
+    return final_call;
+  }
+
+ private:
+  /* \brief Updated module where all calls have replaced constants with new variables */
+  IRModule mod_;
+  /* \brief Maintains mapping of original function to the replaced constants */
+  Map<Function, Array<Constant>> function_to_constants_;
+  /* \brief Constants being kicked out of a function during the function visit */
+  Array<Constant> constants_within_function_;
+  /* \brief Keeps track of variables being created */
+  int var_count_ = 0;
+  /* \brief Keeps track of function scope */
+  int func_nesting_level_ = 0;
+};
+
+/*!  * \brief Kicks out all constants out of the partitioned function into main()  */
+IRModule ExtractConstants(IRModule mod) {

Review comment:
       Could we use const IRModule& here?

##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();

Review comment:
       nit: We can avoid this with the above suggestion. Let me know what you think.

##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}

Review comment:
       Would it be possible not to modify the input IRModule?

##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    // Replace Constant arguments with Vars for ML Operators
+    // Perform this for non-main Call Nodes only
+    if (func_nesting_level_ && call->op.as<OpNode>()) {
+      Array<Expr> new_args;
+      for (auto& arg : post_call->args) {
+        auto* const_arg = arg.as<ConstantNode>();
+        if (const_arg && !const_arg->is_scalar()) {
+          Var var_arg = Var(gen_var_name(), const_arg->tensor_type());
+          new_args.push_back(var_arg);
+          constants_within_function_.push_back(GetRef<Constant>(const_arg));
+        } else {
+          new_args.push_back(arg);
+        }
+      }
+      final_call = Call(call->op, new_args, call->attrs, {});
+    }
+
+    // Since the constants are kicked out of partitioned functions
+    // a new call to global function is needed
+    if (auto* glob_var_node = post_call->op.as<GlobalVarNode>()) {
+      auto glob_var = GetRef<GlobalVar>(glob_var_node);
+      auto glob_func = Downcast<Function>(mod_->Lookup(glob_var));
+      auto new_glob_func = VisitExpr(glob_func);
+      if (!new_glob_func.same_as(glob_func)) {
+        mod_->Update(glob_var, Downcast<Function>(new_glob_func));
+        Array<Expr> new_args = post_call->args;
+        ICHECK(function_to_constants_.find(glob_func) != function_to_constants_.end());
+        for (auto constant : function_to_constants_.at(glob_func)) {
+          new_args.push_back(constant);
+        }
+        final_call = Call(glob_var, new_args);
+      }
+    }
+
+    // Since the constants are kicked out of the local partitioned functions
+    // a new call to local function is needed
+    if (auto* func_node = call->op.as<FunctionNode>()) {

Review comment:
       Can we refactor this section and the one above? I see a bit of code duplication, and I think the only difference is the origin of the Function (local vs. global).
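   
   As a rough, untested sketch (names are placeholders), the shared part could become a helper:
   
       // Appends the constants extracted from `func` to a call's argument list,
       // either as the constants themselves or as fresh Vars standing in for them.
       Array<Expr> AppendExtractedConstants(const Function& func, const Array<Expr>& args,
                                            bool as_vars) {
         Array<Expr> new_args = args;
         ICHECK(function_to_constants_.count(func));
         for (const auto& constant : function_to_constants_.at(func)) {
           new_args.push_back(as_vars ? Expr(Var(gen_var_name(), constant->tensor_type()))
                                      : Expr(constant));
         }
         return new_args;
       }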

##########
File path: src/relay/backend/contrib/cmsisnn/generate_constants.cc
##########
@@ -0,0 +1,230 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/attrs/transform.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+Expr MakeTranspose(Expr data, Array<Integer> axes);
+namespace contrib {
+namespace cmsisnn {
+
+class GenerateConstantsMutator : public MixedModeMutator {
+ public:
+  explicit GenerateConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  /*!  * \brief Converts Kernel layout from HWIO to OHWI to align to CMSIS-NN requirements */
+  Expr ConvertKernelLayout(Expr kernel_expr, const Conv2DAttrs* conv2d_attrs, Attrs* new_attrs) {
+    auto attrs = make_object<Conv2DAttrs>();
+    attrs->strides = std::move(conv2d_attrs->strides);
+    attrs->padding = std::move(conv2d_attrs->padding);
+    attrs->dilation = std::move(conv2d_attrs->dilation);
+    attrs->groups = conv2d_attrs->groups;
+    attrs->channels = std::move(conv2d_attrs->channels);
+    attrs->kernel_size = std::move(conv2d_attrs->kernel_size);
+    attrs->data_layout = std::move(conv2d_attrs->data_layout);
+    attrs->kernel_layout = runtime::String("OHWI");
+    attrs->out_layout = std::move(conv2d_attrs->out_layout);
+    attrs->out_dtype = std::move(conv2d_attrs->out_dtype);
+    *new_attrs = tvm::Attrs{attrs};
+
+    IRModule kernel_module;
+    auto func_body = MakeTranspose(kernel_expr, {Integer(3), Integer(0), Integer(1), Integer(2)});
+    auto kernel_func =
+        Function(FreeVars(func_body), func_body, Type(), FreeTypeVars(func_body, kernel_module));
+    GlobalVar kernel_var("main");
+    kernel_module->Add(kernel_var, kernel_func);
+    kernel_module = relay::transform::FoldConstant()(kernel_module);
+    kernel_func = Downcast<Function>(kernel_module->Lookup("main"));
+    return kernel_func->body;
+  }
+
+  /*!  * \brief Performs weight transpose and substitutes existing constants in the composite
+   *            function for Conv2D with CMSIS-NN Requantize constants */
+  Expr GenerateConv2dRequantConstants(const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* conv2d_call = nullptr;
+    auto* final_call = expr.as<CallNode>();
+    auto* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    auto* requantize_input = requantize_call->args[0].as<CallNode>();
+    auto* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      conv2d_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      conv2d_call = requantize_input;
+    }
+
+    // Transpose weights: HWIO -> OHWI
+    auto* conv2d_attrs = conv2d_call->attrs.as<Conv2DAttrs>();
+    tvm::Attrs new_conv2d_attrs;
+    Expr transposed_kernel =
+        ConvertKernelLayout(conv2d_call->args[1], conv2d_attrs, &new_conv2d_attrs);
+
+    // Obtain input and output scales from Relay's Requantization
+    int64_t out_channels = conv2d_attrs->channels.as<IntImmNode>()->value;
+    float output_scale = GetScalarFromConstant<float>(requantize_call->args[3]);
+    auto input_scales = tvm::relay::qnn::GetFloatVectorFromConstant(requantize_call->args[1]);
+    ICHECK(input_scales.size() == static_cast<size_t>(out_channels));
+
+    // Calculate requantization multiplier and shift

Review comment:
       Would you be able to add some comments as to what is being calculated here?
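   
   From reading the code, I believe the intent is roughly the following; a comment like this in the source would help:
   
       // For each output channel i, the floating-point requantization scale
       //   effective_scale[i] = input_scale[i] / output_scale
       // is decomposed into a Q31 fixed-point multiplier and a power-of-two
       // shift such that
       //   effective_scale[i] ~= multiplier[i] * 2^(shift[i] - 31)
       // which is the integer-only form that CMSIS-NN consumes at runtime.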

##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>

Review comment:
       Can we add documentation on what this pass would do?
   
   Also, we would need unit testing for this pass.

##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {

Review comment:
       I do not follow this logic. Why is this needed?

##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    // Replace Constant arguments with Vars for ML Operators
+    // Perform this for non-main Call Nodes only
+    if (func_nesting_level_ && call->op.as<OpNode>()) {
+      Array<Expr> new_args;
+      for (auto& arg : post_call->args) {
+        auto* const_arg = arg.as<ConstantNode>();
+        if (const_arg && !const_arg->is_scalar()) {
+          Var var_arg = Var(gen_var_name(), const_arg->tensor_type());
+          new_args.push_back(var_arg);
+          constants_within_function_.push_back(GetRef<Constant>(const_arg));
+        } else {
+          new_args.push_back(arg);
+        }
+      }
+      final_call = Call(call->op, new_args, call->attrs, {});
+    }
+
+    // Since the constants are kicked out of partitioned functions
+    // a new call to global function is needed
+    if (auto* glob_var_node = post_call->op.as<GlobalVarNode>()) {
+      auto glob_var = GetRef<GlobalVar>(glob_var_node);
+      auto glob_func = Downcast<Function>(mod_->Lookup(glob_var));
+      auto new_glob_func = VisitExpr(glob_func);
+      if (!new_glob_func.same_as(glob_func)) {
+        mod_->Update(glob_var, Downcast<Function>(new_glob_func));
+        Array<Expr> new_args = post_call->args;
+        ICHECK(function_to_constants_.find(glob_func) != function_to_constants_.end());

Review comment:
       This would be a bit compute-intensive, as this is an Array and I think the lookup takes O(N).
   If we require this, I would suggest using an unordered_set with constant access time.
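   
   For example, keeping the existing mapping but with constant-time lookup (sketch):
   
       std::unordered_map<Function, Array<Constant>, ObjectPtrHash, ObjectPtrEqual>
           function_to_constants_;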

##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    // Replace Constant arguments with Vars for ML Operators
+    // Perform this for non-main Call Nodes only
+    if (func_nesting_level_ && call->op.as<OpNode>()) {
+      Array<Expr> new_args;
+      for (auto& arg : post_call->args) {
+        auto* const_arg = arg.as<ConstantNode>();
+        if (const_arg && !const_arg->is_scalar()) {
+          Var var_arg = Var(gen_var_name(), const_arg->tensor_type());
+          new_args.push_back(var_arg);
+          constants_within_function_.push_back(GetRef<Constant>(const_arg));

Review comment:
       nit: I would Downcast the ObjectRef (expr) to Constant here throughout, because ultimately you would need the ObjectRef again.
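   
   i.e., something along these lines (sketch):
   
       if (arg->IsInstance<ConstantNode>()) {
         Constant const_arg = Downcast<Constant>(arg);
         if (!const_arg->is_scalar()) {
           constants_within_function_.push_back(const_arg);  // no GetRef needed later
         }
       }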

##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    // Replace Constant arguments with Vars for ML Operators
+    // Perform this for non-main Call Nodes only
+    if (func_nesting_level_ && call->op.as<OpNode>()) {
+      Array<Expr> new_args;
+      for (auto& arg : post_call->args) {
+        auto* const_arg = arg.as<ConstantNode>();
+        if (const_arg && !const_arg->is_scalar()) {

Review comment:
       nit: it is not always required to explicitly access the ConstantNode; you should be able to use Constant directly.

##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;

Review comment:
       A better design suggestion (feel free to disagree :)): we could use a stack of functions that gets pushed and popped as the visitor goes in and out of scope.
   Thus, we don't need the stateful clearing of the constants_within_function_ array and the assignment performed here.
   
   Instead, as and when the visitor encounters a constant, it could be inserted into a Map<Function, Array<Constant>>.
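   
   Roughly (untested sketch; needs <stack>):
   
       std::stack<Function> function_stack_;
   
       Expr VisitExpr_(const FunctionNode* func) final {
         function_stack_.push(GetRef<Function>(func));
         auto new_body = VisitExpr(func->body);
         function_stack_.pop();
         ...
       }
   
       // Wherever a non-scalar Constant is encountered:
       Function current = function_stack_.top();
       Array<Constant> consts;
       if (function_to_constants_.count(current)) {
         consts = function_to_constants_.at(current);
       }
       consts.push_back(constant);
       function_to_constants_.Set(current, consts);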

##########
File path: src/relay/backend/contrib/cmsisnn/generate_constants.cc
##########
@@ -0,0 +1,230 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/attrs/transform.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+Expr MakeTranspose(Expr data, Array<Integer> axes);
+namespace contrib {
+namespace cmsisnn {
+
+class GenerateConstantsMutator : public MixedModeMutator {
+ public:
+  explicit GenerateConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  /*!  * \brief Converts Kernel layout from HWIO to OHWI to align to CMSIS-NN requirements */
+  Expr ConvertKernelLayout(Expr kernel_expr, const Conv2DAttrs* conv2d_attrs, Attrs* new_attrs) {
+    auto attrs = make_object<Conv2DAttrs>();
+    attrs->strides = std::move(conv2d_attrs->strides);
+    attrs->padding = std::move(conv2d_attrs->padding);
+    attrs->dilation = std::move(conv2d_attrs->dilation);
+    attrs->groups = conv2d_attrs->groups;
+    attrs->channels = std::move(conv2d_attrs->channels);
+    attrs->kernel_size = std::move(conv2d_attrs->kernel_size);
+    attrs->data_layout = std::move(conv2d_attrs->data_layout);
+    attrs->kernel_layout = runtime::String("OHWI");
+    attrs->out_layout = std::move(conv2d_attrs->out_layout);
+    attrs->out_dtype = std::move(conv2d_attrs->out_dtype);
+    *new_attrs = tvm::Attrs{attrs};
+
+    IRModule kernel_module;
+    auto func_body = MakeTranspose(kernel_expr, {Integer(3), Integer(0), Integer(1), Integer(2)});
+    auto kernel_func =
+        Function(FreeVars(func_body), func_body, Type(), FreeTypeVars(func_body, kernel_module));
+    GlobalVar kernel_var("main");
+    kernel_module->Add(kernel_var, kernel_func);
+    kernel_module = relay::transform::FoldConstant()(kernel_module);
+    kernel_func = Downcast<Function>(kernel_module->Lookup("main"));
+    return kernel_func->body;
+  }
+
+  /*!  * \brief Performs weight transpose and substitutes existing constants in the composite
+   *            function for Conv2D with CMSIS-NN Requantize constants */
+  Expr GenerateConv2dRequantConstants(const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* conv2d_call = nullptr;
+    auto* final_call = expr.as<CallNode>();
+    auto* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();

Review comment:
       stylistic & nit: FYI, fields can be accessed from the ObjectRefs as well.
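   
   e.g. (sketch):
   
       Call requantize = Downcast<Call>(clip_call->args[0]);
       const Expr& requantize_input = requantize->args[0];  // members reachable via the ObjectRef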

##########
File path: src/relay/backend/contrib/cmsisnn/generate_constants.cc
##########
@@ -0,0 +1,230 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/attrs/transform.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+Expr MakeTranspose(Expr data, Array<Integer> axes);
+namespace contrib {
+namespace cmsisnn {
+
+class GenerateConstantsMutator : public MixedModeMutator {
+ public:
+  explicit GenerateConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  /*!  * \brief Converts Kernel layout from HWIO to OHWI to align to CMSIS-NN requirements */
+  Expr ConvertKernelLayout(Expr kernel_expr, const Conv2DAttrs* conv2d_attrs, Attrs* new_attrs) {
+    auto attrs = make_object<Conv2DAttrs>();
+    attrs->strides = std::move(conv2d_attrs->strides);
+    attrs->padding = std::move(conv2d_attrs->padding);
+    attrs->dilation = std::move(conv2d_attrs->dilation);
+    attrs->groups = conv2d_attrs->groups;
+    attrs->channels = std::move(conv2d_attrs->channels);
+    attrs->kernel_size = std::move(conv2d_attrs->kernel_size);
+    attrs->data_layout = std::move(conv2d_attrs->data_layout);
+    attrs->kernel_layout = runtime::String("OHWI");
+    attrs->out_layout = std::move(conv2d_attrs->out_layout);
+    attrs->out_dtype = std::move(conv2d_attrs->out_dtype);
+    *new_attrs = tvm::Attrs{attrs};
+
+    IRModule kernel_module;
+    auto func_body = MakeTranspose(kernel_expr, {Integer(3), Integer(0), Integer(1), Integer(2)});
+    auto kernel_func =
+        Function(FreeVars(func_body), func_body, Type(), FreeTypeVars(func_body, kernel_module));
+    GlobalVar kernel_var("main");
+    kernel_module->Add(kernel_var, kernel_func);
+    kernel_module = relay::transform::FoldConstant()(kernel_module);
+    kernel_func = Downcast<Function>(kernel_module->Lookup("main"));
+    return kernel_func->body;
+  }
+
+  /*!  * \brief Performs weight transpose and substitutes existing constants in the composite
+   *            function for Conv2D with CMSIS-NN Requantize constants */
+  Expr GenerateConv2dRequantConstants(const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* conv2d_call = nullptr;
+    auto* final_call = expr.as<CallNode>();
+    auto* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    auto* requantize_input = requantize_call->args[0].as<CallNode>();
+    auto* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      conv2d_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      conv2d_call = requantize_input;
+    }
+
+    // Transpose weights: HWIO -> OHWI
+    auto* conv2d_attrs = conv2d_call->attrs.as<Conv2DAttrs>();
+    tvm::Attrs new_conv2d_attrs;
+    Expr transposed_kernel =
+        ConvertKernelLayout(conv2d_call->args[1], conv2d_attrs, &new_conv2d_attrs);
+
+    // Obtain input and output scales from Relay's Requantization
+    int64_t out_channels = conv2d_attrs->channels.as<IntImmNode>()->value;
+    float output_scale = GetScalarFromConstant<float>(requantize_call->args[3]);
+    auto input_scales = tvm::relay::qnn::GetFloatVectorFromConstant(requantize_call->args[1]);
+    ICHECK(input_scales.size() == static_cast<size_t>(out_channels));
+
+    // Calculate requantization multiplier and shift
+    Device dev{DLDeviceType::kDLCPU, 0};
+    runtime::NDArray multiplier_nda =
+        runtime::NDArray::Empty({out_channels}, DataType::Int(32), dev);
+    runtime::NDArray shift_nda = runtime::NDArray::Empty({out_channels}, DataType::Int(32), dev);
+    int32_t* multiplier = static_cast<int32_t*>(multiplier_nda->data);
+    int32_t* shift = static_cast<int32_t*>(shift_nda->data);
+    for (int i = 0; i < out_channels; ++i) {
+      double effective_output_scale =
+          static_cast<double>(input_scales[i]) / static_cast<double>(output_scale);
+      std::tie(*(multiplier + i), *(shift + i)) =
+          tvm::relay::qnn::GetFixedPointMultiplierShift(effective_output_scale);
+    }
+
+    // Create constants from requantization multiplier and shift
+    Constant multiplier_const(multiplier_nda);
+    Constant shift_const(shift_nda);
+
+    // Convert scale scalars into Constants
+    // Scales are expected as Constants by following passes
+    Expr weight_scale = conv2d_call->args[5];
+    Expr req_inp_scale = requantize_call->args[1];
+    if (out_channels == 1) {
+      runtime::NDArray weight_scale_nda =
+          runtime::NDArray::Empty({out_channels}, DataType::Float(32), dev);
+      float* weight_scale_p = static_cast<float*>(weight_scale_nda->data);
+      *weight_scale_p = GetScalarFromConstant<float>(weight_scale);
+      weight_scale = Constant(weight_scale_nda);
+
+      runtime::NDArray req_inp_scale_nda =
+          runtime::NDArray::Empty({out_channels}, DataType::Float(32), dev);
+      float* req_inp_scale_p = static_cast<float*>(req_inp_scale_nda->data);
+      *req_inp_scale_p = GetScalarFromConstant<float>(req_inp_scale);
+      req_inp_scale = Constant(req_inp_scale_nda);
+    }
+
+    // Replace existing weights (HWIO) with the transposed ones (OHWI)
+    // Substitute Conv2D weight_zero_point with the CMSIS-NN multiplier
+    // Substitute Requantize input_zero_point with CMSIS-NN shift
+    // Conv2D arguments: data, weight, input_zp, weight_zp, input_sc, weight_sc
+    Array<Expr> conv2d_args = {conv2d_call->args[0], transposed_kernel,    conv2d_call->args[2],
+                               multiplier_const,     conv2d_call->args[4], weight_scale};
+    Call ret_call = Call(conv2d_call->op, conv2d_args, new_conv2d_attrs, {});
+    if (bias_add_call) {
+      ret_call =
+          Call(bias_add_call->op, {ret_call, bias_add_call->args[1]}, bias_add_call->attrs, {});
+    }
+    Array<Expr> requantize_args = {ret_call, req_inp_scale, shift_const, requantize_call->args[3],
+                                   requantize_call->args[4]};
+    ret_call = Call(requantize_call->op, requantize_args, requantize_call->attrs, {});
+    if (clip_call) {
+      ret_call = Call(clip_call->op, {ret_call}, clip_call->attrs, {});
+    }
+    return ret_call;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    auto* global_var = call->op.as<GlobalVarNode>();
+    if (global_var) {
+      // Update to global function call needed because the body changes while
+      // generating new constants
+      Function func = Downcast<Function>(mod_->Lookup(global_var->name_hint));
+      Expr new_body = VisitExpr(func->body);
+      if (!new_body.same_as(func->body)) {
+        Function new_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                                     FreeTypeVars(new_body, mod_), func->attrs);
+        mod_->Update(GetRef<GlobalVar>(global_var), new_func);
+        final_call = Call(GetRef<GlobalVar>(global_var), post_call->args);
+      }
+    }
+
+    // Recreate composite function and corresponding call
+    // Updated composite function contains CMSIS-NN quantized multiplier and shift constants
+    if (call->op.as<FunctionNode>()) {
+      auto* func = call->op.as<FunctionNode>();
+      auto func_name = func->GetAttr<String>(attr::kComposite);
+      if (func_name.defined() && func_name == "cmsisnn.qnn_conv2d") {
+        Expr new_body = GenerateConv2dRequantConstants(func->body);
+        Function new_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                                     FreeTypeVars(new_body, mod_), func->attrs);
+        final_call = Call(new_func, post_call->args);
+      }
+    }
+
+    return final_call;
+  }
+
+ private:
+  IRModule mod_;
+};
+
+IRModule GenerateConstants(IRModule mod) {

Review comment:
       It would be better if we don't have to modify the input IRModule, to follow the design pattern -- unless there is a reason.
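   
   One option (sketch; the exact IRModule constructor arguments may differ by version):
   
       IRModule GenerateConstants(const IRModule& mod) {
         // Work on a copy so the caller's module is left untouched.
         IRModule new_mod = IRModule(mod->functions, mod->type_definitions);
         auto main_func = Downcast<Function>(new_mod->Lookup("main"));
         ...
       }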

##########
File path: src/relay/backend/contrib/cmsisnn/generate_constants.cc
##########
@@ -0,0 +1,230 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/attrs/transform.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+Expr MakeTranspose(Expr data, Array<Integer> axes);
+namespace contrib {
+namespace cmsisnn {
+
+class GenerateConstantsMutator : public MixedModeMutator {
+ public:
+  explicit GenerateConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  /*!  * \brief Converts Kernel layout from HWIO to OHWI to align to CMSIS-NN requirements */
+  Expr ConvertKernelLayout(Expr kernel_expr, const Conv2DAttrs* conv2d_attrs, Attrs* new_attrs) {
+    auto attrs = make_object<Conv2DAttrs>();
+    attrs->strides = std::move(conv2d_attrs->strides);
+    attrs->padding = std::move(conv2d_attrs->padding);
+    attrs->dilation = std::move(conv2d_attrs->dilation);
+    attrs->groups = conv2d_attrs->groups;
+    attrs->channels = std::move(conv2d_attrs->channels);
+    attrs->kernel_size = std::move(conv2d_attrs->kernel_size);
+    attrs->data_layout = std::move(conv2d_attrs->data_layout);
+    attrs->kernel_layout = runtime::String("OHWI");
+    attrs->out_layout = std::move(conv2d_attrs->out_layout);
+    attrs->out_dtype = std::move(conv2d_attrs->out_dtype);
+    *new_attrs = tvm::Attrs{attrs};
+
+    IRModule kernel_module;
+    auto func_body = MakeTranspose(kernel_expr, {Integer(3), Integer(0), Integer(1), Integer(2)});
+    auto kernel_func =
+        Function(FreeVars(func_body), func_body, Type(), FreeTypeVars(func_body, kernel_module));
+    GlobalVar kernel_var("main");
+    kernel_module->Add(kernel_var, kernel_func);
+    kernel_module = relay::transform::FoldConstant()(kernel_module);
+    kernel_func = Downcast<Function>(kernel_module->Lookup("main"));
+    return kernel_func->body;
+  }
+
+  /*!  * \brief Performs weight transpose and substitutes existing constants in the composite
+   *            function for Conv2D with CMSIS-NN Requantize constants */
+  Expr GenerateConv2dRequantConstants(const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* conv2d_call = nullptr;
+    auto* final_call = expr.as<CallNode>();
+    auto* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    auto* requantize_input = requantize_call->args[0].as<CallNode>();
+    auto* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      conv2d_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      conv2d_call = requantize_input;
+    }
+
+    // Transpose weights: HWIO -> OHWI
+    auto* conv2d_attrs = conv2d_call->attrs.as<Conv2DAttrs>();
+    tvm::Attrs new_conv2d_attrs;
+    Expr transposed_kernel =
+        ConvertKernelLayout(conv2d_call->args[1], conv2d_attrs, &new_conv2d_attrs);
+
+    // Obtain input and output scales from Relay's Requantization
+    int64_t out_channels = conv2d_attrs->channels.as<IntImmNode>()->value;
+    float output_scale = GetScalarFromConstant<float>(requantize_call->args[3]);
+    auto input_scales = tvm::relay::qnn::GetFloatVectorFromConstant(requantize_call->args[1]);
+    ICHECK(input_scales.size() == static_cast<size_t>(out_channels));
+
+    // Calculate requantization multiplier and shift
+    Device dev{DLDeviceType::kDLCPU, 0};
+    runtime::NDArray multiplier_nda =
+        runtime::NDArray::Empty({out_channels}, DataType::Int(32), dev);
+    runtime::NDArray shift_nda = runtime::NDArray::Empty({out_channels}, DataType::Int(32), dev);
+    int32_t* multiplier = static_cast<int32_t*>(multiplier_nda->data);
+    int32_t* shift = static_cast<int32_t*>(shift_nda->data);
+    for (int i = 0; i < out_channels; ++i) {
+      double effective_output_scale =
+          static_cast<double>(input_scales[i]) / static_cast<double>(output_scale);
+      std::tie(*(multiplier + i), *(shift + i)) =
+          tvm::relay::qnn::GetFixedPointMultiplierShift(effective_output_scale);
+    }
+
+    // Create constants from requantization multiplier and shift
+    Constant multiplier_const(multiplier_nda);
+    Constant shift_const(shift_nda);
+
+    // Convert scale scalars into Constants
+    // Scales are expected as Constants by following passes
+    Expr weight_scale = conv2d_call->args[5];
+    Expr req_inp_scale = requantize_call->args[1];
+    if (out_channels == 1) {
+      runtime::NDArray weight_scale_nda =
+          runtime::NDArray::Empty({out_channels}, DataType::Float(32), dev);
+      float* weight_scale_p = static_cast<float*>(weight_scale_nda->data);
+      *weight_scale_p = GetScalarFromConstant<float>(weight_scale);
+      weight_scale = Constant(weight_scale_nda);
+
+      runtime::NDArray req_inp_scale_nda =
+          runtime::NDArray::Empty({out_channels}, DataType::Float(32), dev);
+      float* req_inp_scale_p = static_cast<float*>(req_inp_scale_nda->data);
+      *req_inp_scale_p = GetScalarFromConstant<float>(req_inp_scale);
+      req_inp_scale = Constant(req_inp_scale_nda);
+    }
+
+    // Replace existing weights (HWIO) with the transposed ones (OHWI)
+    // Substitute Conv2D weight_zero_point with the CMSIS-NN multiplier
+    // Substitute Requantize input_zero_point with CMSIS-NN shift
+    // Conv2D arguments: data, weight, input_zp, weight_zp, input_sc, weight_sc
+    Array<Expr> conv2d_args = {conv2d_call->args[0], transposed_kernel,    conv2d_call->args[2],
+                               multiplier_const,     conv2d_call->args[4], weight_scale};
+    Call ret_call = Call(conv2d_call->op, conv2d_args, new_conv2d_attrs, {});
+    if (bias_add_call) {
+      ret_call =
+          Call(bias_add_call->op, {ret_call, bias_add_call->args[1]}, bias_add_call->attrs, {});
+    }
+    Array<Expr> requantize_args = {ret_call, req_inp_scale, shift_const, requantize_call->args[3],
+                                   requantize_call->args[4]};
+    ret_call = Call(requantize_call->op, requantize_args, requantize_call->attrs, {});
+    if (clip_call) {
+      ret_call = Call(clip_call->op, {ret_call}, clip_call->attrs, {});
+    }
+    return ret_call;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    auto* global_var = call->op.as<GlobalVarNode>();
+    if (global_var) {
+      // Update to global function call needed because the body changes while
+      // generating new constants
+      Function func = Downcast<Function>(mod_->Lookup(global_var->name_hint));
+      Expr new_body = VisitExpr(func->body);
+      if (!new_body.same_as(func->body)) {
+        Function new_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                                     FreeTypeVars(new_body, mod_), func->attrs);
+        mod_->Update(GetRef<GlobalVar>(global_var), new_func);
+        final_call = Call(GetRef<GlobalVar>(global_var), post_call->args);
+      }
+    }
+
+    // Recreate composite function and corresponding call
+    // Updated composite function contains CMSIS-NN quantized multiplier and shift constants
+    if (call->op.as<FunctionNode>()) {

Review comment:
       This could be refactored together with the section above.

##########
File path: src/relay/backend/contrib/cmsisnn/generate_constants.cc
##########
@@ -0,0 +1,230 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/attrs/transform.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+Expr MakeTranspose(Expr data, Array<Integer> axes);
+namespace contrib {
+namespace cmsisnn {
+
+class GenerateConstantsMutator : public MixedModeMutator {
+ public:
+  explicit GenerateConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  /*!  * \brief Converts Kernel layout from HWIO to OHWI to align to CMSIS-NN requirements */
+  Expr ConvertKernelLayout(Expr kernel_expr, const Conv2DAttrs* conv2d_attrs, Attrs* new_attrs) {
+    auto attrs = make_object<Conv2DAttrs>();
+    attrs->strides = std::move(conv2d_attrs->strides);
+    attrs->padding = std::move(conv2d_attrs->padding);
+    attrs->dilation = std::move(conv2d_attrs->dilation);
+    attrs->groups = conv2d_attrs->groups;
+    attrs->channels = std::move(conv2d_attrs->channels);
+    attrs->kernel_size = std::move(conv2d_attrs->kernel_size);
+    attrs->data_layout = std::move(conv2d_attrs->data_layout);
+    attrs->kernel_layout = runtime::String("OHWI");
+    attrs->out_layout = std::move(conv2d_attrs->out_layout);
+    attrs->out_dtype = std::move(conv2d_attrs->out_dtype);
+    *new_attrs = tvm::Attrs{attrs};
+
+    IRModule kernel_module;
+    auto func_body = MakeTranspose(kernel_expr, {Integer(3), Integer(0), Integer(1), Integer(2)});
+    auto kernel_func =
+        Function(FreeVars(func_body), func_body, Type(), FreeTypeVars(func_body, kernel_module));
+    GlobalVar kernel_var("main");
+    kernel_module->Add(kernel_var, kernel_func);
+    kernel_module = relay::transform::FoldConstant()(kernel_module);
+    kernel_func = Downcast<Function>(kernel_module->Lookup("main"));
+    return kernel_func->body;
+  }
+
+  /*!  * \brief Performs weight transpose and substitutes existing constants in the composite
+   *            function for Conv2D with CMSIS-NN Requantize constants */
+  Expr GenerateConv2dRequantConstants(const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* conv2d_call = nullptr;
+    auto* final_call = expr.as<CallNode>();
+    auto* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    auto* requantize_input = requantize_call->args[0].as<CallNode>();
+    auto* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      conv2d_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      conv2d_call = requantize_input;
+    }
+
+    // Transpose weights: HWIO -> OHWI
+    auto* conv2d_attrs = conv2d_call->attrs.as<Conv2DAttrs>();
+    tvm::Attrs new_conv2d_attrs;
+    Expr transposed_kernel =
+        ConvertKernelLayout(conv2d_call->args[1], conv2d_attrs, &new_conv2d_attrs);
+
+    // Obtain input and output scales from Relay's Requantization
+    int64_t out_channels = conv2d_attrs->channels.as<IntImmNode>()->value;
+    float output_scale = GetScalarFromConstant<float>(requantize_call->args[3]);
+    auto input_scales = tvm::relay::qnn::GetFloatVectorFromConstant(requantize_call->args[1]);
+    ICHECK(input_scales.size() == static_cast<size_t>(out_channels));
+
+    // Calculate requantization multiplier and shift
+    Device dev{DLDeviceType::kDLCPU, 0};
+    runtime::NDArray multiplier_nda =
+        runtime::NDArray::Empty({out_channels}, DataType::Int(32), dev);
+    runtime::NDArray shift_nda = runtime::NDArray::Empty({out_channels}, DataType::Int(32), dev);
+    int32_t* multiplier = static_cast<int32_t*>(multiplier_nda->data);
+    int32_t* shift = static_cast<int32_t*>(shift_nda->data);
+    for (int i = 0; i < out_channels; ++i) {
+      double effective_output_scale =
+          static_cast<double>(input_scales[i]) / static_cast<double>(output_scale);
+      std::tie(*(multiplier + i), *(shift + i)) =
+          tvm::relay::qnn::GetFixedPointMultiplierShift(effective_output_scale);
+    }
+
+    // Create constants from requantization multiplier and shift
+    Constant multiplier_const(multiplier_nda);
+    Constant shift_const(shift_nda);
+
+    // Convert scale scalars into Constants
+    // Scales are expected as Constants by following passes
+    Expr weight_scale = conv2d_call->args[5];
+    Expr req_inp_scale = requantize_call->args[1];
+    if (out_channels == 1) {
+      runtime::NDArray weight_scale_nda =
+          runtime::NDArray::Empty({out_channels}, DataType::Float(32), dev);
+      float* weight_scale_p = static_cast<float*>(weight_scale_nda->data);
+      *weight_scale_p = GetScalarFromConstant<float>(weight_scale);
+      weight_scale = Constant(weight_scale_nda);
+
+      runtime::NDArray req_inp_scale_nda =
+          runtime::NDArray::Empty({out_channels}, DataType::Float(32), dev);
+      float* req_inp_scale_p = static_cast<float*>(req_inp_scale_nda->data);
+      *req_inp_scale_p = GetScalarFromConstant<float>(req_inp_scale);
+      req_inp_scale = Constant(req_inp_scale_nda);
+    }
+
+    // Replace existing weights (HWIO) with the transposed ones (OHWI)
+    // Substitute Conv2D weight_zero_point with the CMSIS-NN multiplier
+    // Substitute Requantize input_zero_point with CMSIS-NN shift
+    // Conv2D arguments: data, weight, input_zp, weight_zp, input_sc, weight_sc
+    Array<Expr> conv2d_args = {conv2d_call->args[0], transposed_kernel,    conv2d_call->args[2],

Review comment:
    Nit: this has a bit of odd spacing. Is clang-format doing this?







[GitHub] [tvm] manupa-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r735467393



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}

Review comment:
    Yes, my concern was to avoid modifying the input IRModule.
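   
    A minimal sketch of what I mean, assuming the pass only reads the input module and collects its rewrites into a fresh `IRModule` (names here are illustrative, not from this PR):
   
    ```cpp
    #include <tvm/ir/module.h>
    
    namespace tvm {
    namespace relay {
    
    // Sketch only: the caller's IRModule is never mutated; rewritten
    // functions are gathered into a new module that is returned instead.
    IRModule ExtractConstantsPure(const IRModule& mod) {
      IRModule new_mod;  // starts empty; the input is only read below
      for (const auto& kv : mod->functions) {
        BaseFunc func = kv.second;
        // ... apply the rewrite to `func` here instead of mod->Update(...) ...
        new_mod->Add(kv.first, func);
      }
      return new_mod;
    }
    
    }  // namespace relay
    }  // namespace tvm
    ```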







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r735379653



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    // Replace Constant arguments with Vars for ML Operators
+    // Perform this for non-main Call Nodes only
+    if (func_nesting_level_ && call->op.as<OpNode>()) {
+      Array<Expr> new_args;
+      for (auto& arg : post_call->args) {
+        auto* const_arg = arg.as<ConstantNode>();
+        if (const_arg && !const_arg->is_scalar()) {

Review comment:
       ACK







[GitHub] [tvm] manupa-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r736330658



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    // Replace Constant arguments with Vars for ML Operators
+    // Perform this for non-main Call Nodes only
+    if (func_nesting_level_ && call->op.as<OpNode>()) {
+      Array<Expr> new_args;
+      for (auto& arg : post_call->args) {
+        auto* const_arg = arg.as<ConstantNode>();
+        if (const_arg && !const_arg->is_scalar()) {
+          Var var_arg = Var(gen_var_name(), const_arg->tensor_type());
+          new_args.push_back(var_arg);
+          constants_within_function_.push_back(GetRef<Constant>(const_arg));
+        } else {
+          new_args.push_back(arg);
+        }
+      }
+      final_call = Call(call->op, new_args, call->attrs, {});
+    }
+
+    // Since the constants are kicked out of partitioned functions
+    // a new call to global function is needed
+    if (auto* glob_var_node = post_call->op.as<GlobalVarNode>()) {
+      auto glob_var = GetRef<GlobalVar>(glob_var_node);
+      auto glob_func = Downcast<Function>(mod_->Lookup(glob_var));
+      auto new_glob_func = VisitExpr(glob_func);
+      if (!new_glob_func.same_as(glob_func)) {
+        mod_->Update(glob_var, Downcast<Function>(new_glob_func));
+        Array<Expr> new_args = post_call->args;
+        ICHECK(function_to_constants_.find(glob_func) != function_to_constants_.end());

Review comment:
    Agreed -- somehow this conversation started with Arrays :) and sorry, I missed that Map uses unordered_map. I'm fine as long as a hash-based .find() implementation is used.
   
    I would want @Leo-arm to confirm whether the assumption that CMSIS-NN is bounded to a small number of functions, which will fit on an embedded device, should be a driving principle in designing the codegen.







[GitHub] [tvm] manupa-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r736334994



##########
File path: src/relay/backend/contrib/cmsisnn/generate_constants.cc
##########
@@ -0,0 +1,230 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/attrs/transform.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+Expr MakeTranspose(Expr data, Array<Integer> axes);
+namespace contrib {
+namespace cmsisnn {
+
+class GenerateConstantsMutator : public MixedModeMutator {
+ public:
+  explicit GenerateConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  /*!  * \brief Converts Kernel layout from HWIO to OHWI to align to CMSIS-NN requirements */
+  Expr ConvertKernelLayout(Expr kernel_expr, const Conv2DAttrs* conv2d_attrs, Attrs* new_attrs) {
+    auto attrs = make_object<Conv2DAttrs>();
+    attrs->strides = std::move(conv2d_attrs->strides);
+    attrs->padding = std::move(conv2d_attrs->padding);
+    attrs->dilation = std::move(conv2d_attrs->dilation);
+    attrs->groups = conv2d_attrs->groups;
+    attrs->channels = std::move(conv2d_attrs->channels);
+    attrs->kernel_size = std::move(conv2d_attrs->kernel_size);
+    attrs->data_layout = std::move(conv2d_attrs->data_layout);
+    attrs->kernel_layout = runtime::String("OHWI");
+    attrs->out_layout = std::move(conv2d_attrs->out_layout);
+    attrs->out_dtype = std::move(conv2d_attrs->out_dtype);
+    *new_attrs = tvm::Attrs{attrs};
+
+    IRModule kernel_module;
+    auto func_body = MakeTranspose(kernel_expr, {Integer(3), Integer(0), Integer(1), Integer(2)});
+    auto kernel_func =
+        Function(FreeVars(func_body), func_body, Type(), FreeTypeVars(func_body, kernel_module));
+    GlobalVar kernel_var("main");
+    kernel_module->Add(kernel_var, kernel_func);
+    kernel_module = relay::transform::FoldConstant()(kernel_module);
+    kernel_func = Downcast<Function>(kernel_module->Lookup("main"));
+    return kernel_func->body;
+  }
+
+  /*!  * \brief Performs weight transpose and substitutes existing constants in the composite
+   *            function for Conv2D with CMSIS-NN Requantize constants */
+  Expr GenerateConv2dRequantConstants(const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* conv2d_call = nullptr;
+    auto* final_call = expr.as<CallNode>();
+    auto* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();

Review comment:
    As I said, it's just stylistic, so feel free to ignore it :).
    Personally, I thought the ObjectRef is sufficient and there is no need to access the Node.
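   
    Just to sketch the style I mean -- the same clip/requantize matching as above, but through ObjectRefs with `Downcast<>` (illustrative only, not a drop-in for the PR):
   
    ```cpp
    #include <tvm/ir/op.h>
    #include <tvm/relay/expr.h>
    
    using tvm::Downcast;
    using tvm::Op;
    using tvm::relay::Call;
    using tvm::relay::Expr;
    
    // Sketch: match the pattern via ObjectRefs instead of raw CallNode
    // pointers; Downcast<> checks the type and hands back the reference.
    Call MatchRequantize(const Expr& expr) {
      Call final_call = Downcast<Call>(expr);
      if (Downcast<Op>(final_call->op)->name == "clip") {
        return Downcast<Call>(final_call->args[0]);  // requantize under the clip
      }
      return final_call;  // expr is the requantize itself
    }
    ```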







[GitHub] [tvm] manupa-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r743822140



##########
File path: python/tvm/relay/op/contrib/cmsisnn.py
##########
@@ -52,37 +54,85 @@ def partition_for_cmsisnn(mod, params=None, **opts):
             transform.InferType(),
             transform.MergeComposite(pattern_table()),
             transform.AnnotateTarget("cmsisnn"),
-            transform.MergeCompilerRegions(),

Review comment:
    Code size -- due to additional external functions and more translation units produced? Each external function will become its own .c file, right?







[GitHub] [tvm] manupa-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r743818992



##########
File path: python/tvm/relay/op/contrib/cmsisnn.py
##########
@@ -52,37 +54,85 @@ def partition_for_cmsisnn(mod, params=None, **opts):
             transform.InferType(),
             transform.MergeComposite(pattern_table()),
             transform.AnnotateTarget("cmsisnn"),
-            transform.MergeCompilerRegions(),

Review comment:
    The way the current BYOC flow works, this will create a source file per operator, and an additional level of indirection will be there for each call.
   
   i.e.
   main_func <--> ext_func <--> cmsis_func1
   main_func <--> ext_func <--> cmsis_func2
   main_func <--> ext_func <--> cmsis_func3
   
   vs
   
   main_func <--> ext_func <--> cmsis_func1, cmsis_func2, cmsis_func3










[GitHub] [tvm] manupa-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r734711358



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>

Review comment:
    A few examples would be to check:
   
    * a single operator in an external function
    * multiple operators in an external function
    * multi-nested functions (I see code to that effect here)







[GitHub] [tvm] manupa-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r734707629



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>

Review comment:
    The TVM codebase does have unit tests for all the passes, and I think we need *some* unit tests (apart from end-to-end tests).
   
    It would be great if we could have some common expected relay-based test cases. They would also aid in understanding what this pass is doing.
   
    Why shouldn't we have unit tests, if we are committing this code? (We still want to commit this code irrespective of it being replaced by TIR constants, right?)
   







[GitHub] [tvm] Mousius commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
Mousius commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r734802864



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    // Replace Constant arguments with Vars for ML Operators
+    // Perform this for non-main Call Nodes only
+    if (func_nesting_level_ && call->op.as<OpNode>()) {
+      Array<Expr> new_args;
+      for (auto& arg : post_call->args) {
+        auto* const_arg = arg.as<ConstantNode>();
+        if (const_arg && !const_arg->is_scalar()) {
+          Var var_arg = Var(gen_var_name(), const_arg->tensor_type());
+          new_args.push_back(var_arg);
+          constants_within_function_.push_back(GetRef<Constant>(const_arg));
+        } else {
+          new_args.push_back(arg);
+        }
+      }
+      final_call = Call(call->op, new_args, call->attrs, {});
+    }
+
+    // Since the constants are kicked out of partitioned functions
+    // a new call to global function is needed
+    if (auto* glob_var_node = post_call->op.as<GlobalVarNode>()) {
+      auto glob_var = GetRef<GlobalVar>(glob_var_node);
+      auto glob_func = Downcast<Function>(mod_->Lookup(glob_var));
+      auto new_glob_func = VisitExpr(glob_func);
+      if (!new_glob_func.same_as(glob_func)) {
+        mod_->Update(glob_var, Downcast<Function>(new_glob_func));
+        Array<Expr> new_args = post_call->args;
+        ICHECK(function_to_constants_.find(glob_func) != function_to_constants_.end());

Review comment:
    I'm not suggesting it's better; I'm just curious whether it'll have a real impact -- `Array` is used heavily throughout the code base.







[GitHub] [tvm] manupa-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r735463335



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    // Replace Constant arguments with Vars for ML Operators
+    // Perform this for non-main Call Nodes only
+    if (func_nesting_level_ && call->op.as<OpNode>()) {
+      Array<Expr> new_args;
+      for (auto& arg : post_call->args) {
+        auto* const_arg = arg.as<ConstantNode>();
+        if (const_arg && !const_arg->is_scalar()) {
+          Var var_arg = Var(gen_var_name(), const_arg->tensor_type());
+          new_args.push_back(var_arg);
+          constants_within_function_.push_back(GetRef<Constant>(const_arg));
+        } else {
+          new_args.push_back(arg);
+        }
+      }
+      final_call = Call(call->op, new_args, call->attrs, {});
+    }
+
+    // Since the constants are kicked out of partitioned functions
+    // a new call to global function is needed
+    if (auto* glob_var_node = post_call->op.as<GlobalVarNode>()) {
+      auto glob_var = GetRef<GlobalVar>(glob_var_node);
+      auto glob_func = Downcast<Function>(mod_->Lookup(glob_var));
+      auto new_glob_func = VisitExpr(glob_func);
+      if (!new_glob_func.same_as(glob_func)) {
+        mod_->Update(glob_var, Downcast<Function>(new_glob_func));
+        Array<Expr> new_args = post_call->args;
+        ICHECK(function_to_constants_.find(glob_func) != function_to_constants_.end());

Review comment:
    Oops, it seems like we are discussing a Map rather than an Array.
    Can't we use unordered_map then?
   
    So my reasoning is about using .find(..) with an Array/Map -- I'm not sure the Array/Map is commonly used like that, especially when it is an internal data structure that is not exposed to Python.
   
   https://github.com/apache/tvm/blob/df2bcfbd239c486fbad1b2ddf335389cb6c35fb7/src/tir/transforms/loop_partition.cc#L331
   
   https://github.com/apache/tvm/blob/df2bcfbd239c486fbad1b2ddf335389cb6c35fb7/src/relay/transforms/fuse_ops.cc#L854
   
   https://github.com/apache/tvm/blob/df2bcfbd239c486fbad1b2ddf335389cb6c35fb7/src/relay/transforms/fuse_ops.cc#L1008
   
    So, two questions here from a code quality perspective:
   
    @ashutosh-arm, if the number of partitioned functions is bounded (preferably by a constant), then it might not have an effect. Is that the case -- say, if we think of a large network with many operators? If so, it may be fine to overlook this.
   
    @Mousius, hmmm, the tree is used to make the .find(..) faster, right?
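   
    For reference, the hash-based flavour would look something like this sketch, assuming the map stays internal to the pass and is keyed on the partitioned `Function` by reference, as it is in the PR:
   
    ```cpp
    #include <unordered_map>
    
    #include <tvm/relay/expr.h>
    #include <tvm/relay/function.h>
    #include <tvm/runtime/object.h>
    
    // Sketch: ObjectPtrHash/ObjectPtrEqual hash and compare by reference,
    // so find() is O(1) on average however many functions get partitioned,
    // and the structure is never exposed to Python.
    std::unordered_map<tvm::relay::Function,
                       tvm::runtime::Array<tvm::relay::Constant>,
                       tvm::runtime::ObjectPtrHash, tvm::runtime::ObjectPtrEqual>
        function_to_constants;
    ```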







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r735525918



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    // Replace Constant arguments with Vars for ML Operators
+    // Perform this for non-main Call Nodes only
+    if (func_nesting_level_ && call->op.as<OpNode>()) {
+      Array<Expr> new_args;
+      for (auto& arg : post_call->args) {
+        auto* const_arg = arg.as<ConstantNode>();
+        if (const_arg && !const_arg->is_scalar()) {
+          Var var_arg = Var(gen_var_name(), const_arg->tensor_type());
+          new_args.push_back(var_arg);
+          constants_within_function_.push_back(GetRef<Constant>(const_arg));
+        } else {
+          new_args.push_back(arg);
+        }
+      }
+      final_call = Call(call->op, new_args, call->attrs, {});
+    }
+
+    // Since the constants are kicked out of partitioned functions
+    // a new call to global function is needed
+    if (auto* glob_var_node = post_call->op.as<GlobalVarNode>()) {
+      auto glob_var = GetRef<GlobalVar>(glob_var_node);
+      auto glob_func = Downcast<Function>(mod_->Lookup(glob_var));
+      auto new_glob_func = VisitExpr(glob_func);
+      if (!new_glob_func.same_as(glob_func)) {
+        mod_->Update(glob_var, Downcast<Function>(new_glob_func));
+        Array<Expr> new_args = post_call->args;
+        ICHECK(function_to_constants_.find(glob_func) != function_to_constants_.end());

Review comment:
    For micro networks, the number of CMSIS-NN ops is going to be small, and so will be the number of partitioned functions.







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r735566437



##########
File path: src/relay/backend/contrib/cmsisnn/generate_constants.cc
##########
@@ -0,0 +1,230 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/attrs/transform.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+Expr MakeTranspose(Expr data, Array<Integer> axes);
+namespace contrib {
+namespace cmsisnn {
+
+class GenerateConstantsMutator : public MixedModeMutator {
+ public:
+  explicit GenerateConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  /*!  * \brief Converts Kernel layout from HWIO to OHWI to align to CMSIS-NN requirements */
+  Expr ConvertKernelLayout(Expr kernel_expr, const Conv2DAttrs* conv2d_attrs, Attrs* new_attrs) {
+    auto attrs = make_object<Conv2DAttrs>();
+    attrs->strides = std::move(conv2d_attrs->strides);
+    attrs->padding = std::move(conv2d_attrs->padding);
+    attrs->dilation = std::move(conv2d_attrs->dilation);
+    attrs->groups = conv2d_attrs->groups;
+    attrs->channels = std::move(conv2d_attrs->channels);
+    attrs->kernel_size = std::move(conv2d_attrs->kernel_size);
+    attrs->data_layout = std::move(conv2d_attrs->data_layout);
+    attrs->kernel_layout = runtime::String("OHWI");
+    attrs->out_layout = std::move(conv2d_attrs->out_layout);
+    attrs->out_dtype = std::move(conv2d_attrs->out_dtype);
+    *new_attrs = tvm::Attrs{attrs};
+
+    IRModule kernel_module;
+    auto func_body = MakeTranspose(kernel_expr, {Integer(3), Integer(0), Integer(1), Integer(2)});
+    auto kernel_func =
+        Function(FreeVars(func_body), func_body, Type(), FreeTypeVars(func_body, kernel_module));
+    GlobalVar kernel_var("main");
+    kernel_module->Add(kernel_var, kernel_func);
+    kernel_module = relay::transform::FoldConstant()(kernel_module);
+    kernel_func = Downcast<Function>(kernel_module->Lookup("main"));
+    return kernel_func->body;
+  }
+
+  /*!  * \brief Performs weight transpose and substitutes existing constants in the composite
+   *            function for Conv2D with CMSIS-NN Requantize constants */
+  Expr GenerateConv2dRequantConstants(const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* conv2d_call = nullptr;
+    auto* final_call = expr.as<CallNode>();
+    auto* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    auto* requantize_input = requantize_call->args[0].as<CallNode>();
+    auto* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      conv2d_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      conv2d_call = requantize_input;
+    }
+
+    // Transpose weights: HWIO -> OHWI
+    auto* conv2d_attrs = conv2d_call->attrs.as<Conv2DAttrs>();
+    tvm::Attrs new_conv2d_attrs;
+    Expr transposed_kernel =
+        ConvertKernelLayout(conv2d_call->args[1], conv2d_attrs, &new_conv2d_attrs);
+
+    // Obtain input and output scales from Relay's Requantization
+    int64_t out_channels = conv2d_attrs->channels.as<IntImmNode>()->value;
+    float output_scale = GetScalarFromConstant<float>(requantize_call->args[3]);
+    auto input_scales = tvm::relay::qnn::GetFloatVectorFromConstant(requantize_call->args[1]);
+    ICHECK(input_scales.size() == static_cast<size_t>(out_channels));
+
+    // Calculate requantization multiplier and shift
+    Device dev{DLDeviceType::kDLCPU, 0};
+    runtime::NDArray multiplier_nda =
+        runtime::NDArray::Empty({out_channels}, DataType::Int(32), dev);
+    runtime::NDArray shift_nda = runtime::NDArray::Empty({out_channels}, DataType::Int(32), dev);
+    int32_t* multiplier = static_cast<int32_t*>(multiplier_nda->data);
+    int32_t* shift = static_cast<int32_t*>(shift_nda->data);
+    for (int i = 0; i < out_channels; ++i) {
+      double effective_output_scale =
+          static_cast<double>(input_scales[i]) / static_cast<double>(output_scale);
+      std::tie(*(multiplier + i), *(shift + i)) =
+          tvm::relay::qnn::GetFixedPointMultiplierShift(effective_output_scale);
+    }
+
+    // Create constants from requantization multiplier and shift
+    Constant multiplier_const(multiplier_nda);
+    Constant shift_const(shift_nda);
+
+    // Convert scale scalars into Constants
+    // Scales are expected as Constants by following passes
+    Expr weight_scale = conv2d_call->args[5];
+    Expr req_inp_scale = requantize_call->args[1];
+    if (out_channels == 1) {
+      runtime::NDArray weight_scale_nda =
+          runtime::NDArray::Empty({out_channels}, DataType::Float(32), dev);
+      float* weight_scale_p = static_cast<float*>(weight_scale_nda->data);
+      *weight_scale_p = GetScalarFromConstant<float>(weight_scale);
+      weight_scale = Constant(weight_scale_nda);
+
+      runtime::NDArray req_inp_scale_nda =
+          runtime::NDArray::Empty({out_channels}, DataType::Float(32), dev);
+      float* req_inp_scale_p = static_cast<float*>(req_inp_scale_nda->data);
+      *req_inp_scale_p = GetScalarFromConstant<float>(req_inp_scale);
+      req_inp_scale = Constant(req_inp_scale_nda);
+    }
+
+    // Replace existing weights (HWIO) with the transposed ones (OHWI)
+    // Substitute Conv2D weight_zero_point with the CMSIS-NN multiplier
+    // Substitute Requantize input_zero_point with CMSIS-NN shift
+    // Conv2D arguments: data, weight, input_zp, weight_zp, input_sc, weight_sc
+    Array<Expr> conv2d_args = {conv2d_call->args[0], transposed_kernel,    conv2d_call->args[2],
+                               multiplier_const,     conv2d_call->args[4], weight_scale};
+    Call ret_call = Call(conv2d_call->op, conv2d_args, new_conv2d_attrs, {});
+    if (bias_add_call) {
+      ret_call =
+          Call(bias_add_call->op, {ret_call, bias_add_call->args[1]}, bias_add_call->attrs, {});
+    }
+    Array<Expr> requantize_args = {ret_call, req_inp_scale, shift_const, requantize_call->args[3],
+                                   requantize_call->args[4]};
+    ret_call = Call(requantize_call->op, requantize_args, requantize_call->attrs, {});
+    if (clip_call) {
+      ret_call = Call(clip_call->op, {ret_call}, clip_call->attrs, {});
+    }
+    return ret_call;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    auto* global_var = call->op.as<GlobalVarNode>();
+    if (global_var) {
+      // Update to global function call needed because the body changes while
+      // generating new constants
+      Function func = Downcast<Function>(mod_->Lookup(global_var->name_hint));
+      Expr new_body = VisitExpr(func->body);
+      if (!new_body.same_as(func->body)) {
+        Function new_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                                     FreeTypeVars(new_body, mod_), func->attrs);
+        mod_->Update(GetRef<GlobalVar>(global_var), new_func);
+        final_call = Call(GetRef<GlobalVar>(global_var), post_call->args);
+      }
+    }
+
+    // Recreate composite function and corresponding call
+    // Updated composite function contains CMSIS-NN quantized multiplier and shift constants
+    if (call->op.as<FunctionNode>()) {

Review comment:
    The two blocks are doing different things; after merging them, they start to look more confusing.







[GitHub] [tvm] Mousius commented on pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
Mousius commented on pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#issuecomment-965196030


   @manupa-arm / @ashutosh-arm: in general, if we're adding things that raise exceptions as part of their interface, we should be testing them.





[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r746614175



##########
File path: tests/python/contrib/test_cmsisnn/test_extract_constants.py
##########
@@ -0,0 +1,175 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""CMSIS-NN integration tests: extract_constants pass"""
+import itertools
+import math
+import numpy as np
+import pytest
+import tvm
+from tvm import relay
+
+from utils import (
+    make_module,
+    count_num_calls,
+    get_range_for_dtype_str,
+    get_same_padding,
+    get_conv2d_qnn_params,
+    make_qnn_relu,
+)
+
+tvm._ffi._init_api("relay.ext.cmsisnn.transform", __name__)
+
+
+class CheckFunctionsForConstants(tvm.relay.ExprVisitor):
+    def __init__(self):
+        super().__init__()
+        self.num_constants_ = 0
+
+    def visit_call(self, call):
+        super().visit_call(call)
+        for arg in call.args:
+            if isinstance(arg, relay.Constant) and arg.data.numpy().ndim > 0:
+                self.num_constants_ += 1
+
+    def visit_function(self, func):
+        super().visit_function(func)
+        assert self.num_constants_ == 0, "Functions should not have constant arguments in Calls"
+
+
+def set_external_func_attr(func, compiler, ext_symbol):
+    func = func.with_attr("Primitive", tvm.tir.IntImm("int32", 1))
+    func = func.with_attr("Compiler", compiler)
+    func = func.with_attr("global_symbol", ext_symbol)
+    return func
+
+
+def test_external_function():
+    y0_data = np.random.uniform(0, 1, (8, 8)).astype("float32")
+    x0 = relay.var("x0", shape=(8, 8))
+    y0_const = relay.const(y0_data, "float32")
+    z0 = x0 + y0_const
+    ef = relay.Function([x0], z0, relay.TensorType((8, 8), "float32"))
+    ev = relay.GlobalVar("external_function")
+    ef = set_external_func_attr(ef, "external_compiler", ev.name_hint)
+
+    x = relay.var("x", shape=(8, 8))
+    c = relay.Call(ev, [x])
+    mf = relay.Function([x], c, relay.TensorType((8, 8), "float32"))
+    mv = relay.GlobalVar("main")
+
+    mod = tvm.IRModule()
+    mod[ev] = ef
+    mod[mv] = mf
+
+    mod = ExtractConstantsFromPartitionedFunction()(mod)
+    CheckFunctionsForConstants().visit_function(mod[ev])
+    relay.transform.InferType()(mod)
+
+
+def test_nested_function():
+    y1_data = np.random.uniform(0, 1, (8, 8)).astype("float32")
+    x1 = relay.var("x1", shape=(8, 8))
+    y1_const = relay.const(y1_data, "float32")
+    z1 = x1 + y1_const
+    w1 = z1 * relay.const(5.0, "float32")
+    lf = relay.Function([x1], w1, relay.TensorType((8, 8), "float32"))
+
+    x0 = relay.var("x0", shape=(8, 8))
+    c0 = relay.Call(lf, [x0])
+    ef = relay.Function([x0], c0, relay.TensorType((8, 8), "float32"))
+
+    x = relay.var("x", shape=(8, 8))
+    ev = relay.GlobalVar("external_function")
+    ef = set_external_func_attr(ef, "external_compiler", ev.name_hint)
+    c = relay.Call(ev, [x])
+    mf = relay.Function([x], c, relay.TensorType((8, 8), "float32"))
+    mv = relay.GlobalVar("main")
+
+    mod = tvm.IRModule()
+    mod[ev] = ef
+    mod[mv] = mf
+
+    mod = ExtractConstantsFromPartitionedFunction()(mod)
+    CheckFunctionsForConstants().visit_function(mod[ev])
+    relay.transform.InferType()(mod)
+
+
+def test_multiple_functions():
+    y20_data = np.random.uniform(0, 1, (8, 8)).astype("float32")
+    x20 = relay.var("x20", shape=(8, 8))
+    y20_const = relay.const(y20_data, "float32")
+    z20 = x20 + y20_const
+    f20 = relay.Function([x20], z20, relay.TensorType((8, 8), "float32"))
+
+    y21_data = np.random.uniform(0, 1, (8, 8)).astype("float32")
+    x21 = relay.var("x21", shape=(8, 8))
+    y21_const = relay.const(y21_data, "float32")
+    z21 = x21 + y21_const
+    f21 = relay.Function([x21], z21, relay.TensorType((8, 8), "float32"))
+
+    x10 = relay.var("x10", shape=(8, 8))
+    c10 = relay.Call(f20, [x10])
+    c11 = relay.Call(f21, [c10])
+    ef = relay.Function([x10], c11, relay.TensorType((8, 8), "float32"))
+
+    x0 = relay.var("x0", shape=(8, 8))
+    ev = relay.GlobalVar("external_function")
+    ef = set_external_func_attr(ef, "external_compiler", ev.name_hint)
+    c = relay.Call(ev, [x0])
+    mf = relay.Function([x0], c, relay.TensorType((8, 8), "float32"))
+    mv = relay.GlobalVar("main")
+
+    mod = tvm.IRModule()
+    mod[ev] = ef
+    mod[mv] = mf
+
+    mod = ExtractConstantsFromPartitionedFunction()(mod)
+    CheckFunctionsForConstants().visit_function(mod[ev])
+    relay.transform.InferType()(mod)

Review comment:
    Some asserts are already part of the Visitor call above in the code:
   `assert self.num_constants_ == 0, "Functions should not have constant arguments in Calls"`
   
   Is that what you were expecting as well?







[GitHub] [tvm] Mousius commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
Mousius commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r732945527



##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -43,23 +44,153 @@ class RelayToTIRVisitor : public MixedModeVisitor {
   inline IntImm ToArg(int32_t value) { return IntImm(DataType::Int(32), value); }
 
   void CreatePrimFuncForExtern(Array<tir::Var> func_signature,
-                               tvm::Array<PrimExpr> call_extern_args) {
+                               tvm::Array<PrimExpr> call_extern_args, int context_buffer_size = 0) {
     Map<String, ObjectRef> dict_attrs;
     dict_attrs.Set("global_symbol", func_name_);
     dict_attrs.Set("tir.noalias", Bool(true));
 
     tir::Stmt body = tir::Evaluate(
         tvm::tir::Call(DataType::Int(8), tir::builtin::call_extern(), call_extern_args));
 
+    if (context_buffer_size) {
+      // TODO(@ashutosh-arm) while supporting MVE, we need to move allocation through TVMBAW

Review comment:
    Isn't this already `TVMBAW`? We just need to parse the flags to get the right sizes?

##########
File path: python/tvm/relay/op/contrib/cmsisnn.py
##########
@@ -47,42 +47,93 @@ def partition_for_cmsisnn(mod, params=None, **opts):
     if params:
         mod["main"] = bind_params_by_name(mod["main"], params)
 
+    tvm._ffi._init_api("relay.ext.cmsisnn.transform", __name__)

Review comment:
    I think we can put this at the top of the file, as it imports into whichever scope it's in. It is likely to only ever be called once, but it'd be good to guarantee it.

##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -43,23 +44,153 @@ class RelayToTIRVisitor : public MixedModeVisitor {
   inline IntImm ToArg(int32_t value) { return IntImm(DataType::Int(32), value); }
 
   void CreatePrimFuncForExtern(Array<tir::Var> func_signature,
-                               tvm::Array<PrimExpr> call_extern_args) {
+                               tvm::Array<PrimExpr> call_extern_args, int context_buffer_size = 0) {
     Map<String, ObjectRef> dict_attrs;
     dict_attrs.Set("global_symbol", func_name_);
     dict_attrs.Set("tir.noalias", Bool(true));
 
     tir::Stmt body = tir::Evaluate(
         tvm::tir::Call(DataType::Int(8), tir::builtin::call_extern(), call_extern_args));
 
+    if (context_buffer_size) {
+      // TODO(@ashutosh-arm) while supporting MVE, we need to move allocation through TVMBAW
+      tir::Var buffer_var("context_buffer", PointerType(PrimType(DataType::Int(8)), "global"));
+      body = tir::Allocate(buffer_var, DataType::Int(8), {context_buffer_size}, tir::const_true(),
+                           body);
+    }
+
     primfunc_ = tir::PrimFunc(func_signature, body, VoidType(), Map<tir::Var, tir::Buffer>(),
                               DictAttrs(dict_attrs));
   }
 
+  void EmitConv2D(const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* conv2d_call = nullptr;
+    auto* final_call = expr.as<CallNode>();
+    auto* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    auto* requantize_input = requantize_call->args[0].as<CallNode>();
+    auto* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      conv2d_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      conv2d_call = requantize_input;
+    }
+
+    // TIR variables are created in the order they appear in the Relay partitioned function
+    // %1 = qnn.conv2d(%input, %weight_const_0, input_zero_point_scalar,
+    //                 %cmsisnn_multiplier_const_1, %input_scale_scalar, %weight_scale_const_2)
+    // %2 = nn.bias_add(%1, %bias_const_3, axis=3)
+    // %3 = qnn.requantize(%2, %input_scale_const_4, %cmsisnn_shift_const_5,
+    //                     %output_scale_scalar, %output_zero_point_scalar)
+    // clip(%3, a_min=%min_scalar, a_max=%max_scalar)
+    auto in_var = tir::Var("input", DataType::Handle(8));
+    auto const_var0 = tir::Var("filter", DataType::Handle(8));         // weight
+    auto const_var1 = tir::Var("multiplier", DataType::Handle(32));    // quant multiplier
+    auto const_var2 = tir::Var("filter_scale", DataType::Handle(32));  // weight scale
+    auto const_var3 = tir::Var("bias", DataType::Handle(32));          // bias
+    auto const_var4 = tir::Var("input_scale", DataType::Handle(32));   // input_scale * weight_scale
+    auto const_var5 = tir::Var("shift", DataType::Handle(32));         // quant shift
+    auto out_var = tir::Var("output", DataType::Handle(8));

Review comment:
       We should name these variables better rather than adding comments, plus I don't think we need `auto` types here?
   ```suggestion
        tir::Var in_var("input", DataType::Handle(8));
        tir::Var filter("filter", DataType::Handle(8));    
        tir::Var multiplier("multiplier", DataType::Handle(32));  
        tir::Var filter_scale("filter_scale", DataType::Handle(32)); 
        tir::Var bias("bias", DataType::Handle(32));       
        tir::Var input_scale("input_scale", DataType::Handle(32));  
        tir::Var shift("shift", DataType::Handle(32));        
        tir::Var output("output", DataType::Handle(8));
   ```

##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -43,23 +44,153 @@ class RelayToTIRVisitor : public MixedModeVisitor {
   inline IntImm ToArg(int32_t value) { return IntImm(DataType::Int(32), value); }
 
   void CreatePrimFuncForExtern(Array<tir::Var> func_signature,
-                               tvm::Array<PrimExpr> call_extern_args) {
+                               tvm::Array<PrimExpr> call_extern_args, int context_buffer_size = 0) {
     Map<String, ObjectRef> dict_attrs;
     dict_attrs.Set("global_symbol", func_name_);
     dict_attrs.Set("tir.noalias", Bool(true));
 
     tir::Stmt body = tir::Evaluate(
         tvm::tir::Call(DataType::Int(8), tir::builtin::call_extern(), call_extern_args));
 
+    if (context_buffer_size) {
+      // TODO(@ashutosh-arm) while supporting MVE, we need to move allocation through TVMBAW
+      tir::Var buffer_var("context_buffer", PointerType(PrimType(DataType::Int(8)), "global"));
+      body = tir::Allocate(buffer_var, DataType::Int(8), {context_buffer_size}, tir::const_true(),
+                           body);
+    }
+
     primfunc_ = tir::PrimFunc(func_signature, body, VoidType(), Map<tir::Var, tir::Buffer>(),
                               DictAttrs(dict_attrs));
   }
 
+  void EmitConv2D(const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* conv2d_call = nullptr;
+    auto* final_call = expr.as<CallNode>();
+    auto* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    auto* requantize_input = requantize_call->args[0].as<CallNode>();
+    auto* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      conv2d_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      conv2d_call = requantize_input;
+    }
+
+    // TIR variables are created in the order they appear in the Relay partitioned function
+    // %1 = qnn.conv2d(%input, %weight_const_0, input_zero_point_scalar,
+    //                 %cmsisnn_multiplier_const_1, %input_scale_scalar, %weight_scale_const_2)
+    // %2 = nn.bias_add(%1, %bias_const_3, axis=3)
+    // %3 = qnn.requantize(%2, %input_scale_const_4, %cmsisnn_shift_const_5,
+    //                     %output_scale_scalar, %output_zero_point_scalar)
+    // clip(%3, a_min=%min_scalar, a_max=%max_scalar)
+    auto in_var = tir::Var("input", DataType::Handle(8));
+    auto const_var0 = tir::Var("filter", DataType::Handle(8));         // weight
+    auto const_var1 = tir::Var("multiplier", DataType::Handle(32));    // quant multiplier
+    auto const_var2 = tir::Var("filter_scale", DataType::Handle(32));  // weight scale
+    auto const_var3 = tir::Var("bias", DataType::Handle(32));          // bias
+    auto const_var4 = tir::Var("input_scale", DataType::Handle(32));   // input_scale * weight_scale
+    auto const_var5 = tir::Var("shift", DataType::Handle(32));         // quant shift
+    auto out_var = tir::Var("output", DataType::Handle(8));
+
+    // Individual arguments to the structs arguments of the CMSIS-NN API are filled into call_extern
+    // https://github.com/ARM-software/CMSIS_5/blob/def6f800f95661eb3451d317f7d0dde504f6020d/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c#L50
+
+    // prepare cmsis_nn_conv_params
+    auto* conv2d_attrs = conv2d_call->attrs.as<Conv2DAttrs>();

Review comment:
       ```suggestion
    const Conv2DAttrs* conv2d_attrs = conv2d_call->attrs.as<Conv2DAttrs>();
   ```

##########
File path: tests/python/contrib/test_cmsisnn/test_conv2d.py
##########
@@ -0,0 +1,303 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""CMSIS-NN integration tests: Conv2D"""
+import itertools
+import numpy as np
+import pytest
+import tvm
+from tvm import relay
+from tvm.relay.op.contrib import cmsisnn
+
+
+from tests.python.relay.aot.aot_test_utils import (
+    AOTTestModel,
+    AOT_CORSTONE300_RUNNER,
+    AOT_DEFAULT_RUNNER,
+    generate_ref_data,
+    compile_and_run,
+)
+from utils import (
+    skip_if_no_reference_system,
+    make_module,
+    count_num_calls,
+    get_range_for_dtype_str,
+    get_same_padding,
+    get_conv2d_qnn_params,
+    make_qnn_relu,
+)
+
+
+def make_model(
+    shape,
+    kernel_shape,
+    input_zp,
+    input_sc,
+    kernel_zp,
+    kernel_sc,
+    output_zp,
+    output_sc,
+    padding,
+    strides,
+    dilation,
+    groups,
+    dtype,
+    kernel_dtype,
+    out_channels,
+    weight_format,
+    enable_bias,
+    relu_type,
+):
+    """Return a model and any parameters it may have"""
+    h_index = weight_format.index("H")
+    w_index = weight_format.index("W")
+    kernel_h = kernel_shape[h_index]
+    kernel_w = kernel_shape[w_index]
+    a = relay.var("in0", shape=shape, dtype=dtype)
+    p = (0, 0, 0, 0)
+    if padding == "SAME":
+        p = get_same_padding((shape[1], shape[2]), (kernel_h, kernel_w), dilation, strides)
+        a = relay.nn.pad(
+            a,
+            pad_width=[(0, 0), (p[0], p[2]), (p[1], p[3]), (0, 0)],
+            pad_value=input_zp,
+            pad_mode="constant",
+        )
+        shape = (shape[0], shape[1] + p[0] + p[2], shape[2] + p[1] + p[3], shape[3])
+
+    weight_shape = (kernel_h, kernel_w, shape[3] // groups, out_channels)
+    w = tvm.nd.array(
+        np.random.randint(
+            np.iinfo(kernel_dtype).min,
+            high=np.iinfo(kernel_dtype).max,
+            size=weight_shape,
+            dtype=kernel_dtype,
+        )
+    )
+    weights = relay.const(w, kernel_dtype)
+    conv = relay.qnn.op.conv2d(
+        a,
+        weights,
+        input_zero_point=relay.const(input_zp, "int32"),
+        kernel_zero_point=relay.const(kernel_zp, "int32"),
+        input_scale=relay.const(input_sc, "float32"),
+        kernel_scale=relay.const(kernel_sc, "float32"),
+        kernel_size=(kernel_h, kernel_w),
+        data_layout="NHWC",
+        kernel_layout=weight_format,
+        dilation=dilation,
+        strides=strides,
+        groups=groups,
+        channels=out_channels,
+        padding=p,
+        out_dtype="int32",
+    )
+    b = tvm.nd.array(np.random.randint(0, high=10, size=(out_channels,), dtype="int32"))
+    bc = relay.const(b, "int32")

Review comment:
       Genuinely unsure what `bc` means in this context.

##########
File path: tests/python/contrib/test_cmsisnn/test_conv2d.py
##########
@@ -0,0 +1,303 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""CMSIS-NN integration tests: Conv2D"""
+import itertools
+import numpy as np
+import pytest
+import tvm
+from tvm import relay
+from tvm.relay.op.contrib import cmsisnn
+
+
+from tests.python.relay.aot.aot_test_utils import (
+    AOTTestModel,
+    AOT_CORSTONE300_RUNNER,
+    AOT_DEFAULT_RUNNER,
+    generate_ref_data,
+    compile_and_run,
+)
+from utils import (
+    skip_if_no_reference_system,
+    make_module,
+    count_num_calls,
+    get_range_for_dtype_str,
+    get_same_padding,
+    get_conv2d_qnn_params,
+    make_qnn_relu,
+)
+
+
+def make_model(
+    shape,
+    kernel_shape,
+    input_zp,
+    input_sc,
+    kernel_zp,
+    kernel_sc,
+    output_zp,
+    output_sc,
+    padding,
+    strides,
+    dilation,
+    groups,
+    dtype,
+    kernel_dtype,
+    out_channels,
+    weight_format,
+    enable_bias,
+    relu_type,
+):
+    """Return a model and any parameters it may have"""
+    h_index = weight_format.index("H")
+    w_index = weight_format.index("W")
+    kernel_h = kernel_shape[h_index]
+    kernel_w = kernel_shape[w_index]
+    a = relay.var("in0", shape=shape, dtype=dtype)

Review comment:
       We should use more descriptive names?
   
   ```suggestion
       input = relay.var("input", shape=shape, dtype=dtype)
   ```

##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }

Review comment:
       Should we make this more CMSIS-NN specific?
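
       For instance (prefix purely illustrative):

   ```cpp
   String gen_var_name() { return "tvm_cmsisnn_extract_const_" + std::to_string(var_count_++); }
   ```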

##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -43,23 +44,153 @@ class RelayToTIRVisitor : public MixedModeVisitor {
   inline IntImm ToArg(int32_t value) { return IntImm(DataType::Int(32), value); }
 
   void CreatePrimFuncForExtern(Array<tir::Var> func_signature,
-                               tvm::Array<PrimExpr> call_extern_args) {
+                               tvm::Array<PrimExpr> call_extern_args, int context_buffer_size = 0) {
     Map<String, ObjectRef> dict_attrs;
     dict_attrs.Set("global_symbol", func_name_);
     dict_attrs.Set("tir.noalias", Bool(true));
 
     tir::Stmt body = tir::Evaluate(
         tvm::tir::Call(DataType::Int(8), tir::builtin::call_extern(), call_extern_args));
 
+    if (context_buffer_size) {
+      // TODO(@ashutosh-arm) while supporting MVE, we need to move allocation through TVMBAW
+      tir::Var buffer_var("context_buffer", PointerType(PrimType(DataType::Int(8)), "global"));
+      body = tir::Allocate(buffer_var, DataType::Int(8), {context_buffer_size}, tir::const_true(),
+                           body);
+    }
+
     primfunc_ = tir::PrimFunc(func_signature, body, VoidType(), Map<tir::Var, tir::Buffer>(),
                               DictAttrs(dict_attrs));
   }
 
+  void EmitConv2D(const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* conv2d_call = nullptr;
+    auto* final_call = expr.as<CallNode>();
+    auto* final_op = final_call->op.as<OpNode>();

Review comment:
       ```suggestion
       const CallNode* final_call = expr.as<CallNode>();
       const OpNode* final_op = final_call->op.as<OpNode>();
   ```

##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    // Replace Constant arguments with Vars for ML Operators
+    // Perform this for non-main Call Nodes only
+    if (func_nesting_level_ && call->op.as<OpNode>()) {
+      Array<Expr> new_args;
+      for (auto& arg : post_call->args) {
+        auto* const_arg = arg.as<ConstantNode>();
+        if (const_arg && !const_arg->is_scalar()) {
+          Var var_arg = Var(gen_var_name(), const_arg->tensor_type());
+          new_args.push_back(var_arg);
+          constants_within_function_.push_back(GetRef<Constant>(const_arg));
+        } else {
+          new_args.push_back(arg);
+        }
+      }
+      final_call = Call(call->op, new_args, call->attrs, {});
+    }
+
+    // Since the constants are kicked out of partitioned functions
+    // a new call to global function is needed
+    if (auto* glob_var_node = post_call->op.as<GlobalVarNode>()) {
+      auto glob_var = GetRef<GlobalVar>(glob_var_node);
+      auto glob_func = Downcast<Function>(mod_->Lookup(glob_var));
+      auto new_glob_func = VisitExpr(glob_func);
+      if (!new_glob_func.same_as(glob_func)) {
+        mod_->Update(glob_var, Downcast<Function>(new_glob_func));
+        Array<Expr> new_args = post_call->args;
+        ICHECK(function_to_constants_.find(glob_func) != function_to_constants_.end());
+        for (auto constant : function_to_constants_.at(glob_func)) {
+          new_args.push_back(constant);
+        }
+        final_call = Call(glob_var, new_args);
+      }
+    }
+
+    // Since the constants are kicked out of the local partitioned functions

Review comment:
       We should consider a less aggressive verb for extracting constants.

##########
File path: src/relay/backend/contrib/cmsisnn/generate_constants.cc
##########
@@ -0,0 +1,230 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/attrs/transform.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+Expr MakeTranspose(Expr data, Array<Integer> axes);

Review comment:
       You should be able to include this from `../../../op/make_op.h` 
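
       A minimal sketch of the swap, assuming `make_op.h` declares `MakeTranspose` with the signature used here:

   ```cpp
   #include "../../../op/make_op.h"  // declares MakeTranspose(Expr data, Array<Integer> axes)

   // ...which would make the local forward declaration of MakeTranspose redundant.
   ```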

##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -43,23 +44,153 @@ class RelayToTIRVisitor : public MixedModeVisitor {
   inline IntImm ToArg(int32_t value) { return IntImm(DataType::Int(32), value); }
 
   void CreatePrimFuncForExtern(Array<tir::Var> func_signature,
-                               tvm::Array<PrimExpr> call_extern_args) {
+                               tvm::Array<PrimExpr> call_extern_args, int context_buffer_size = 0) {
     Map<String, ObjectRef> dict_attrs;
     dict_attrs.Set("global_symbol", func_name_);
     dict_attrs.Set("tir.noalias", Bool(true));
 
     tir::Stmt body = tir::Evaluate(
         tvm::tir::Call(DataType::Int(8), tir::builtin::call_extern(), call_extern_args));
 
+    if (context_buffer_size) {
+      // TODO(@ashutosh-arm) while supporting MVE, we need to move allocation through TVMBAW
+      tir::Var buffer_var("context_buffer", PointerType(PrimType(DataType::Int(8)), "global"));
+      body = tir::Allocate(buffer_var, DataType::Int(8), {context_buffer_size}, tir::const_true(),
+                           body);
+    }
+
     primfunc_ = tir::PrimFunc(func_signature, body, VoidType(), Map<tir::Var, tir::Buffer>(),
                               DictAttrs(dict_attrs));
   }
 
+  void EmitConv2D(const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* conv2d_call = nullptr;
+    auto* final_call = expr.as<CallNode>();
+    auto* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    auto* requantize_input = requantize_call->args[0].as<CallNode>();
+    auto* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      conv2d_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      conv2d_call = requantize_input;
+    }
+
+    // TIR variables are created in the order they appear in the Relay partitioned function
+    // %1 = qnn.conv2d(%input, %weight_const_0, input_zero_point_scalar,
+    //                 %cmsisnn_multiplier_const_1, %input_scale_scalar, %weight_scale_const_2)
+    // %2 = nn.bias_add(%1, %bias_const_3, axis=3)
+    // %3 = qnn.requantize(%2, %input_scale_const_4, %cmsisnn_shift_const_5,
+    //                     %output_scale_scalar, %output_zero_point_scalar)
+    // clip(%3, a_min=%min_scalar, a_max=%max_scalar)
+    auto in_var = tir::Var("input", DataType::Handle(8));
+    auto const_var0 = tir::Var("filter", DataType::Handle(8));         // weight
+    auto const_var1 = tir::Var("multiplier", DataType::Handle(32));    // quant multiplier
+    auto const_var2 = tir::Var("filter_scale", DataType::Handle(32));  // weight scale
+    auto const_var3 = tir::Var("bias", DataType::Handle(32));          // bias
+    auto const_var4 = tir::Var("input_scale", DataType::Handle(32));   // input_scale * weight_scale
+    auto const_var5 = tir::Var("shift", DataType::Handle(32));         // quant shift
+    auto out_var = tir::Var("output", DataType::Handle(8));
+
+    // Individual arguments to the structs arguments of the CMSIS-NN API are filled into call_extern
+    // https://github.com/ARM-software/CMSIS_5/blob/def6f800f95661eb3451d317f7d0dde504f6020d/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c#L50
+
+    // prepare cmsis_nn_conv_params
+    auto* conv2d_attrs = conv2d_call->attrs.as<Conv2DAttrs>();
+    int32_t input_offset = -GetScalarFromConstant<int32_t>(conv2d_call->args[2]);
+    int32_t output_offset = GetScalarFromConstant<int32_t>(requantize_call->args[4]);
+    int32_t stride_w = qnn::get_const_int(conv2d_attrs->strides[1]);
+    int32_t stride_h = qnn::get_const_int(conv2d_attrs->strides[0]);
+    int32_t padding_w = qnn::get_const_int(conv2d_attrs->padding[1]);
+    int32_t padding_h = qnn::get_const_int(conv2d_attrs->padding[0]);
+    int32_t dilation_w = qnn::get_const_int(conv2d_attrs->dilation[1]);
+    int32_t dilation_h = qnn::get_const_int(conv2d_attrs->dilation[0]);
+    int32_t clip_min, clip_max;
+    if (clip_call) {
+      auto* clip_attrs = clip_call->attrs.as<ClipAttrs>();
+      clip_min = clip_attrs->a_min;
+      clip_max = clip_attrs->a_max;
+    } else {
+      clip_min = -128;
+      clip_max = 127;
+    }
+
+    // cmsis_nn_dims *input_dims
+    auto input_shape = conv2d_call->args[0]->type_as<TensorTypeNode>()->shape;
+    int32_t input_n = qnn::get_const_int(input_shape[0]);
+    int32_t input_h = qnn::get_const_int(input_shape[1]);
+    int32_t input_w = qnn::get_const_int(input_shape[2]);
+    int32_t input_c = qnn::get_const_int(input_shape[3]);
+
+    // cmsis_nn_dims *filter_dims (OHWI)
+    auto filter_shape = conv2d_call->args[1]->type_as<TensorTypeNode>()->shape;

Review comment:
       ```suggestion
       Array<PrimExpr> filter_shape = conv2d_call->args[1]->type_as<TensorTypeNode>()->shape;
   ```

##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -43,23 +44,153 @@ class RelayToTIRVisitor : public MixedModeVisitor {
   inline IntImm ToArg(int32_t value) { return IntImm(DataType::Int(32), value); }
 
   void CreatePrimFuncForExtern(Array<tir::Var> func_signature,
-                               tvm::Array<PrimExpr> call_extern_args) {
+                               tvm::Array<PrimExpr> call_extern_args, int context_buffer_size = 0) {
     Map<String, ObjectRef> dict_attrs;
     dict_attrs.Set("global_symbol", func_name_);
     dict_attrs.Set("tir.noalias", Bool(true));
 
     tir::Stmt body = tir::Evaluate(
         tvm::tir::Call(DataType::Int(8), tir::builtin::call_extern(), call_extern_args));
 
+    if (context_buffer_size) {
+      // TODO(@ashutosh-arm) while supporting MVE, we need to move allocation through TVMBAW
+      tir::Var buffer_var("context_buffer", PointerType(PrimType(DataType::Int(8)), "global"));
+      body = tir::Allocate(buffer_var, DataType::Int(8), {context_buffer_size}, tir::const_true(),
+                           body);
+    }
+
     primfunc_ = tir::PrimFunc(func_signature, body, VoidType(), Map<tir::Var, tir::Buffer>(),
                               DictAttrs(dict_attrs));
   }
 
+  void EmitConv2D(const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* conv2d_call = nullptr;
+    auto* final_call = expr.as<CallNode>();
+    auto* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    auto* requantize_input = requantize_call->args[0].as<CallNode>();
+    auto* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      conv2d_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      conv2d_call = requantize_input;
+    }
+
+    // TIR variables are created in the order they appear in the Relay partitioned function
+    // %1 = qnn.conv2d(%input, %weight_const_0, input_zero_point_scalar,
+    //                 %cmsisnn_multiplier_const_1, %input_scale_scalar, %weight_scale_const_2)
+    // %2 = nn.bias_add(%1, %bias_const_3, axis=3)
+    // %3 = qnn.requantize(%2, %input_scale_const_4, %cmsisnn_shift_const_5,
+    //                     %output_scale_scalar, %output_zero_point_scalar)
+    // clip(%3, a_min=%min_scalar, a_max=%max_scalar)
+    auto in_var = tir::Var("input", DataType::Handle(8));
+    auto const_var0 = tir::Var("filter", DataType::Handle(8));         // weight
+    auto const_var1 = tir::Var("multiplier", DataType::Handle(32));    // quant multiplier
+    auto const_var2 = tir::Var("filter_scale", DataType::Handle(32));  // weight scale
+    auto const_var3 = tir::Var("bias", DataType::Handle(32));          // bias
+    auto const_var4 = tir::Var("input_scale", DataType::Handle(32));   // input_scale * weight_scale
+    auto const_var5 = tir::Var("shift", DataType::Handle(32));         // quant shift
+    auto out_var = tir::Var("output", DataType::Handle(8));
+
+    // Individual arguments to the structs arguments of the CMSIS-NN API are filled into call_extern
+    // https://github.com/ARM-software/CMSIS_5/blob/def6f800f95661eb3451d317f7d0dde504f6020d/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c#L50
+
+    // prepare cmsis_nn_conv_params
+    auto* conv2d_attrs = conv2d_call->attrs.as<Conv2DAttrs>();
+    int32_t input_offset = -GetScalarFromConstant<int32_t>(conv2d_call->args[2]);
+    int32_t output_offset = GetScalarFromConstant<int32_t>(requantize_call->args[4]);
+    int32_t stride_w = qnn::get_const_int(conv2d_attrs->strides[1]);
+    int32_t stride_h = qnn::get_const_int(conv2d_attrs->strides[0]);
+    int32_t padding_w = qnn::get_const_int(conv2d_attrs->padding[1]);
+    int32_t padding_h = qnn::get_const_int(conv2d_attrs->padding[0]);
+    int32_t dilation_w = qnn::get_const_int(conv2d_attrs->dilation[1]);
+    int32_t dilation_h = qnn::get_const_int(conv2d_attrs->dilation[0]);
+    int32_t clip_min, clip_max;
+    if (clip_call) {
+      auto* clip_attrs = clip_call->attrs.as<ClipAttrs>();

Review comment:
       ```suggestion
      const ClipAttrs* clip_attrs = clip_call->attrs.as<ClipAttrs>();
   ```

##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -43,23 +44,153 @@ class RelayToTIRVisitor : public MixedModeVisitor {
   inline IntImm ToArg(int32_t value) { return IntImm(DataType::Int(32), value); }
 
   void CreatePrimFuncForExtern(Array<tir::Var> func_signature,
-                               tvm::Array<PrimExpr> call_extern_args) {
+                               tvm::Array<PrimExpr> call_extern_args, int context_buffer_size = 0) {
     Map<String, ObjectRef> dict_attrs;
     dict_attrs.Set("global_symbol", func_name_);
     dict_attrs.Set("tir.noalias", Bool(true));
 
     tir::Stmt body = tir::Evaluate(
         tvm::tir::Call(DataType::Int(8), tir::builtin::call_extern(), call_extern_args));
 
+    if (context_buffer_size) {
+      // TODO(@ashutosh-arm) while supporting MVE, we need to move allocation through TVMBAW
+      tir::Var buffer_var("context_buffer", PointerType(PrimType(DataType::Int(8)), "global"));
+      body = tir::Allocate(buffer_var, DataType::Int(8), {context_buffer_size}, tir::const_true(),
+                           body);
+    }
+
     primfunc_ = tir::PrimFunc(func_signature, body, VoidType(), Map<tir::Var, tir::Buffer>(),
                               DictAttrs(dict_attrs));
   }
 
+  void EmitConv2D(const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* conv2d_call = nullptr;
+    auto* final_call = expr.as<CallNode>();
+    auto* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    auto* requantize_input = requantize_call->args[0].as<CallNode>();
+    auto* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      conv2d_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      conv2d_call = requantize_input;
+    }
+
+    // TIR variables are created in the order they appear in the Relay partitioned function
+    // %1 = qnn.conv2d(%input, %weight_const_0, input_zero_point_scalar,
+    //                 %cmsisnn_multiplier_const_1, %input_scale_scalar, %weight_scale_const_2)
+    // %2 = nn.bias_add(%1, %bias_const_3, axis=3)
+    // %3 = qnn.requantize(%2, %input_scale_const_4, %cmsisnn_shift_const_5,
+    //                     %output_scale_scalar, %output_zero_point_scalar)
+    // clip(%3, a_min=%min_scalar, a_max=%max_scalar)
+    auto in_var = tir::Var("input", DataType::Handle(8));
+    auto const_var0 = tir::Var("filter", DataType::Handle(8));         // weight
+    auto const_var1 = tir::Var("multiplier", DataType::Handle(32));    // quant multiplier
+    auto const_var2 = tir::Var("filter_scale", DataType::Handle(32));  // weight scale
+    auto const_var3 = tir::Var("bias", DataType::Handle(32));          // bias
+    auto const_var4 = tir::Var("input_scale", DataType::Handle(32));   // input_scale * weight_scale
+    auto const_var5 = tir::Var("shift", DataType::Handle(32));         // quant shift
+    auto out_var = tir::Var("output", DataType::Handle(8));
+
+    // Individual arguments to the structs arguments of the CMSIS-NN API are filled into call_extern
+    // https://github.com/ARM-software/CMSIS_5/blob/def6f800f95661eb3451d317f7d0dde504f6020d/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c#L50
+
+    // prepare cmsis_nn_conv_params
+    auto* conv2d_attrs = conv2d_call->attrs.as<Conv2DAttrs>();
+    int32_t input_offset = -GetScalarFromConstant<int32_t>(conv2d_call->args[2]);
+    int32_t output_offset = GetScalarFromConstant<int32_t>(requantize_call->args[4]);
+    int32_t stride_w = qnn::get_const_int(conv2d_attrs->strides[1]);
+    int32_t stride_h = qnn::get_const_int(conv2d_attrs->strides[0]);
+    int32_t padding_w = qnn::get_const_int(conv2d_attrs->padding[1]);
+    int32_t padding_h = qnn::get_const_int(conv2d_attrs->padding[0]);
+    int32_t dilation_w = qnn::get_const_int(conv2d_attrs->dilation[1]);
+    int32_t dilation_h = qnn::get_const_int(conv2d_attrs->dilation[0]);
+    int32_t clip_min, clip_max;
+    if (clip_call) {
+      auto* clip_attrs = clip_call->attrs.as<ClipAttrs>();
+      clip_min = clip_attrs->a_min;
+      clip_max = clip_attrs->a_max;
+    } else {
+      clip_min = -128;
+      clip_max = 127;
+    }
+
+    // cmsis_nn_dims *input_dims

Review comment:
       It might be worth constructing helper functions to generate these pseudo-structs, just to make it easier to see?
   
   ```
   Array<IntImm> dimensions = CMSISNNDimensions(input_shape);
   ```
   
   and then at the end doing:
   
   ```cpp
   Concat(extern_args, dimensions, woofles);
   ```
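
       A sketch of what such a helper could look like; the name `CMSISNNDimensions` is hypothetical, and it returns `Array<PrimExpr>` here so the result drops straight into the existing `tvm::runtime::Concat(call_ext_args, ...)` call:

   ```cpp
   // Hypothetical helper: packs an NHWC shape into the four scalar arguments
   // that mirror a cmsis_nn_dims struct on the CMSIS-NN side.
   Array<PrimExpr> CMSISNNDimensions(const Array<PrimExpr>& shape) {
     ICHECK_EQ(shape.size(), 4) << "cmsis_nn_dims expects N, H, W, C";
     return {ToArg(qnn::get_const_int(shape[0])), ToArg(qnn::get_const_int(shape[1])),
             ToArg(qnn::get_const_int(shape[2])), ToArg(qnn::get_const_int(shape[3]))};
   }

   // Usage inside EmitConv2D:
   //   Array<PrimExpr> input_dims = CMSISNNDimensions(input_shape);
   //   call_ext_args = tvm::runtime::Concat(call_ext_args, input_dims);
   ```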

##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -43,23 +44,153 @@ class RelayToTIRVisitor : public MixedModeVisitor {
   inline IntImm ToArg(int32_t value) { return IntImm(DataType::Int(32), value); }
 
   void CreatePrimFuncForExtern(Array<tir::Var> func_signature,
-                               tvm::Array<PrimExpr> call_extern_args) {
+                               tvm::Array<PrimExpr> call_extern_args, int context_buffer_size = 0) {
     Map<String, ObjectRef> dict_attrs;
     dict_attrs.Set("global_symbol", func_name_);
     dict_attrs.Set("tir.noalias", Bool(true));
 
     tir::Stmt body = tir::Evaluate(
         tvm::tir::Call(DataType::Int(8), tir::builtin::call_extern(), call_extern_args));
 
+    if (context_buffer_size) {
+      // TODO(@ashutosh-arm) while supporting MVE, we need to move allocation through TVMBAW
+      tir::Var buffer_var("context_buffer", PointerType(PrimType(DataType::Int(8)), "global"));
+      body = tir::Allocate(buffer_var, DataType::Int(8), {context_buffer_size}, tir::const_true(),
+                           body);
+    }
+
     primfunc_ = tir::PrimFunc(func_signature, body, VoidType(), Map<tir::Var, tir::Buffer>(),
                               DictAttrs(dict_attrs));
   }
 
+  void EmitConv2D(const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* conv2d_call = nullptr;
+    auto* final_call = expr.as<CallNode>();
+    auto* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    auto* requantize_input = requantize_call->args[0].as<CallNode>();
+    auto* requantize_input_op = requantize_input->op.as<OpNode>();

Review comment:
       ```suggestion
       const CallNode* requantize_input = requantize_call->args[0].as<CallNode>();
       const OpNode* requantize_input_op = requantize_input->op.as<OpNode>();
   ```

##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -43,23 +44,153 @@ class RelayToTIRVisitor : public MixedModeVisitor {
   inline IntImm ToArg(int32_t value) { return IntImm(DataType::Int(32), value); }
 
   void CreatePrimFuncForExtern(Array<tir::Var> func_signature,
-                               tvm::Array<PrimExpr> call_extern_args) {
+                               tvm::Array<PrimExpr> call_extern_args, int context_buffer_size = 0) {
     Map<String, ObjectRef> dict_attrs;
     dict_attrs.Set("global_symbol", func_name_);
     dict_attrs.Set("tir.noalias", Bool(true));
 
     tir::Stmt body = tir::Evaluate(
         tvm::tir::Call(DataType::Int(8), tir::builtin::call_extern(), call_extern_args));
 
+    if (context_buffer_size) {
+      // TODO(@ashutosh-arm) while supporting MVE, we need to move allocation through TVMBAW
+      tir::Var buffer_var("context_buffer", PointerType(PrimType(DataType::Int(8)), "global"));
+      body = tir::Allocate(buffer_var, DataType::Int(8), {context_buffer_size}, tir::const_true(),
+                           body);
+    }
+
     primfunc_ = tir::PrimFunc(func_signature, body, VoidType(), Map<tir::Var, tir::Buffer>(),
                               DictAttrs(dict_attrs));
   }
 
+  void EmitConv2D(const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* conv2d_call = nullptr;
+    auto* final_call = expr.as<CallNode>();
+    auto* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    auto* requantize_input = requantize_call->args[0].as<CallNode>();
+    auto* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      conv2d_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      conv2d_call = requantize_input;
+    }
+
+    // TIR variables are created in the order they appear in the Relay partitioned function
+    // %1 = qnn.conv2d(%input, %weight_const_0, input_zero_point_scalar,
+    //                 %cmsisnn_multiplier_const_1, %input_scale_scalar, %weight_scale_const_2)
+    // %2 = nn.bias_add(%1, %bias_const_3, axis=3)
+    // %3 = qnn.requantize(%2, %input_scale_const_4, %cmsisnn_shift_const_5,
+    //                     %output_scale_scalar, %output_zero_point_scalar)
+    // clip(%3, a_min=%min_scalar, a_max=%max_scalar)
+    auto in_var = tir::Var("input", DataType::Handle(8));
+    auto const_var0 = tir::Var("filter", DataType::Handle(8));         // weight
+    auto const_var1 = tir::Var("multiplier", DataType::Handle(32));    // quant multiplier
+    auto const_var2 = tir::Var("filter_scale", DataType::Handle(32));  // weight scale
+    auto const_var3 = tir::Var("bias", DataType::Handle(32));          // bias
+    auto const_var4 = tir::Var("input_scale", DataType::Handle(32));   // input_scale * weight_scale
+    auto const_var5 = tir::Var("shift", DataType::Handle(32));         // quant shift
+    auto out_var = tir::Var("output", DataType::Handle(8));
+
+    // Individual arguments to the structs arguments of the CMSIS-NN API are filled into call_extern
+    // https://github.com/ARM-software/CMSIS_5/blob/def6f800f95661eb3451d317f7d0dde504f6020d/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c#L50
+
+    // prepare cmsis_nn_conv_params
+    auto* conv2d_attrs = conv2d_call->attrs.as<Conv2DAttrs>();
+    int32_t input_offset = -GetScalarFromConstant<int32_t>(conv2d_call->args[2]);
+    int32_t output_offset = GetScalarFromConstant<int32_t>(requantize_call->args[4]);
+    int32_t stride_w = qnn::get_const_int(conv2d_attrs->strides[1]);
+    int32_t stride_h = qnn::get_const_int(conv2d_attrs->strides[0]);
+    int32_t padding_w = qnn::get_const_int(conv2d_attrs->padding[1]);
+    int32_t padding_h = qnn::get_const_int(conv2d_attrs->padding[0]);
+    int32_t dilation_w = qnn::get_const_int(conv2d_attrs->dilation[1]);
+    int32_t dilation_h = qnn::get_const_int(conv2d_attrs->dilation[0]);
+    int32_t clip_min, clip_max;
+    if (clip_call) {
+      auto* clip_attrs = clip_call->attrs.as<ClipAttrs>();
+      clip_min = clip_attrs->a_min;
+      clip_max = clip_attrs->a_max;
+    } else {
+      clip_min = -128;
+      clip_max = 127;
+    }
+
+    // cmsis_nn_dims *input_dims
+    auto input_shape = conv2d_call->args[0]->type_as<TensorTypeNode>()->shape;
+    int32_t input_n = qnn::get_const_int(input_shape[0]);
+    int32_t input_h = qnn::get_const_int(input_shape[1]);
+    int32_t input_w = qnn::get_const_int(input_shape[2]);
+    int32_t input_c = qnn::get_const_int(input_shape[3]);
+
+    // cmsis_nn_dims *filter_dims (OHWI)
+    auto filter_shape = conv2d_call->args[1]->type_as<TensorTypeNode>()->shape;
+    int32_t filter_n = qnn::get_const_int(filter_shape[0]);
+    int32_t filter_h = qnn::get_const_int(filter_shape[1]);
+    int32_t filter_w = qnn::get_const_int(filter_shape[2]);
+    int32_t filter_c = qnn::get_const_int(filter_shape[3]);
+
+    // cmsis_nn_dims *bias_dims
+    int32_t bias_n = 1;
+    int32_t bias_h = 1;
+    int32_t bias_w = 1;
+    int32_t bias_c = qnn::get_const_int(filter_shape[0]);
+
+    // cmsis_nn_dims *output_dims
+    auto output_shape = conv2d_call->type_as<TensorTypeNode>()->shape;

Review comment:
       ```suggestion
       Array<PrimExpr> output_shape = conv2d_call->type_as<TensorTypeNode>()->shape;
   ```

##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -43,23 +44,153 @@ class RelayToTIRVisitor : public MixedModeVisitor {
   inline IntImm ToArg(int32_t value) { return IntImm(DataType::Int(32), value); }
 
   void CreatePrimFuncForExtern(Array<tir::Var> func_signature,
-                               tvm::Array<PrimExpr> call_extern_args) {
+                               tvm::Array<PrimExpr> call_extern_args, int context_buffer_size = 0) {
     Map<String, ObjectRef> dict_attrs;
     dict_attrs.Set("global_symbol", func_name_);
     dict_attrs.Set("tir.noalias", Bool(true));
 
     tir::Stmt body = tir::Evaluate(
         tvm::tir::Call(DataType::Int(8), tir::builtin::call_extern(), call_extern_args));
 
+    if (context_buffer_size) {
+      // TODO(@ashutosh-arm) while supporting MVE, we need to move allocation through TVMBAW
+      tir::Var buffer_var("context_buffer", PointerType(PrimType(DataType::Int(8)), "global"));
+      body = tir::Allocate(buffer_var, DataType::Int(8), {context_buffer_size}, tir::const_true(),
+                           body);
+    }
+
     primfunc_ = tir::PrimFunc(func_signature, body, VoidType(), Map<tir::Var, tir::Buffer>(),
                               DictAttrs(dict_attrs));
   }
 
+  void EmitConv2D(const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* conv2d_call = nullptr;
+    auto* final_call = expr.as<CallNode>();
+    auto* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    auto* requantize_input = requantize_call->args[0].as<CallNode>();
+    auto* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      conv2d_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      conv2d_call = requantize_input;
+    }
+
+    // TIR variables are created in the order they appear in the Relay partitioned function
+    // %1 = qnn.conv2d(%input, %weight_const_0, input_zero_point_scalar,
+    //                 %cmsisnn_multiplier_const_1, %input_scale_scalar, %weight_scale_const_2)
+    // %2 = nn.bias_add(%1, %bias_const_3, axis=3)
+    // %3 = qnn.requantize(%2, %input_scale_const_4, %cmsisnn_shift_const_5,
+    //                     %output_scale_scalar, %output_zero_point_scalar)
+    // clip(%3, a_min=%min_scalar, a_max=%max_scalar)
+    auto in_var = tir::Var("input", DataType::Handle(8));
+    auto const_var0 = tir::Var("filter", DataType::Handle(8));         // weight
+    auto const_var1 = tir::Var("multiplier", DataType::Handle(32));    // quant multiplier
+    auto const_var2 = tir::Var("filter_scale", DataType::Handle(32));  // weight scale
+    auto const_var3 = tir::Var("bias", DataType::Handle(32));          // bias
+    auto const_var4 = tir::Var("input_scale", DataType::Handle(32));   // input_scale * weight_scale
+    auto const_var5 = tir::Var("shift", DataType::Handle(32));         // quant shift
+    auto out_var = tir::Var("output", DataType::Handle(8));
+
+    // Individual arguments to the structs arguments of the CMSIS-NN API are filled into call_extern
+    // https://github.com/ARM-software/CMSIS_5/blob/def6f800f95661eb3451d317f7d0dde504f6020d/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c#L50
+
+    // prepare cmsis_nn_conv_params
+    auto* conv2d_attrs = conv2d_call->attrs.as<Conv2DAttrs>();
+    int32_t input_offset = -GetScalarFromConstant<int32_t>(conv2d_call->args[2]);
+    int32_t output_offset = GetScalarFromConstant<int32_t>(requantize_call->args[4]);
+    int32_t stride_w = qnn::get_const_int(conv2d_attrs->strides[1]);
+    int32_t stride_h = qnn::get_const_int(conv2d_attrs->strides[0]);
+    int32_t padding_w = qnn::get_const_int(conv2d_attrs->padding[1]);
+    int32_t padding_h = qnn::get_const_int(conv2d_attrs->padding[0]);
+    int32_t dilation_w = qnn::get_const_int(conv2d_attrs->dilation[1]);
+    int32_t dilation_h = qnn::get_const_int(conv2d_attrs->dilation[0]);
+    int32_t clip_min, clip_max;
+    if (clip_call) {
+      auto* clip_attrs = clip_call->attrs.as<ClipAttrs>();
+      clip_min = clip_attrs->a_min;
+      clip_max = clip_attrs->a_max;
+    } else {
+      clip_min = -128;
+      clip_max = 127;
+    }
+
+    // cmsis_nn_dims *input_dims
+    auto input_shape = conv2d_call->args[0]->type_as<TensorTypeNode>()->shape;

Review comment:
       We should try not to use `auto` unless it gets very long - https://google.github.io/styleguide/cppguide.html#Type_deduction, it took me a minute to figure out what type this was going to be.
   
   ```suggestion
       Array<PrimExpr> input_shape = conv2d_call->args[0]->type_as<TensorTypeNode>()->shape;
   ```

##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -43,23 +44,153 @@ class RelayToTIRVisitor : public MixedModeVisitor {
   inline IntImm ToArg(int32_t value) { return IntImm(DataType::Int(32), value); }
 
   void CreatePrimFuncForExtern(Array<tir::Var> func_signature,
-                               tvm::Array<PrimExpr> call_extern_args) {
+                               tvm::Array<PrimExpr> call_extern_args, int context_buffer_size = 0) {
     Map<String, ObjectRef> dict_attrs;
     dict_attrs.Set("global_symbol", func_name_);
     dict_attrs.Set("tir.noalias", Bool(true));
 
     tir::Stmt body = tir::Evaluate(
         tvm::tir::Call(DataType::Int(8), tir::builtin::call_extern(), call_extern_args));
 
+    if (context_buffer_size) {
+      // TODO(@ashutosh-arm) while supporting MVE, we need to move allocation through TVMBAW
+      tir::Var buffer_var("context_buffer", PointerType(PrimType(DataType::Int(8)), "global"));
+      body = tir::Allocate(buffer_var, DataType::Int(8), {context_buffer_size}, tir::const_true(),
+                           body);
+    }
+
     primfunc_ = tir::PrimFunc(func_signature, body, VoidType(), Map<tir::Var, tir::Buffer>(),
                               DictAttrs(dict_attrs));
   }
 
+  void EmitConv2D(const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* conv2d_call = nullptr;
+    auto* final_call = expr.as<CallNode>();
+    auto* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    auto* requantize_input = requantize_call->args[0].as<CallNode>();
+    auto* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      conv2d_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      conv2d_call = requantize_input;
+    }
+
+    // TIR variables are created in the order they appear in the Relay partitioned function
+    // %1 = qnn.conv2d(%input, %weight_const_0, input_zero_point_scalar,
+    //                 %cmsisnn_multiplier_const_1, %input_scale_scalar, %weight_scale_const_2)
+    // %2 = nn.bias_add(%1, %bias_const_3, axis=3)
+    // %3 = qnn.requantize(%2, %input_scale_const_4, %cmsisnn_shift_const_5,
+    //                     %output_scale_scalar, %output_zero_point_scalar)
+    // clip(%3, a_min=%min_scalar, a_max=%max_scalar)
+    auto in_var = tir::Var("input", DataType::Handle(8));
+    auto const_var0 = tir::Var("filter", DataType::Handle(8));         // weight
+    auto const_var1 = tir::Var("multiplier", DataType::Handle(32));    // quant multiplier
+    auto const_var2 = tir::Var("filter_scale", DataType::Handle(32));  // weight scale
+    auto const_var3 = tir::Var("bias", DataType::Handle(32));          // bias
+    auto const_var4 = tir::Var("input_scale", DataType::Handle(32));   // input_scale * weight_scale
+    auto const_var5 = tir::Var("shift", DataType::Handle(32));         // quant shift
+    auto out_var = tir::Var("output", DataType::Handle(8));
+
+    // Individual arguments to the structs arguments of the CMSIS-NN API are filled into call_extern
+    // https://github.com/ARM-software/CMSIS_5/blob/def6f800f95661eb3451d317f7d0dde504f6020d/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c#L50
+
+    // prepare cmsis_nn_conv_params
+    auto* conv2d_attrs = conv2d_call->attrs.as<Conv2DAttrs>();
+    int32_t input_offset = -GetScalarFromConstant<int32_t>(conv2d_call->args[2]);
+    int32_t output_offset = GetScalarFromConstant<int32_t>(requantize_call->args[4]);
+    int32_t stride_w = qnn::get_const_int(conv2d_attrs->strides[1]);
+    int32_t stride_h = qnn::get_const_int(conv2d_attrs->strides[0]);
+    int32_t padding_w = qnn::get_const_int(conv2d_attrs->padding[1]);
+    int32_t padding_h = qnn::get_const_int(conv2d_attrs->padding[0]);
+    int32_t dilation_w = qnn::get_const_int(conv2d_attrs->dilation[1]);
+    int32_t dilation_h = qnn::get_const_int(conv2d_attrs->dilation[0]);
+    int32_t clip_min, clip_max;
+    if (clip_call) {
+      auto* clip_attrs = clip_call->attrs.as<ClipAttrs>();
+      clip_min = clip_attrs->a_min;
+      clip_max = clip_attrs->a_max;
+    } else {
+      clip_min = -128;
+      clip_max = 127;
+    }
+
+    // cmsis_nn_dims *input_dims
+    auto input_shape = conv2d_call->args[0]->type_as<TensorTypeNode>()->shape;
+    int32_t input_n = qnn::get_const_int(input_shape[0]);
+    int32_t input_h = qnn::get_const_int(input_shape[1]);
+    int32_t input_w = qnn::get_const_int(input_shape[2]);
+    int32_t input_c = qnn::get_const_int(input_shape[3]);
+
+    // cmsis_nn_dims *filter_dims (OHWI)
+    auto filter_shape = conv2d_call->args[1]->type_as<TensorTypeNode>()->shape;
+    int32_t filter_n = qnn::get_const_int(filter_shape[0]);
+    int32_t filter_h = qnn::get_const_int(filter_shape[1]);
+    int32_t filter_w = qnn::get_const_int(filter_shape[2]);
+    int32_t filter_c = qnn::get_const_int(filter_shape[3]);
+
+    // cmsis_nn_dims *bias_dims
+    int32_t bias_n = 1;
+    int32_t bias_h = 1;
+    int32_t bias_w = 1;
+    int32_t bias_c = qnn::get_const_int(filter_shape[0]);
+
+    // cmsis_nn_dims *output_dims
+    auto output_shape = conv2d_call->type_as<TensorTypeNode>()->shape;
+    int32_t output_n = qnn::get_const_int(output_shape[0]);
+    int32_t output_h = qnn::get_const_int(output_shape[1]);
+    int32_t output_w = qnn::get_const_int(output_shape[2]);
+    int32_t output_c = qnn::get_const_int(output_shape[3]);
+
+    tvm::Array<PrimExpr> call_ext_args = {tir::StringImm("arm_convolve_wrapper_s8"), in_var,
+                                          const_var0, const_var1};
+    if (bias_add_call) {
+      call_ext_args.push_back(const_var3);
+    }
+    call_ext_args.push_back(const_var5);
+    call_ext_args.push_back(out_var);
+
+    tvm::Array<PrimExpr> scalar_args = {
+        ToArg(input_offset), ToArg(output_offset), ToArg(stride_w),   ToArg(stride_h),
+        ToArg(padding_w),    ToArg(padding_h),     ToArg(dilation_w), ToArg(dilation_h),
+        ToArg(clip_min),     ToArg(clip_max),      ToArg(input_n),    ToArg(input_h),
+        ToArg(input_w),      ToArg(input_c),       ToArg(filter_n),   ToArg(filter_h),
+        ToArg(filter_w),     ToArg(filter_c),      ToArg(bias_n),     ToArg(bias_h),
+        ToArg(bias_w),       ToArg(bias_c),        ToArg(output_n),   ToArg(output_h),
+        ToArg(output_w),     ToArg(output_c),
+    };
+
+    call_ext_args = tvm::runtime::Concat(call_ext_args, scalar_args);
+
+    Array<tir::Var> func_signature{in_var, const_var0, const_var1, const_var2};
+    if (bias_add_call) {
+      func_signature.push_back(const_var3);
+    }
+    func_signature.push_back(const_var4);
+    func_signature.push_back(const_var5);
+    func_signature.push_back(out_var);
+
+    // https://github.com/ARM-software/CMSIS_5/blob/d788fd583984388553391de18afd8b4d2a146868/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s8.c#L367
+    size_t context_buffer_size = (2 * input_c * filter_w * filter_h) * (int32_t)sizeof(int16_t);
+
+    CreatePrimFuncForExtern(func_signature, call_ext_args, context_buffer_size);
+  }
+
   void EmitSoftMax(const Expr& expr) {
     auto* quantize_call = expr.as<CallNode>();
     auto* softmax_call = quantize_call->args[0].as<CallNode>();
     auto* dequant_call = softmax_call->args[0].as<CallNode>();
-    const float quant_scale = GetScalarFromConstant<float>(dequant_call->args[1]);
+    auto* scale_const = dequant_call->args[1].as<ConstantNode>();

Review comment:
       ```suggestion
    const ConstantNode* scale_const = dequant_call->args[1].as<ConstantNode>();
   ```

##########
File path: src/relay/backend/contrib/cmsisnn/tir_to_runtime.cc
##########
@@ -54,6 +54,162 @@ class CodeGenCMSISNN : public CodeGenC {
   }
 
  private:
+  /*!  * \brief Emit the CMSIS-NN context buffer */
+  void VisitStmt_(const AllocateNode* op) {
+    context_buffer_name_ = op->buffer_var->name_hint;
+    context_buffer_size_ = op->constant_allocation_size();
+    CodeGenC::VisitStmt_(op);
+  }
+
+  /*!  * \brief Emits CMSIS-NN APIs for every call_extern */
+  void VisitExpr_(const CallNode* op, std::ostream& os) {  // NOLINT(*)
+    if (!op->op.same_as(builtin::call_extern())) {
+      return;
+    }
+    std::string cmsis_func_name = op->args[0].as<StringImmNode>()->value;
+    if (cmsis_func_name == "arm_softmax_s8" || cmsis_func_name == "arm_elementwise_mul_s8" ||
+        cmsis_func_name == "arm_elementwise_add_s8") {
+      CodeGenC::VisitExpr_(op, os);
+    } else if (cmsis_func_name == "arm_convolve_wrapper_s8") {
+      EmitConv2D(op);
+    }
+    return;
+  }
+
+  /*!  * \brief Emits cmsis_nn_context struct */
+  std::string EmitCMSISNNContext(std::ostream& os, std::string buf_name, int buf_size) {
+    std::string struct_name = "context";
+    PrintIndent();
+    os << "cmsis_nn_context " << struct_name << "= {" << buf_name << "," << buf_size << "};\n";
+    return struct_name;
+  }
+
+  /*!  * \brief Emits cmsis_nn_conv_params struct */
+  std::string EmitCMSISNNConvParams(std::ostream& os, int32_t input_offset, int32_t output_offset,
+                                    int32_t stride_w, int32_t stride_h, int32_t padding_w,
+                                    int32_t padding_h, int32_t dilation_w, int32_t dilation_h,
+                                    int32_t clip_min, int32_t clip_max) {
+    std::string struct_name = "conv_params";
+    PrintIndent();
+    os << "cmsis_nn_tile stride = {" << stride_w << "," << stride_h << "};\n";
+    PrintIndent();
+    os << "cmsis_nn_tile padding = {" << padding_w << "," << padding_h << "};\n";
+    PrintIndent();
+    os << "cmsis_nn_tile dilation = {" << dilation_w << "," << dilation_h << "};\n";
+    PrintIndent();
+    os << "cmsis_nn_activation activation = {" << clip_min << "," << clip_max << "};\n";
+    PrintIndent();
+    os << "cmsis_nn_conv_params " << struct_name << " = {" << input_offset << ", " << output_offset
+       << ", stride, padding, dilation, activation};\n";
+    return struct_name;
+  }
+
+  /*!  * \brief Emits cmsis_nn_per_channel_quant_params struct */
+  std::string EmitCMSISNNPerChannelQuantParams(std::ostream& os, std::string multiplier,
+                                               std::string shift) {
+    std::string struct_name = "quant_params";
+    PrintIndent();
+    os << "cmsis_nn_per_channel_quant_params " << struct_name << " = {" << multiplier << ", "
+       << shift << "};\n";
+    return struct_name;
+  }
+
+  /*!  * \brief Emits cmsis_nn_dims struct */
+  std::string EmitCMSISNNDims(std::ostream& os, std::string tensor_type, int32_t n, int32_t h,
+                              int32_t w, int32_t c) {
+    std::string struct_name = tensor_type + "_dims";
+    PrintIndent();
+    os << "cmsis_nn_dims " << struct_name << " = {" << n << "," << h << "," << w << "," << c
+       << "};\n";
+    return struct_name;
+  }
+
+  /*!  * \brief Emits CMSIS-NN APIs for every call_extern */
+  void EmitConv2D(const CallNode* op) {
+    static const int max_num_args = 33;
+    std::string cmsis_func_name = op->args[0].as<StringImmNode>()->value;
+
+    bool bias_enabled = false;
+    if (op->args.size() == max_num_args) {
+      bias_enabled = true;
+    }
+
+    auto get_var_name = [](const CallNode* op, int id) {
+      return op->args[id].as<VarNode>()->name_hint.c_str();
+    };
+    auto get_arg_value = [](const CallNode* op, int id) {
+      return op->args[id].as<IntImmNode>()->value;
+    };
+    int arg_id = 0;
+    std::string input_data = get_var_name(op, ++arg_id);
+    std::string filter_data = get_var_name(op, ++arg_id);
+    std::string multiplier = get_var_name(op, ++arg_id);
+    std::string bias_data("0x0");
+    if (bias_enabled) {
+      bias_data = get_var_name(op, ++arg_id);
+    }
+    std::string shift = get_var_name(op, ++arg_id);
+    std::string output_data = get_var_name(op, ++arg_id);
+
+    int input_offset = get_arg_value(op, ++arg_id);
+    int output_offset = get_arg_value(op, ++arg_id);
+    int stride_w = get_arg_value(op, ++arg_id);
+    int stride_h = get_arg_value(op, ++arg_id);
+    int padding_w = get_arg_value(op, ++arg_id);
+    int padding_h = get_arg_value(op, ++arg_id);
+    int dilation_w = get_arg_value(op, ++arg_id);
+    int dilation_h = get_arg_value(op, ++arg_id);
+    int clip_min = get_arg_value(op, ++arg_id);
+    int clip_max = get_arg_value(op, ++arg_id);
+    int input_n = get_arg_value(op, ++arg_id);
+    int input_h = get_arg_value(op, ++arg_id);
+    int input_w = get_arg_value(op, ++arg_id);
+    int input_c = get_arg_value(op, ++arg_id);
+    int filter_n = get_arg_value(op, ++arg_id);
+    int filter_h = get_arg_value(op, ++arg_id);
+    int filter_w = get_arg_value(op, ++arg_id);
+    int filter_c = get_arg_value(op, ++arg_id);
+    int bias_n = get_arg_value(op, ++arg_id);
+    int bias_h = get_arg_value(op, ++arg_id);
+    int bias_w = get_arg_value(op, ++arg_id);
+    int bias_c = get_arg_value(op, ++arg_id);
+    int output_n = get_arg_value(op, ++arg_id);
+    int output_h = get_arg_value(op, ++arg_id);
+    int output_w = get_arg_value(op, ++arg_id);
+    int output_c = get_arg_value(op, ++arg_id);
+
+    // TODO(ashutosh-arm) for mve code, need to look for tir allocate

Review comment:
       Done now?

##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -223,19 +353,25 @@ class RelayToTIRVisitor : public MixedModeVisitor {
 
     auto comp_name = func->GetAttr<String>(attr::kComposite);
     if (comp_name.defined()) {
-      if (comp_name == "cmsisnn.quantized_softmax") {
+      if (comp_name == "cmsisnn.qnn_conv2d") {
+        EmitConv2D(func->body);
+      }
+      if (comp_name == "cmsisnn.qnn_softmax") {
         EmitSoftMax(func->body);
       }
-      if (comp_name == "cmsisnn.quantized_mul") {
+      if (comp_name == "cmsisnn.qnn_mul") {
         EmitMul(func->body);
       }
-      if (comp_name == "cmsisnn.quantized_add") {
+      if (comp_name == "cmsisnn.qnn_add") {
         EmitAdd(func->body);
       }
     }
   }
 
  public:
+  int32_t kScaledDiffIntegerBits = 5;

Review comment:
       Do all of these need to be public now? Also, I believe we could mark these as `constexpr`?
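   
    For illustration, a minimal sketch of that suggestion (only `kScaledDiffIntegerBits` and the class name are taken from the diff above; base class and other members omitted):
    ```cpp
    #include <cstdint>

    class RelayToTIRVisitor /* : public MixedModeVisitor */ {
     private:
      // constexpr static member: a single compile-time constant,
      // rather than mutable public per-instance state.
      static constexpr int32_t kScaledDiffIntegerBits = 5;
    };
    ```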

##########
File path: src/relay/backend/contrib/cmsisnn/tir_to_runtime.cc
##########
@@ -54,6 +54,162 @@ class CodeGenCMSISNN : public CodeGenC {
   }
 
  private:
+  /*!  * \brief Emit the CMSIS-NN context buffer */
+  void VisitStmt_(const AllocateNode* op) {
+    context_buffer_name_ = op->buffer_var->name_hint;
+    context_buffer_size_ = op->constant_allocation_size();
+    CodeGenC::VisitStmt_(op);
+  }
+
+  /*!  * \brief Emits CMSIS-NN APIs for every call_extern */
+  void VisitExpr_(const CallNode* op, std::ostream& os) {  // NOLINT(*)
+    if (!op->op.same_as(builtin::call_extern())) {
+      return;
+    }
+    std::string cmsis_func_name = op->args[0].as<StringImmNode>()->value;
+    if (cmsis_func_name == "arm_softmax_s8" || cmsis_func_name == "arm_elementwise_mul_s8" ||
+        cmsis_func_name == "arm_elementwise_add_s8") {
+      CodeGenC::VisitExpr_(op, os);
+    } else if (cmsis_func_name == "arm_convolve_wrapper_s8") {
+      EmitConv2D(op);
+    }
+    return;
+  }
+
+  /*!  * \brief Emits cmsis_nn_context struct */
+  std::string EmitCMSISNNContext(std::ostream& os, std::string buf_name, int buf_size) {
+    std::string struct_name = "context";
+    PrintIndent();
+    os << "cmsis_nn_context " << struct_name << "= {" << buf_name << "," << buf_size << "};\n";
+    return struct_name;
+  }
+
+  /*!  * \brief Emits cmsis_nn_conv_params struct */
+  std::string EmitCMSISNNConvParams(std::ostream& os, int32_t input_offset, int32_t output_offset,
+                                    int32_t stride_w, int32_t stride_h, int32_t padding_w,
+                                    int32_t padding_h, int32_t dilation_w, int32_t dilation_h,
+                                    int32_t clip_min, int32_t clip_max) {
+    std::string struct_name = "conv_params";
+    PrintIndent();
+    os << "cmsis_nn_tile stride = {" << stride_w << "," << stride_h << "};\n";
+    PrintIndent();
+    os << "cmsis_nn_tile padding = {" << padding_w << "," << padding_h << "};\n";
+    PrintIndent();
+    os << "cmsis_nn_tile dilation = {" << dilation_w << "," << dilation_h << "};\n";
+    PrintIndent();
+    os << "cmsis_nn_activation activation = {" << clip_min << "," << clip_max << "};\n";
+    PrintIndent();
+    os << "cmsis_nn_conv_params " << struct_name << " = {" << input_offset << ", " << output_offset
+       << ", stride, padding, dilation, activation};\n";
+    return struct_name;
+  }
+
+  /*!  * \brief Emits cmsis_nn_per_channel_quant_params struct */
+  std::string EmitCMSISNNPerChannelQuantParams(std::ostream& os, std::string multiplier,
+                                               std::string shift) {
+    std::string struct_name = "quant_params";
+    PrintIndent();
+    os << "cmsis_nn_per_channel_quant_params " << struct_name << " = {" << multiplier << ", "
+       << shift << "};\n";
+    return struct_name;
+  }
+
+  /*!  * \brief Emits cmsis_nn_dims struct */
+  std::string EmitCMSISNNDims(std::ostream& os, std::string tensor_type, int32_t n, int32_t h,
+                              int32_t w, int32_t c) {
+    std::string struct_name = tensor_type + "_dims";
+    PrintIndent();
+    os << "cmsis_nn_dims " << struct_name << " = {" << n << "," << h << "," << w << "," << c
+       << "};\n";
+    return struct_name;
+  }
+
+  /*!  * \brief Emits CMSIS-NN APIs for every call_extern */
+  void EmitConv2D(const CallNode* op) {
+    static const int max_num_args = 33;
+    std::string cmsis_func_name = op->args[0].as<StringImmNode>()->value;
+
+    bool bias_enabled = false;
+    if (op->args.size() == max_num_args) {
+      bias_enabled = true;
+    }
+
+    auto get_var_name = [](const CallNode* op, int id) {
+      return op->args[id].as<VarNode>()->name_hint.c_str();
+    };
+    auto get_arg_value = [](const CallNode* op, int id) {
+      return op->args[id].as<IntImmNode>()->value;
+    };
+    int arg_id = 0;
+    std::string input_data = get_var_name(op, ++arg_id);
+    std::string filter_data = get_var_name(op, ++arg_id);
+    std::string multiplier = get_var_name(op, ++arg_id);
+    std::string bias_data("0x0");
+    if (bias_enabled) {
+      bias_data = get_var_name(op, ++arg_id);
+    }
+    std::string shift = get_var_name(op, ++arg_id);
+    std::string output_data = get_var_name(op, ++arg_id);
+
+    int input_offset = get_arg_value(op, ++arg_id);
+    int output_offset = get_arg_value(op, ++arg_id);
+    int stride_w = get_arg_value(op, ++arg_id);
+    int stride_h = get_arg_value(op, ++arg_id);
+    int padding_w = get_arg_value(op, ++arg_id);
+    int padding_h = get_arg_value(op, ++arg_id);
+    int dilation_w = get_arg_value(op, ++arg_id);
+    int dilation_h = get_arg_value(op, ++arg_id);
+    int clip_min = get_arg_value(op, ++arg_id);
+    int clip_max = get_arg_value(op, ++arg_id);
+    int input_n = get_arg_value(op, ++arg_id);
+    int input_h = get_arg_value(op, ++arg_id);
+    int input_w = get_arg_value(op, ++arg_id);
+    int input_c = get_arg_value(op, ++arg_id);
+    int filter_n = get_arg_value(op, ++arg_id);
+    int filter_h = get_arg_value(op, ++arg_id);
+    int filter_w = get_arg_value(op, ++arg_id);
+    int filter_c = get_arg_value(op, ++arg_id);
+    int bias_n = get_arg_value(op, ++arg_id);
+    int bias_h = get_arg_value(op, ++arg_id);
+    int bias_w = get_arg_value(op, ++arg_id);
+    int bias_c = get_arg_value(op, ++arg_id);
+    int output_n = get_arg_value(op, ++arg_id);
+    int output_h = get_arg_value(op, ++arg_id);
+    int output_w = get_arg_value(op, ++arg_id);
+    int output_c = get_arg_value(op, ++arg_id);
+
+    // TODO(ashutosh-arm) for mve code, need to look for tir allocate
+    std::string context = EmitCMSISNNContext(stream, context_buffer_name_, context_buffer_size_);
+    std::string conv_params =
+        EmitCMSISNNConvParams(stream, input_offset, output_offset, stride_w, stride_h, padding_w,
+                              padding_h, dilation_w, dilation_h, clip_min, clip_max);
+    std::string quant_params = EmitCMSISNNPerChannelQuantParams(stream, multiplier, shift);
+    std::string input_dim = EmitCMSISNNDims(stream, "input", input_n, input_h, input_w, input_c);
+    std::string filter_dim =
+        EmitCMSISNNDims(stream, "filter", filter_n, filter_h, filter_w, filter_c);
+    std::string bias_dim = EmitCMSISNNDims(stream, "bias", bias_n, bias_h, bias_w, bias_c);
+    std::string output_dim =
+        EmitCMSISNNDims(stream, "output", output_n, output_h, output_w, output_c);
+
+    PrintIndent();
+    stream << "arm_status status = ";
+    stream << cmsis_func_name << "(";
+    stream << "&" << context << ", ";
+    stream << "&" << conv_params << ", ";
+    stream << "&" << quant_params << ", ";
+    stream << "&" << input_dim << ", " << input_data << ", ";
+    stream << "&" << filter_dim << ", " << filter_data << ", ";
+    stream << "&" << bias_dim << ", " << bias_data << ", ";
+    stream << "&" << output_dim << ", " << output_data << ");\n";
+    PrintIndent();
+    stream << "if (status != ARM_MATH_SUCCESS) {\n";
+    PrintIndent();
+    PrintIndent();
+    stream << "printf(\"Failed during execution of " << cmsis_func_name << "().\");\n";

Review comment:
       We can't assume `printf` will be available; this function should return `0` on success or `-1` on error instead.
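   
    For illustration, a hedged sketch of what the emitted call site could look like without `printf` (the CMSIS-NN call and struct names match the codegen above; returning `-1`/`0` is just one possible convention for the generated function):
    ```c
    arm_status status = arm_convolve_wrapper_s8(&context, &conv_params, &quant_params,
                                                &input_dims, input_data,
                                                &filter_dims, filter_data,
                                                &bias_dims, bias_data,
                                                &output_dims, output_data);
    if (status != ARM_MATH_SUCCESS) {
      return -1;  /* report failure through the return value instead of printing */
    }
    return 0;
    ```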

##########
File path: src/relay/backend/contrib/cmsisnn/tir_to_runtime.cc
##########
@@ -69,6 +225,10 @@ class CodeGenCMSISNN : public CodeGenC {
     ss << "}\n";
     ss << "#endif\n";
   }
+
+ private:

Review comment:
       Isn't this block still private from above?
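   
    A trivial sketch of the point (member names taken from the diff above; a repeated access specifier changes nothing unless a `public:`/`protected:` appeared in between):
    ```cpp
    #include <string>

    class CodeGenCMSISNN /* : public CodeGenC */ {
     private:
      void EmitConv2D();  // members here are already private

     private:  // redundant: access is still private from above
      std::string context_buffer_name_;
      int context_buffer_size_ = 0;
    };
    ```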

##########
File path: tests/python/contrib/test_cmsisnn/test_conv2d.py
##########
@@ -0,0 +1,303 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""CMSIS-NN integration tests: Conv2D"""
+import itertools
+import numpy as np
+import pytest
+import tvm
+from tvm import relay
+from tvm.relay.op.contrib import cmsisnn
+
+
+from tests.python.relay.aot.aot_test_utils import (
+    AOTTestModel,
+    AOT_CORSTONE300_RUNNER,
+    AOT_DEFAULT_RUNNER,
+    generate_ref_data,
+    compile_and_run,
+)
+from utils import (
+    skip_if_no_reference_system,
+    make_module,
+    count_num_calls,
+    get_range_for_dtype_str,
+    get_same_padding,
+    get_conv2d_qnn_params,
+    make_qnn_relu,
+)
+
+
+def make_model(
+    shape,
+    kernel_shape,
+    input_zp,
+    input_sc,
+    kernel_zp,
+    kernel_sc,
+    output_zp,
+    output_sc,
+    padding,
+    strides,
+    dilation,
+    groups,
+    dtype,
+    kernel_dtype,
+    out_channels,
+    weight_format,
+    enable_bias,
+    relu_type,
+):
+    """Return a model and any parameters it may have"""
+    h_index = weight_format.index("H")
+    w_index = weight_format.index("W")
+    kernel_h = kernel_shape[h_index]
+    kernel_w = kernel_shape[w_index]
+    a = relay.var("in0", shape=shape, dtype=dtype)
+    p = (0, 0, 0, 0)
+    if padding == "SAME":
+        p = get_same_padding((shape[1], shape[2]), (kernel_h, kernel_w), dilation, strides)
+        a = relay.nn.pad(
+            a,
+            pad_width=[(0, 0), (p[0], p[2]), (p[1], p[3]), (0, 0)],
+            pad_value=input_zp,
+            pad_mode="constant",
+        )
+        shape = (shape[0], shape[1] + p[0] + p[2], shape[2] + p[1] + p[3], shape[3])
+
+    weight_shape = (kernel_h, kernel_w, shape[3] // groups, out_channels)
+    w = tvm.nd.array(
+        np.random.randint(
+            np.iinfo(kernel_dtype).min,
+            high=np.iinfo(kernel_dtype).max,
+            size=weight_shape,
+            dtype=kernel_dtype,
+        )
+    )
+    weights = relay.const(w, kernel_dtype)
+    conv = relay.qnn.op.conv2d(
+        a,
+        weights,
+        input_zero_point=relay.const(input_zp, "int32"),
+        kernel_zero_point=relay.const(kernel_zp, "int32"),
+        input_scale=relay.const(input_sc, "float32"),
+        kernel_scale=relay.const(kernel_sc, "float32"),
+        kernel_size=(kernel_h, kernel_w),
+        data_layout="NHWC",
+        kernel_layout=weight_format,
+        dilation=dilation,
+        strides=strides,
+        groups=groups,
+        channels=out_channels,
+        padding=p,
+        out_dtype="int32",
+    )
+    b = tvm.nd.array(np.random.randint(0, high=10, size=(out_channels,), dtype="int32"))
+    bc = relay.const(b, "int32")
+    bias = conv
+    if enable_bias:
+        bias = relay.nn.bias_add(conv, bc, axis=3)

Review comment:
       This logic is a bit weird; it'd be clearer if it didn't have `bias = conv` in there, maybe something like:
   ```suggestion
       last_op = relay.nn.bias_add(conv, bc, axis=3) if enable_bias else conv
   ```

##########
File path: tests/python/contrib/test_cmsisnn/test_conv2d.py
##########
@@ -0,0 +1,303 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""CMSIS-NN integration tests: Conv2D"""
+import itertools
+import numpy as np
+import pytest
+import tvm
+from tvm import relay
+from tvm.relay.op.contrib import cmsisnn
+
+
+from tests.python.relay.aot.aot_test_utils import (
+    AOTTestModel,
+    AOT_CORSTONE300_RUNNER,
+    AOT_DEFAULT_RUNNER,
+    generate_ref_data,
+    compile_and_run,
+)
+from utils import (
+    skip_if_no_reference_system,
+    make_module,
+    count_num_calls,
+    get_range_for_dtype_str,
+    get_same_padding,
+    get_conv2d_qnn_params,
+    make_qnn_relu,
+)
+
+
+def make_model(
+    shape,
+    kernel_shape,
+    input_zp,
+    input_sc,
+    kernel_zp,
+    kernel_sc,
+    output_zp,
+    output_sc,
+    padding,
+    strides,
+    dilation,
+    groups,
+    dtype,
+    kernel_dtype,
+    out_channels,
+    weight_format,
+    enable_bias,
+    relu_type,
+):
+    """Return a model and any parameters it may have"""
+    h_index = weight_format.index("H")
+    w_index = weight_format.index("W")
+    kernel_h = kernel_shape[h_index]
+    kernel_w = kernel_shape[w_index]
+    a = relay.var("in0", shape=shape, dtype=dtype)
+    p = (0, 0, 0, 0)
+    if padding == "SAME":
+        p = get_same_padding((shape[1], shape[2]), (kernel_h, kernel_w), dilation, strides)
+        a = relay.nn.pad(
+            a,
+            pad_width=[(0, 0), (p[0], p[2]), (p[1], p[3]), (0, 0)],
+            pad_value=input_zp,
+            pad_mode="constant",
+        )
+        shape = (shape[0], shape[1] + p[0] + p[2], shape[2] + p[1] + p[3], shape[3])
+
+    weight_shape = (kernel_h, kernel_w, shape[3] // groups, out_channels)
+    w = tvm.nd.array(
+        np.random.randint(
+            np.iinfo(kernel_dtype).min,
+            high=np.iinfo(kernel_dtype).max,
+            size=weight_shape,
+            dtype=kernel_dtype,
+        )
+    )
+    weights = relay.const(w, kernel_dtype)
+    conv = relay.qnn.op.conv2d(
+        a,
+        weights,
+        input_zero_point=relay.const(input_zp, "int32"),
+        kernel_zero_point=relay.const(kernel_zp, "int32"),
+        input_scale=relay.const(input_sc, "float32"),
+        kernel_scale=relay.const(kernel_sc, "float32"),
+        kernel_size=(kernel_h, kernel_w),
+        data_layout="NHWC",
+        kernel_layout=weight_format,
+        dilation=dilation,
+        strides=strides,
+        groups=groups,
+        channels=out_channels,
+        padding=p,
+        out_dtype="int32",
+    )
+    b = tvm.nd.array(np.random.randint(0, high=10, size=(out_channels,), dtype="int32"))
+    bc = relay.const(b, "int32")
+    bias = conv
+    if enable_bias:
+        bias = relay.nn.bias_add(conv, bc, axis=3)
+    requant_input_sc = [sc * input_sc for sc in kernel_sc]
+    req = relay.qnn.op.requantize(
+        bias,
+        relay.const(requant_input_sc, "float32"),
+        relay.const(0, "int32"),
+        relay.const(output_sc, "float32"),
+        relay.const(output_zp, "int32"),
+        out_dtype=dtype,
+    )
+    relu = make_qnn_relu(req, relu_type, output_sc, output_zp, dtype)
+    params = {"w": w, "b": b}
+    return relu, params
+
+
+@tvm.testing.requires_cmsisnn
+@pytest.mark.parametrize("ifm_shape", [(1, 28, 28, 12), (1, 64, 100, 4)])
+@pytest.mark.parametrize("kernel_size", [(3, 3)])
+@pytest.mark.parametrize("padding", ["SAME", "VALID"])
+@pytest.mark.parametrize("strides, dilation", [((2, 2), (1, 1)), ((1, 1), (1, 1))])
+@pytest.mark.parametrize("enable_bias", [True, False])
+@pytest.mark.parametrize("relu_type", ["NONE", "RELU"])
+@pytest.mark.parametrize(
+    "in_zp, in_sc, k_sc, out_channels",
+    [(10, 0.0128, [0.11, 0.22], 2), (-64, 1, [1, 0.0256, 1.37], 3)],
+)
+def test_op_int8(
+    ifm_shape,
+    kernel_size,
+    padding,
+    strides,
+    dilation,
+    enable_bias,
+    relu_type,
+    in_zp,
+    in_sc,
+    k_sc,
+    out_channels,
+):
+    interface_api = "c"
+    use_unpacked_api = True
+    test_runner = AOT_CORSTONE300_RUNNER
+
+    k_zp = 0
+    groups = 1
+    weight_format = "HWIO"
+    kernel_h = kernel_size[0]
+    kernel_w = kernel_size[1]
+    dtype = "int8"
+    in_min, in_max = get_range_for_dtype_str(dtype)
+
+    weight_shape = None
+    if weight_format == "HWIO":
+        weight_shape = (kernel_h, kernel_w, ifm_shape[3] // groups, out_channels)
+    else:
+        weight_shape = (kernel_h, kernel_w, ifm_shape[3], out_channels)
+
+    out_sc, out_zp = get_conv2d_qnn_params(
+        weight_shape, in_sc, in_zp, k_sc, k_zp, dtype, dtype, dtype, False
+    )
+
+    model, params = make_model(
+        ifm_shape,
+        weight_shape,
+        in_zp,
+        in_sc,
+        k_zp,
+        k_sc,
+        out_zp,
+        out_sc,
+        padding,
+        strides,
+        dilation,
+        groups,
+        dtype,
+        dtype,
+        out_channels,
+        weight_format,
+        enable_bias,
+        relu_type,
+    )
+    orig_mod = make_module(model)
+    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params)
+
+    # validate pattern matching
+    attrs = [
+        cmsisnn_mod[var.name_hint].attrs
+        for var in cmsisnn_mod.get_global_vars()
+        if cmsisnn_mod[var.name_hint].attrs
+    ]
+    assert any(attrs), "At least one function with external attributes was expected."
+
+    compilers = [
+        key == "Compiler" and value == "cmsisnn" for attr in attrs for key, value in attr.items()
+    ]
+    assert any(compilers), "Module does not contain function for cmsisnn target."
+
+    assert count_num_calls(orig_mod) == count_num_calls(
+        cmsisnn_mod
+    ), "Number of calls changed during partitioning"
+
+    # validate the output
+    np.random.seed(0)
+    inputs = {
+        "in0": np.random.randint(in_min, high=in_max, size=ifm_shape, dtype="int8"),
+    }
+    output_list = generate_ref_data(orig_mod["main"], inputs, params)
+    compile_and_run(
+        AOTTestModel(
+            module=cmsisnn_mod,
+            inputs=inputs,
+            outputs=output_list,
+            params=params,
+            output_tolerance=1,
+        ),
+        test_runner,
+        interface_api,
+        use_unpacked_api,
+    )
+
+
+def parameterize_for_invalid_model(test):
+    in_dtype = ["uint8", "int8"]
+    kernel_dtype = ["uint8", "int8"]
+    kernel_zero_point = [-33, 10, 0]
+    all_combinations = itertools.product(in_dtype, kernel_dtype, kernel_zero_point)
+    all_combinations = filter(
+        lambda parameters: not (
+            parameters[0] == "int8" and parameters[1] == "int8" and parameters[2] == 0
+        ),
+        all_combinations,
+    )
+    return pytest.mark.parametrize(
+        ["in_dtype", "kernel_dtype", "kernel_zero_point"],
+        all_combinations,
+    )(test)
+
+
+@parameterize_for_invalid_model
+def test_invalid_parameters(
+    in_dtype,
+    kernel_dtype,
+    kernel_zero_point,
+):
+    ifm_shape = (1, 28, 28, 12)
+    out_channels = 2
+    in_sc = 1
+    in_zp = 24
+    k_sc = [0.11, 0.0237]
+    in_min, in_max = get_range_for_dtype_str(in_dtype)
+
+    kernel_layout = "HWIO"
+    kernel_shape = [3, 3, ifm_shape[3], out_channels]
+    out_sc, out_zp = get_conv2d_qnn_params(
+        kernel_shape, in_sc, in_zp, k_sc, kernel_zero_point, in_dtype, kernel_dtype, in_dtype, False
+    )
+    model, params = make_model(
+        shape=ifm_shape,
+        kernel_shape=kernel_shape,
+        input_zp=in_zp,
+        input_sc=in_sc,
+        kernel_zp=kernel_zero_point,
+        kernel_sc=k_sc,
+        output_zp=out_zp,
+        output_sc=out_sc,
+        padding="SAME",
+        strides=(1, 1),
+        dilation=(1, 1),
+        groups=1,
+        dtype=in_dtype,
+        kernel_dtype=kernel_dtype,
+        out_channels=out_channels,
+        weight_format=kernel_layout,
+        enable_bias=True,
+        relu_type="NONE",
+    )
+    orig_mod = make_module(model)
+    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params)
+
+    # print(cmsisnn_mod.astext(False))

Review comment:
       `print` snuck in?

##########
File path: tests/python/contrib/test_cmsisnn/test_networks.py
##########
@@ -92,7 +92,6 @@ def test_cnn_small():
 
     orig_mod, params = convert_to_relay(tflite_model_buf, input_data, "input")
     cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params)
-

Review comment:
       Any particular reason to remove this line?

##########
File path: tests/python/contrib/test_cmsisnn/test_conv2d.py
##########
@@ -0,0 +1,303 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""CMSIS-NN integration tests: Conv2D"""
+import itertools
+import numpy as np
+import pytest
+import tvm
+from tvm import relay
+from tvm.relay.op.contrib import cmsisnn
+
+
+from tests.python.relay.aot.aot_test_utils import (
+    AOTTestModel,
+    AOT_CORSTONE300_RUNNER,
+    AOT_DEFAULT_RUNNER,
+    generate_ref_data,
+    compile_and_run,
+)
+from utils import (
+    skip_if_no_reference_system,
+    make_module,
+    count_num_calls,
+    get_range_for_dtype_str,
+    get_same_padding,
+    get_conv2d_qnn_params,
+    make_qnn_relu,
+)
+
+
+def make_model(
+    shape,
+    kernel_shape,
+    input_zp,
+    input_sc,
+    kernel_zp,
+    kernel_sc,
+    output_zp,
+    output_sc,
+    padding,
+    strides,
+    dilation,
+    groups,
+    dtype,
+    kernel_dtype,
+    out_channels,
+    weight_format,
+    enable_bias,
+    relu_type,
+):
+    """Return a model and any parameters it may have"""
+    h_index = weight_format.index("H")
+    w_index = weight_format.index("W")
+    kernel_h = kernel_shape[h_index]
+    kernel_w = kernel_shape[w_index]
+    a = relay.var("in0", shape=shape, dtype=dtype)
+    p = (0, 0, 0, 0)
+    if padding == "SAME":
+        p = get_same_padding((shape[1], shape[2]), (kernel_h, kernel_w), dilation, strides)
+        a = relay.nn.pad(
+            a,
+            pad_width=[(0, 0), (p[0], p[2]), (p[1], p[3]), (0, 0)],
+            pad_value=input_zp,
+            pad_mode="constant",
+        )
+        shape = (shape[0], shape[1] + p[0] + p[2], shape[2] + p[1] + p[3], shape[3])
+
+    weight_shape = (kernel_h, kernel_w, shape[3] // groups, out_channels)
+    w = tvm.nd.array(
+        np.random.randint(
+            np.iinfo(kernel_dtype).min,
+            high=np.iinfo(kernel_dtype).max,
+            size=weight_shape,
+            dtype=kernel_dtype,
+        )
+    )
+    weights = relay.const(w, kernel_dtype)
+    conv = relay.qnn.op.conv2d(
+        a,
+        weights,
+        input_zero_point=relay.const(input_zp, "int32"),
+        kernel_zero_point=relay.const(kernel_zp, "int32"),
+        input_scale=relay.const(input_sc, "float32"),
+        kernel_scale=relay.const(kernel_sc, "float32"),
+        kernel_size=(kernel_h, kernel_w),
+        data_layout="NHWC",
+        kernel_layout=weight_format,
+        dilation=dilation,
+        strides=strides,
+        groups=groups,
+        channels=out_channels,
+        padding=p,
+        out_dtype="int32",
+    )
+    b = tvm.nd.array(np.random.randint(0, high=10, size=(out_channels,), dtype="int32"))
+    bc = relay.const(b, "int32")
+    bias = conv
+    if enable_bias:
+        bias = relay.nn.bias_add(conv, bc, axis=3)
+    requant_input_sc = [sc * input_sc for sc in kernel_sc]
+    req = relay.qnn.op.requantize(
+        bias,
+        relay.const(requant_input_sc, "float32"),
+        relay.const(0, "int32"),
+        relay.const(output_sc, "float32"),
+        relay.const(output_zp, "int32"),
+        out_dtype=dtype,
+    )
+    relu = make_qnn_relu(req, relu_type, output_sc, output_zp, dtype)
+    params = {"w": w, "b": b}
+    return relu, params
+
+
+@tvm.testing.requires_cmsisnn
+@pytest.mark.parametrize("ifm_shape", [(1, 28, 28, 12), (1, 64, 100, 4)])
+@pytest.mark.parametrize("kernel_size", [(3, 3)])
+@pytest.mark.parametrize("padding", ["SAME", "VALID"])
+@pytest.mark.parametrize("strides, dilation", [((2, 2), (1, 1)), ((1, 1), (1, 1))])
+@pytest.mark.parametrize("enable_bias", [True, False])
+@pytest.mark.parametrize("relu_type", ["NONE", "RELU"])
+@pytest.mark.parametrize(
+    "in_zp, in_sc, k_sc, out_channels",
+    [(10, 0.0128, [0.11, 0.22], 2), (-64, 1, [1, 0.0256, 1.37], 3)],
+)
+def test_op_int8(
+    ifm_shape,
+    kernel_size,
+    padding,
+    strides,
+    dilation,
+    enable_bias,
+    relu_type,
+    in_zp,
+    in_sc,
+    k_sc,
+    out_channels,
+):
+    interface_api = "c"
+    use_unpacked_api = True
+    test_runner = AOT_CORSTONE300_RUNNER
+
+    k_zp = 0

Review comment:
       ```suggestion
       zero_point = 0
       ```

##########
File path: tests/python/contrib/test_cmsisnn/test_conv2d.py
##########
@@ -0,0 +1,303 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""CMSIS-NN integration tests: Conv2D"""
+import itertools
+import numpy as np
+import pytest
+import tvm
+from tvm import relay
+from tvm.relay.op.contrib import cmsisnn
+
+
+from tests.python.relay.aot.aot_test_utils import (
+    AOTTestModel,
+    AOT_CORSTONE300_RUNNER,
+    AOT_DEFAULT_RUNNER,
+    generate_ref_data,
+    compile_and_run,
+)
+from utils import (
+    skip_if_no_reference_system,
+    make_module,
+    count_num_calls,
+    get_range_for_dtype_str,
+    get_same_padding,
+    get_conv2d_qnn_params,
+    make_qnn_relu,
+)
+
+
+def make_model(
+    shape,
+    kernel_shape,
+    input_zp,
+    input_sc,
+    kernel_zp,
+    kernel_sc,
+    output_zp,
+    output_sc,
+    padding,
+    strides,
+    dilation,
+    groups,
+    dtype,
+    kernel_dtype,
+    out_channels,
+    weight_format,
+    enable_bias,
+    relu_type,
+):
+    """Return a model and any parameters it may have"""
+    h_index = weight_format.index("H")
+    w_index = weight_format.index("W")
+    kernel_h = kernel_shape[h_index]
+    kernel_w = kernel_shape[w_index]
+    a = relay.var("in0", shape=shape, dtype=dtype)
+    p = (0, 0, 0, 0)
+    if padding == "SAME":
+        p = get_same_padding((shape[1], shape[2]), (kernel_h, kernel_w), dilation, strides)
+        a = relay.nn.pad(
+            a,
+            pad_width=[(0, 0), (p[0], p[2]), (p[1], p[3]), (0, 0)],
+            pad_value=input_zp,
+            pad_mode="constant",
+        )
+        shape = (shape[0], shape[1] + p[0] + p[2], shape[2] + p[1] + p[3], shape[3])
+
+    weight_shape = (kernel_h, kernel_w, shape[3] // groups, out_channels)
+    w = tvm.nd.array(
+        np.random.randint(
+            np.iinfo(kernel_dtype).min,
+            high=np.iinfo(kernel_dtype).max,
+            size=weight_shape,
+            dtype=kernel_dtype,
+        )
+    )
+    weights = relay.const(w, kernel_dtype)
+    conv = relay.qnn.op.conv2d(
+        a,
+        weights,
+        input_zero_point=relay.const(input_zp, "int32"),
+        kernel_zero_point=relay.const(kernel_zp, "int32"),
+        input_scale=relay.const(input_sc, "float32"),
+        kernel_scale=relay.const(kernel_sc, "float32"),
+        kernel_size=(kernel_h, kernel_w),
+        data_layout="NHWC",
+        kernel_layout=weight_format,
+        dilation=dilation,
+        strides=strides,
+        groups=groups,
+        channels=out_channels,
+        padding=p,
+        out_dtype="int32",
+    )
+    b = tvm.nd.array(np.random.randint(0, high=10, size=(out_channels,), dtype="int32"))
+    bc = relay.const(b, "int32")
+    bias = conv
+    if enable_bias:
+        bias = relay.nn.bias_add(conv, bc, axis=3)
+    requant_input_sc = [sc * input_sc for sc in kernel_sc]
+    req = relay.qnn.op.requantize(
+        bias,
+        relay.const(requant_input_sc, "float32"),
+        relay.const(0, "int32"),
+        relay.const(output_sc, "float32"),
+        relay.const(output_zp, "int32"),
+        out_dtype=dtype,
+    )
+    relu = make_qnn_relu(req, relu_type, output_sc, output_zp, dtype)
+    params = {"w": w, "b": b}
+    return relu, params
+
+
+@tvm.testing.requires_cmsisnn
+@pytest.mark.parametrize("ifm_shape", [(1, 28, 28, 12), (1, 64, 100, 4)])
+@pytest.mark.parametrize("kernel_size", [(3, 3)])
+@pytest.mark.parametrize("padding", ["SAME", "VALID"])
+@pytest.mark.parametrize("strides, dilation", [((2, 2), (1, 1)), ((1, 1), (1, 1))])
+@pytest.mark.parametrize("enable_bias", [True, False])
+@pytest.mark.parametrize("relu_type", ["NONE", "RELU"])
+@pytest.mark.parametrize(
+    "in_zp, in_sc, k_sc, out_channels",
+    [(10, 0.0128, [0.11, 0.22], 2), (-64, 1, [1, 0.0256, 1.37], 3)],
+)
+def test_op_int8(
+    ifm_shape,
+    kernel_size,
+    padding,
+    strides,
+    dilation,
+    enable_bias,
+    relu_type,
+    in_zp,
+    in_sc,
+    k_sc,
+    out_channels,
+):
+    interface_api = "c"
+    use_unpacked_api = True
+    test_runner = AOT_CORSTONE300_RUNNER
+
+    k_zp = 0
+    groups = 1
+    weight_format = "HWIO"
+    kernel_h = kernel_size[0]
+    kernel_w = kernel_size[1]
+    dtype = "int8"
+    in_min, in_max = get_range_for_dtype_str(dtype)
+
+    weight_shape = None
+    if weight_format == "HWIO":
+        weight_shape = (kernel_h, kernel_w, ifm_shape[3] // groups, out_channels)
+    else:
+        weight_shape = (kernel_h, kernel_w, ifm_shape[3], out_channels)
+
+    out_sc, out_zp = get_conv2d_qnn_params(
+        weight_shape, in_sc, in_zp, k_sc, k_zp, dtype, dtype, dtype, False
+    )
+
+    model, params = make_model(
+        ifm_shape,
+        weight_shape,
+        in_zp,
+        in_sc,
+        k_zp,
+        k_sc,
+        out_zp,
+        out_sc,
+        padding,
+        strides,
+        dilation,
+        groups,
+        dtype,
+        dtype,
+        out_channels,
+        weight_format,
+        enable_bias,
+        relu_type,
+    )
+    orig_mod = make_module(model)
+    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params)
+
+    # validate pattern matching
+    attrs = [
+        cmsisnn_mod[var.name_hint].attrs
+        for var in cmsisnn_mod.get_global_vars()
+        if cmsisnn_mod[var.name_hint].attrs
+    ]
+    assert any(attrs), "At least one function with external attributes was expected."
+
+    compilers = [
+        key == "Compiler" and value == "cmsisnn" for attr in attrs for key, value in attr.items()
+    ]
+    assert any(compilers), "Module does not contain function for cmsisnn target."
+
+    assert count_num_calls(orig_mod) == count_num_calls(
+        cmsisnn_mod
+    ), "Number of calls changed during partitioning"
+
+    # validate the output
+    np.random.seed(0)

Review comment:
       This pollutes the global `numpy` random state; if we want to fix the seed, we should use [the numpy Random Generator](https://numpy.org/doc/stable/reference/random/generator.html) to isolate it.
   
   Also, maybe `42` rather than `0`? :smile_cat:
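   
    For illustration, a minimal sketch of the isolated-generator approach (the `in_min`/`in_max`/`ifm_shape` values are copied from the test above; the seed is arbitrary):
    ```python
    import numpy as np

    in_min, in_max = -128, 127   # int8 range, as returned by get_range_for_dtype_str("int8")
    ifm_shape = (1, 28, 28, 12)  # one of the parametrized input shapes

    # Local generator: reproducible, and the global np.random state stays untouched.
    rng = np.random.default_rng(42)
    inputs = {"in0": rng.integers(in_min, in_max, size=ifm_shape, dtype="int8")}
    ```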






[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r734740995



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {

Review comment:
       I will remove it.
   At some point during the implementation, I was trying to return the function instead of a call, to remove the need for an extra call to the local function.







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r735556089



##########
File path: src/relay/backend/contrib/cmsisnn/generate_constants.cc
##########
@@ -0,0 +1,230 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/attrs/transform.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+Expr MakeTranspose(Expr data, Array<Integer> axes);
+namespace contrib {
+namespace cmsisnn {
+
+class GenerateConstantsMutator : public MixedModeMutator {
+ public:
+  explicit GenerateConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  /*!  * \brief Converts Kernel layout from HWIO to OHWI to align to CMSIS-NN requirements */
+  Expr ConvertKernelLayout(Expr kernel_expr, const Conv2DAttrs* conv2d_attrs, Attrs* new_attrs) {
+    auto attrs = make_object<Conv2DAttrs>();
+    attrs->strides = std::move(conv2d_attrs->strides);
+    attrs->padding = std::move(conv2d_attrs->padding);
+    attrs->dilation = std::move(conv2d_attrs->dilation);
+    attrs->groups = conv2d_attrs->groups;
+    attrs->channels = std::move(conv2d_attrs->channels);
+    attrs->kernel_size = std::move(conv2d_attrs->kernel_size);
+    attrs->data_layout = std::move(conv2d_attrs->data_layout);
+    attrs->kernel_layout = runtime::String("OHWI");
+    attrs->out_layout = std::move(conv2d_attrs->out_layout);
+    attrs->out_dtype = std::move(conv2d_attrs->out_dtype);
+    *new_attrs = tvm::Attrs{attrs};
+
+    IRModule kernel_module;
+    auto func_body = MakeTranspose(kernel_expr, {Integer(3), Integer(0), Integer(1), Integer(2)});
+    auto kernel_func =
+        Function(FreeVars(func_body), func_body, Type(), FreeTypeVars(func_body, kernel_module));
+    GlobalVar kernel_var("main");
+    kernel_module->Add(kernel_var, kernel_func);
+    kernel_module = relay::transform::FoldConstant()(kernel_module);
+    kernel_func = Downcast<Function>(kernel_module->Lookup("main"));
+    return kernel_func->body;
+  }
+
+  /*!  * \brief Performs weight transpose and substitutes existing constants in the composite
+   *            function for Conv2D with CMSIS-NN Requantize constants */
+  Expr GenerateConv2dRequantConstants(const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* conv2d_call = nullptr;
+    auto* final_call = expr.as<CallNode>();
+    auto* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    auto* requantize_input = requantize_call->args[0].as<CallNode>();
+    auto* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      conv2d_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      conv2d_call = requantize_input;
+    }
+
+    // Transpose weights: HWIO -> OHWI
+    auto* conv2d_attrs = conv2d_call->attrs.as<Conv2DAttrs>();
+    tvm::Attrs new_conv2d_attrs;
+    Expr transposed_kernel =
+        ConvertKernelLayout(conv2d_call->args[1], conv2d_attrs, &new_conv2d_attrs);
+
+    // Obtain input and output scales from Relay's Requantization
+    int64_t out_channels = conv2d_attrs->channels.as<IntImmNode>()->value;
+    float output_scale = GetScalarFromConstant<float>(requantize_call->args[3]);
+    auto input_scales = tvm::relay::qnn::GetFloatVectorFromConstant(requantize_call->args[1]);
+    ICHECK(input_scales.size() == static_cast<size_t>(out_channels));
+
+    // Calculate requantization multiplier and shift

Review comment:
       Done!







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r736338880



##########
File path: src/relay/backend/contrib/cmsisnn/generate_constants.cc
##########
@@ -0,0 +1,230 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/attrs/transform.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+Expr MakeTranspose(Expr data, Array<Integer> axes);
+namespace contrib {
+namespace cmsisnn {
+
+class GenerateConstantsMutator : public MixedModeMutator {
+ public:
+  explicit GenerateConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  /*!  * \brief Converts Kernel layout from HWIO to OHWI to align to CMSIS-NN requirements */
+  Expr ConvertKernelLayout(Expr kernel_expr, const Conv2DAttrs* conv2d_attrs, Attrs* new_attrs) {
+    auto attrs = make_object<Conv2DAttrs>();
+    attrs->strides = std::move(conv2d_attrs->strides);
+    attrs->padding = std::move(conv2d_attrs->padding);
+    attrs->dilation = std::move(conv2d_attrs->dilation);
+    attrs->groups = conv2d_attrs->groups;
+    attrs->channels = std::move(conv2d_attrs->channels);
+    attrs->kernel_size = std::move(conv2d_attrs->kernel_size);
+    attrs->data_layout = std::move(conv2d_attrs->data_layout);
+    attrs->kernel_layout = runtime::String("OHWI");
+    attrs->out_layout = std::move(conv2d_attrs->out_layout);
+    attrs->out_dtype = std::move(conv2d_attrs->out_dtype);
+    *new_attrs = tvm::Attrs{attrs};
+
+    IRModule kernel_module;
+    auto func_body = MakeTranspose(kernel_expr, {Integer(3), Integer(0), Integer(1), Integer(2)});
+    auto kernel_func =
+        Function(FreeVars(func_body), func_body, Type(), FreeTypeVars(func_body, kernel_module));
+    GlobalVar kernel_var("main");
+    kernel_module->Add(kernel_var, kernel_func);
+    kernel_module = relay::transform::FoldConstant()(kernel_module);
+    kernel_func = Downcast<Function>(kernel_module->Lookup("main"));
+    return kernel_func->body;
+  }
+
+  /*!  * \brief Performs weight transpose and substitutes existing constants in the composite
+   *            function for Conv2D with CMSIS-NN Requantize constants */
+  Expr GenerateConv2dRequantConstants(const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* conv2d_call = nullptr;
+    auto* final_call = expr.as<CallNode>();
+    auto* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();

Review comment:
       In this case it works well, but `Downcast` does not work when we don't check for the appropriate type upfront. This was useful info, though; I will keep it in mind for upcoming adventures :)
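   
    For anyone following along, a short sketch of the difference (the expression and function names are hypothetical; `Downcast` is TVM's checked cast):
    ```cpp
    #include <tvm/relay/expr.h>
    #include <tvm/runtime/object.h>

    using tvm::runtime::Downcast;
    using tvm::relay::Call;
    using tvm::relay::CallNode;
    using tvm::relay::Expr;

    void Inspect(const Expr& expr) {
      // as<T>() is the "check first" route: it returns nullptr on a mismatch.
      const CallNode* call_node = expr.as<CallNode>();
      if (call_node != nullptr) {
        // safe to dereference call_node here
      }
      // Downcast<T>() assumes the type is already known: it ICHECK-fails
      // at runtime if expr is not actually a Call.
      Call call_ref = Downcast<Call>(expr);
    }
    ```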







[GitHub] [tvm] manupa-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r734793206



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    // Replace Constant arguments with Vars for ML Operators
+    // Perform this for non-main Call Nodes only
+    if (func_nesting_level_ && call->op.as<OpNode>()) {
+      Array<Expr> new_args;
+      for (auto& arg : post_call->args) {
+        auto* const_arg = arg.as<ConstantNode>();
+        if (const_arg && !const_arg->is_scalar()) {
+          Var var_arg = Var(gen_var_name(), const_arg->tensor_type());
+          new_args.push_back(var_arg);
+          constants_within_function_.push_back(GetRef<Constant>(const_arg));
+        } else {
+          new_args.push_back(arg);
+        }
+      }
+      final_call = Call(call->op, new_args, call->attrs, {});
+    }
+
+    // Since the constants are kicked out of partitioned functions
+    // a new call to global function is needed
+    if (auto* glob_var_node = post_call->op.as<GlobalVarNode>()) {
+      auto glob_var = GetRef<GlobalVar>(glob_var_node);
+      auto glob_func = Downcast<Function>(mod_->Lookup(glob_var));
+      auto new_glob_func = VisitExpr(glob_func);
+      if (!new_glob_func.same_as(glob_func)) {
+        mod_->Update(glob_var, Downcast<Function>(new_glob_func));
+        Array<Expr> new_args = post_call->args;
+        ICHECK(function_to_constants_.find(glob_func) != function_to_constants_.end());

Review comment:
       How is it better than not using a constant-time access data structure?
   
   What is the cap of # of functions?

##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    // Replace Constant arguments with Vars for ML Operators
+    // Perform this for non-main Call Nodes only
+    if (func_nesting_level_ && call->op.as<OpNode>()) {
+      Array<Expr> new_args;
+      for (auto& arg : post_call->args) {
+        auto* const_arg = arg.as<ConstantNode>();
+        if (const_arg && !const_arg->is_scalar()) {
+          Var var_arg = Var(gen_var_name(), const_arg->tensor_type());
+          new_args.push_back(var_arg);
+          constants_within_function_.push_back(GetRef<Constant>(const_arg));
+        } else {
+          new_args.push_back(arg);
+        }
+      }
+      final_call = Call(call->op, new_args, call->attrs, {});
+    }
+
+    // Since the constants are kicked out of partitioned functions
+    // a new call to global function is needed
+    if (auto* glob_var_node = post_call->op.as<GlobalVarNode>()) {
+      auto glob_var = GetRef<GlobalVar>(glob_var_node);
+      auto glob_func = Downcast<Function>(mod_->Lookup(glob_var));
+      auto new_glob_func = VisitExpr(glob_func);
+      if (!new_glob_func.same_as(glob_func)) {
+        mod_->Update(glob_var, Downcast<Function>(new_glob_func));
+        Array<Expr> new_args = post_call->args;
+        ICHECK(function_to_constants_.find(glob_func) != function_to_constants_.end());

Review comment:
    How is it better than not using a constant-time access data structure?

    What is the cap on the number of functions?







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r733864363



##########
File path: tests/python/contrib/test_cmsisnn/test_conv2d.py
##########
@@ -0,0 +1,303 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""CMSIS-NN integration tests: Conv2D"""
+import itertools
+import numpy as np
+import pytest
+import tvm
+from tvm import relay
+from tvm.relay.op.contrib import cmsisnn
+
+
+from tests.python.relay.aot.aot_test_utils import (
+    AOTTestModel,
+    AOT_CORSTONE300_RUNNER,
+    AOT_DEFAULT_RUNNER,
+    generate_ref_data,
+    compile_and_run,
+)
+from utils import (
+    skip_if_no_reference_system,
+    make_module,
+    count_num_calls,
+    get_range_for_dtype_str,
+    get_same_padding,
+    get_conv2d_qnn_params,
+    make_qnn_relu,
+)
+
+
+def make_model(
+    shape,
+    kernel_shape,
+    input_zp,
+    input_sc,
+    kernel_zp,
+    kernel_sc,
+    output_zp,
+    output_sc,
+    padding,
+    strides,
+    dilation,
+    groups,
+    dtype,
+    kernel_dtype,
+    out_channels,
+    weight_format,
+    enable_bias,
+    relu_type,
+):
+    """Return a model and any parameters it may have"""
+    h_index = weight_format.index("H")
+    w_index = weight_format.index("W")
+    kernel_h = kernel_shape[h_index]
+    kernel_w = kernel_shape[w_index]
+    a = relay.var("in0", shape=shape, dtype=dtype)
+    p = (0, 0, 0, 0)
+    if padding == "SAME":
+        p = get_same_padding((shape[1], shape[2]), (kernel_h, kernel_w), dilation, strides)
+        a = relay.nn.pad(
+            a,
+            pad_width=[(0, 0), (p[0], p[2]), (p[1], p[3]), (0, 0)],
+            pad_value=input_zp,
+            pad_mode="constant",
+        )
+        shape = (shape[0], shape[1] + p[0] + p[2], shape[2] + p[1] + p[3], shape[3])
+
+    weight_shape = (kernel_h, kernel_w, shape[3] // groups, out_channels)
+    w = tvm.nd.array(
+        np.random.randint(
+            np.iinfo(kernel_dtype).min,
+            high=np.iinfo(kernel_dtype).max,
+            size=weight_shape,
+            dtype=kernel_dtype,
+        )
+    )
+    weights = relay.const(w, kernel_dtype)
+    conv = relay.qnn.op.conv2d(
+        a,
+        weights,
+        input_zero_point=relay.const(input_zp, "int32"),
+        kernel_zero_point=relay.const(kernel_zp, "int32"),
+        input_scale=relay.const(input_sc, "float32"),
+        kernel_scale=relay.const(kernel_sc, "float32"),
+        kernel_size=(kernel_h, kernel_w),
+        data_layout="NHWC",
+        kernel_layout=weight_format,
+        dilation=dilation,
+        strides=strides,
+        groups=groups,
+        channels=out_channels,
+        padding=p,
+        out_dtype="int32",
+    )
+    b = tvm.nd.array(np.random.randint(0, high=10, size=(out_channels,), dtype="int32"))
+    bc = relay.const(b, "int32")
+    bias = conv
+    if enable_bias:
+        bias = relay.nn.bias_add(conv, bc, axis=3)
+    requant_input_sc = [sc * input_sc for sc in kernel_sc]
+    req = relay.qnn.op.requantize(
+        bias,
+        relay.const(requant_input_sc, "float32"),
+        relay.const(0, "int32"),
+        relay.const(output_sc, "float32"),
+        relay.const(output_zp, "int32"),
+        out_dtype=dtype,
+    )
+    relu = make_qnn_relu(req, relu_type, output_sc, output_zp, dtype)
+    params = {"w": w, "b": b}
+    return relu, params
+
+
+@tvm.testing.requires_cmsisnn
+@pytest.mark.parametrize("ifm_shape", [(1, 28, 28, 12), (1, 64, 100, 4)])
+@pytest.mark.parametrize("kernel_size", [(3, 3)])
+@pytest.mark.parametrize("padding", ["SAME", "VALID"])
+@pytest.mark.parametrize("strides, dilation", [((2, 2), (1, 1)), ((1, 1), (1, 1))])
+@pytest.mark.parametrize("enable_bias", [True, False])
+@pytest.mark.parametrize("relu_type", ["NONE", "RELU"])
+@pytest.mark.parametrize(
+    "in_zp, in_sc, k_sc, out_channels",
+    [(10, 0.0128, [0.11, 0.22], 2), (-64, 1, [1, 0.0256, 1.37], 3)],
+)
+def test_op_int8(
+    ifm_shape,
+    kernel_size,
+    padding,
+    strides,
+    dilation,
+    enable_bias,
+    relu_type,
+    in_zp,
+    in_sc,
+    k_sc,
+    out_channels,
+):
+    interface_api = "c"
+    use_unpacked_api = True
+    test_runner = AOT_CORSTONE300_RUNNER
+
+    k_zp = 0
+    groups = 1
+    weight_format = "HWIO"
+    kernel_h = kernel_size[0]
+    kernel_w = kernel_size[1]
+    dtype = "int8"
+    in_min, in_max = get_range_for_dtype_str(dtype)
+
+    weight_shape = None
+    if weight_format == "HWIO":
+        weight_shape = (kernel_h, kernel_w, ifm_shape[3] // groups, out_channels)
+    else:
+        weight_shape = (kernel_h, kernel_w, ifm_shape[3], out_channels)
+
+    out_sc, out_zp = get_conv2d_qnn_params(
+        weight_shape, in_sc, in_zp, k_sc, k_zp, dtype, dtype, dtype, False
+    )
+
+    model, params = make_model(
+        ifm_shape,
+        weight_shape,
+        in_zp,
+        in_sc,
+        k_zp,
+        k_sc,
+        out_zp,
+        out_sc,
+        padding,
+        strides,
+        dilation,
+        groups,
+        dtype,
+        dtype,
+        out_channels,
+        weight_format,
+        enable_bias,
+        relu_type,
+    )
+    orig_mod = make_module(model)
+    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params)
+
+    # validate pattern matching
+    attrs = [
+        cmsisnn_mod[var.name_hint].attrs
+        for var in cmsisnn_mod.get_global_vars()
+        if cmsisnn_mod[var.name_hint].attrs
+    ]
+    assert any(attrs), "At least one function with external attributes was expected."
+
+    compilers = [
+        key == "Compiler" and value == "cmsisnn" for attr in attrs for key, value in attr.items()
+    ]
+    assert any(compilers), "Module does not contain function for cmsisnn target."
+
+    assert count_num_calls(orig_mod) == count_num_calls(
+        cmsisnn_mod
+    ), "Number of calls changed during partitioning"
+
+    # validate the output
+    np.random.seed(0)

Review comment:
       Thanks! This was new to me.
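
    For readers following the thread, the line under discussion seeds NumPy's global random generator so the randomly drawn test inputs are identical on every run. A minimal sketch of the effect:

    ```python
    import numpy as np

    np.random.seed(0)  # fix the global RNG state
    a = np.random.randint(-128, high=128, size=(2, 3), dtype="int8")

    np.random.seed(0)  # same seed, so the exact same draw is reproduced
    b = np.random.randint(-128, high=128, size=(2, 3), dtype="int8")

    assert (a == b).all()  # reproducible test data across runs
    ```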

##########
File path: tests/python/contrib/test_cmsisnn/test_conv2d.py
##########
@@ -0,0 +1,303 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""CMSIS-NN integration tests: Conv2D"""
+import itertools
+import numpy as np
+import pytest
+import tvm
+from tvm import relay
+from tvm.relay.op.contrib import cmsisnn
+
+
+from tests.python.relay.aot.aot_test_utils import (
+    AOTTestModel,
+    AOT_CORSTONE300_RUNNER,
+    AOT_DEFAULT_RUNNER,
+    generate_ref_data,
+    compile_and_run,
+)
+from utils import (
+    skip_if_no_reference_system,
+    make_module,
+    count_num_calls,
+    get_range_for_dtype_str,
+    get_same_padding,
+    get_conv2d_qnn_params,
+    make_qnn_relu,
+)
+
+
+def make_model(
+    shape,
+    kernel_shape,
+    input_zp,
+    input_sc,
+    kernel_zp,
+    kernel_sc,
+    output_zp,
+    output_sc,
+    padding,
+    strides,
+    dilation,
+    groups,
+    dtype,
+    kernel_dtype,
+    out_channels,
+    weight_format,
+    enable_bias,
+    relu_type,
+):
+    """Return a model and any parameters it may have"""
+    h_index = weight_format.index("H")
+    w_index = weight_format.index("W")
+    kernel_h = kernel_shape[h_index]
+    kernel_w = kernel_shape[w_index]
+    a = relay.var("in0", shape=shape, dtype=dtype)
+    p = (0, 0, 0, 0)
+    if padding == "SAME":
+        p = get_same_padding((shape[1], shape[2]), (kernel_h, kernel_w), dilation, strides)
+        a = relay.nn.pad(
+            a,
+            pad_width=[(0, 0), (p[0], p[2]), (p[1], p[3]), (0, 0)],
+            pad_value=input_zp,
+            pad_mode="constant",
+        )
+        shape = (shape[0], shape[1] + p[0] + p[2], shape[2] + p[1] + p[3], shape[3])
+
+    weight_shape = (kernel_h, kernel_w, shape[3] // groups, out_channels)
+    w = tvm.nd.array(
+        np.random.randint(
+            np.iinfo(kernel_dtype).min,
+            high=np.iinfo(kernel_dtype).max,
+            size=weight_shape,
+            dtype=kernel_dtype,
+        )
+    )
+    weights = relay.const(w, kernel_dtype)
+    conv = relay.qnn.op.conv2d(
+        a,
+        weights,
+        input_zero_point=relay.const(input_zp, "int32"),
+        kernel_zero_point=relay.const(kernel_zp, "int32"),
+        input_scale=relay.const(input_sc, "float32"),
+        kernel_scale=relay.const(kernel_sc, "float32"),
+        kernel_size=(kernel_h, kernel_w),
+        data_layout="NHWC",
+        kernel_layout=weight_format,
+        dilation=dilation,
+        strides=strides,
+        groups=groups,
+        channels=out_channels,
+        padding=p,
+        out_dtype="int32",
+    )
+    b = tvm.nd.array(np.random.randint(0, high=10, size=(out_channels,), dtype="int32"))
+    bc = relay.const(b, "int32")
+    bias = conv
+    if enable_bias:
+        bias = relay.nn.bias_add(conv, bc, axis=3)
+    requant_input_sc = [sc * input_sc for sc in kernel_sc]
+    req = relay.qnn.op.requantize(
+        bias,
+        relay.const(requant_input_sc, "float32"),
+        relay.const(0, "int32"),
+        relay.const(output_sc, "float32"),
+        relay.const(output_zp, "int32"),
+        out_dtype=dtype,
+    )
+    relu = make_qnn_relu(req, relu_type, output_sc, output_zp, dtype)
+    params = {"w": w, "b": b}
+    return relu, params
+
+
+@tvm.testing.requires_cmsisnn
+@pytest.mark.parametrize("ifm_shape", [(1, 28, 28, 12), (1, 64, 100, 4)])
+@pytest.mark.parametrize("kernel_size", [(3, 3)])
+@pytest.mark.parametrize("padding", ["SAME", "VALID"])
+@pytest.mark.parametrize("strides, dilation", [((2, 2), (1, 1)), ((1, 1), (1, 1))])
+@pytest.mark.parametrize("enable_bias", [True, False])
+@pytest.mark.parametrize("relu_type", ["NONE", "RELU"])
+@pytest.mark.parametrize(
+    "in_zp, in_sc, k_sc, out_channels",
+    [(10, 0.0128, [0.11, 0.22], 2), (-64, 1, [1, 0.0256, 1.37], 3)],
+)
+def test_op_int8(
+    ifm_shape,
+    kernel_size,
+    padding,
+    strides,
+    dilation,
+    enable_bias,
+    relu_type,
+    in_zp,
+    in_sc,
+    k_sc,
+    out_channels,
+):
+    interface_api = "c"
+    use_unpacked_api = True
+    test_runner = AOT_CORSTONE300_RUNNER
+
+    k_zp = 0
+    groups = 1
+    weight_format = "HWIO"
+    kernel_h = kernel_size[0]
+    kernel_w = kernel_size[1]
+    dtype = "int8"
+    in_min, in_max = get_range_for_dtype_str(dtype)
+
+    weight_shape = None
+    if weight_format == "HWIO":
+        weight_shape = (kernel_h, kernel_w, ifm_shape[3] // groups, out_channels)
+    else:
+        weight_shape = (kernel_h, kernel_w, ifm_shape[3], out_channels)
+
+    out_sc, out_zp = get_conv2d_qnn_params(
+        weight_shape, in_sc, in_zp, k_sc, k_zp, dtype, dtype, dtype, False
+    )
+
+    model, params = make_model(
+        ifm_shape,
+        weight_shape,
+        in_zp,
+        in_sc,
+        k_zp,
+        k_sc,
+        out_zp,
+        out_sc,
+        padding,
+        strides,
+        dilation,
+        groups,
+        dtype,
+        dtype,
+        out_channels,
+        weight_format,
+        enable_bias,
+        relu_type,
+    )
+    orig_mod = make_module(model)
+    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params)
+
+    # validate pattern matching
+    attrs = [
+        cmsisnn_mod[var.name_hint].attrs
+        for var in cmsisnn_mod.get_global_vars()
+        if cmsisnn_mod[var.name_hint].attrs
+    ]
+    assert any(attrs), "At least one function with external attributes was expected."
+
+    compilers = [
+        key == "Compiler" and value == "cmsisnn" for attr in attrs for key, value in attr.items()
+    ]
+    assert any(compilers), "Module does not contain function for cmsisnn target."
+
+    assert count_num_calls(orig_mod) == count_num_calls(
+        cmsisnn_mod
+    ), "Number of calls changed during partitioning"
+
+    # validate the output
+    np.random.seed(0)
+    inputs = {
+        "in0": np.random.randint(in_min, high=in_max, size=ifm_shape, dtype="int8"),
+    }
+    output_list = generate_ref_data(orig_mod["main"], inputs, params)
+    compile_and_run(
+        AOTTestModel(
+            module=cmsisnn_mod,
+            inputs=inputs,
+            outputs=output_list,
+            params=params,
+            output_tolerance=1,
+        ),
+        test_runner,
+        interface_api,
+        use_unpacked_api,
+    )
+
+
+def parameterize_for_invalid_model(test):
+    in_dtype = ["uint8", "int8"]
+    kernel_dtype = ["uint8", "int8"]
+    kernel_zero_point = [-33, 10, 0]
+    all_combinations = itertools.product(in_dtype, kernel_dtype, kernel_zero_point)
+    all_combinations = filter(
+        lambda parameters: not (
+            parameters[0] == "int8" and parameters[1] == "int8" and parameters[2] == 0
+        ),
+        all_combinations,
+    )
+    return pytest.mark.parametrize(
+        ["in_dtype", "kernel_dtype", "kernel_zero_point"],
+        all_combinations,
+    )(test)
+
+
+@parameterize_for_invalid_model
+def test_invalid_parameters(
+    in_dtype,
+    kernel_dtype,
+    kernel_zero_point,
+):
+    ifm_shape = (1, 28, 28, 12)
+    out_channels = 2
+    in_sc = 1
+    in_zp = 24
+    k_sc = [0.11, 0.0237]
+    in_min, in_max = get_range_for_dtype_str(in_dtype)
+
+    kernel_layout = "HWIO"
+    kernel_shape = [3, 3, ifm_shape[3], out_channels]
+    out_sc, out_zp = get_conv2d_qnn_params(
+        kernel_shape, in_sc, in_zp, k_sc, kernel_zero_point, in_dtype, kernel_dtype, in_dtype, False
+    )
+    model, params = make_model(
+        shape=ifm_shape,
+        kernel_shape=kernel_shape,
+        input_zp=in_zp,
+        input_sc=in_sc,
+        kernel_zp=kernel_zero_point,
+        kernel_sc=k_sc,
+        output_zp=out_zp,
+        output_sc=out_sc,
+        padding="SAME",
+        strides=(1, 1),
+        dilation=(1, 1),
+        groups=1,
+        dtype=in_dtype,
+        kernel_dtype=kernel_dtype,
+        out_channels=out_channels,
+        weight_format=kernel_layout,
+        enable_bias=True,
+        relu_type="NONE",
+    )
+    orig_mod = make_module(model)
+    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params)
+
+    # print(cmsisnn_mod.astext(False))

Review comment:
       Removed it.

##########
File path: tests/python/contrib/test_cmsisnn/test_networks.py
##########
@@ -92,7 +92,6 @@ def test_cnn_small():
 
     orig_mod, params = convert_to_relay(tflite_model_buf, input_data, "input")
     cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params)
-

Review comment:
       No. Added back in.

##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    // Replace Constant arguments with Vars for ML Operators
+    // Perform this for non-main Call Nodes only
+    if (func_nesting_level_ && call->op.as<OpNode>()) {
+      Array<Expr> new_args;
+      for (auto& arg : post_call->args) {
+        auto* const_arg = arg.as<ConstantNode>();
+        if (const_arg && !const_arg->is_scalar()) {
+          Var var_arg = Var(gen_var_name(), const_arg->tensor_type());
+          new_args.push_back(var_arg);
+          constants_within_function_.push_back(GetRef<Constant>(const_arg));
+        } else {
+          new_args.push_back(arg);
+        }
+      }
+      final_call = Call(call->op, new_args, call->attrs, {});
+    }
+
+    // Since the constants are kicked out of partitioned functions
+    // a new call to global function is needed
+    if (auto* glob_var_node = post_call->op.as<GlobalVarNode>()) {
+      auto glob_var = GetRef<GlobalVar>(glob_var_node);
+      auto glob_func = Downcast<Function>(mod_->Lookup(glob_var));
+      auto new_glob_func = VisitExpr(glob_func);
+      if (!new_glob_func.same_as(glob_func)) {
+        mod_->Update(glob_var, Downcast<Function>(new_glob_func));
+        Array<Expr> new_args = post_call->args;
+        ICHECK(function_to_constants_.find(glob_func) != function_to_constants_.end());
+        for (auto constant : function_to_constants_.at(glob_func)) {
+          new_args.push_back(constant);
+        }
+        final_call = Call(glob_var, new_args);
+      }
+    }
+
+    // Since the constants are kicked out of the local partitioned functions
+    // a new call to local function is needed
+    if (auto* func_node = call->op.as<FunctionNode>()) {
+      Function func = GetRef<Function>(func_node);
+      auto new_func = VisitExpr(func);
+      if (!new_func.same_as(func)) {
+        Array<Expr> new_args = post_call->args;
+        ICHECK(function_to_constants_.find(func) != function_to_constants_.end());
+        for (auto constant : function_to_constants_.at(func)) {
+          constants_within_function_.push_back(constant);
+          Var var_arg = Var(gen_var_name(), constant->tensor_type());
+          new_args.push_back(var_arg);
+        }
+        final_call = Call(new_func, new_args);
+      }
+    }
+
+    return final_call;
+  }
+
+ private:
+  /* \brief Updated module where all calls have replaced constants with new variables */
+  IRModule mod_;
+  /* \brief Maintains mapping of original function to the replaced constants */
+  Map<Function, Array<Constant>> function_to_constants_;
+  /* \brief Constants being kicked out of a function during the function visit */
+  Array<Constant> constants_within_function_;
+  /* \brief Keeps track of variables being created */
+  int var_count_ = 0;
+  /* \brief Keeps track of function scope */
+  int func_nesting_level_ = 0;
+};
+
+/*!  * \brief Kicks out all constants out of the partitioned function into main()  */
+IRModule ExtractConstants(IRModule mod) {
+  String func_name;
+  Function func;
+
+  auto extract_constants = ExtractConstantsMutator(mod);
+  Function main_func = Downcast<Function>(mod->Lookup("main"));
+  auto new_main_body = extract_constants.VisitExpr(main_func->body);
+  if (!new_main_body.same_as(main_func->body)) {
+    auto main_var = mod->GetGlobalVar("main");
+    auto new_main_func = Function(main_func->params, new_main_body, main_func->ret_type,
+                                  main_func->type_params, main_func->attrs);
+    mod->Update(main_var, new_main_func);
+  }
+  return mod;
+}
+
+transform::Pass ExtractConstantsFromPartitionedFunction() {
+  runtime::TypedPackedFunc<IRModule(IRModule, transform::PassContext)> pass_func =
+      [=](IRModule m, transform::PassContext pc) { return ExtractConstants(m); };
+  return tvm::transform::CreateModulePass(pass_func, 0, "ExtractConstantsFromPartitionedFunction",
+                                          {});
+}
+
+TVM_REGISTER_GLOBAL("relay.ext.cmsisnn.transform.ExtractConstantsFromPartitionedFunction")
+    .set_body_typed([]() { return ExtractConstantsFromPartitionedFunction(); });

Review comment:
       ACK

##########
File path: src/relay/backend/contrib/cmsisnn/generate_constants.cc
##########
@@ -0,0 +1,230 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/attrs/transform.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+Expr MakeTranspose(Expr data, Array<Integer> axes);
+namespace contrib {
+namespace cmsisnn {
+
+class GenerateConstantsMutator : public MixedModeMutator {
+ public:
+  explicit GenerateConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  /*!  * \brief Converts Kernel layout from HWIO to OHWI to align to CMSIS-NN requirements */
+  Expr ConvertKernelLayout(Expr kernel_expr, const Conv2DAttrs* conv2d_attrs, Attrs* new_attrs) {
+    auto attrs = make_object<Conv2DAttrs>();
+    attrs->strides = std::move(conv2d_attrs->strides);
+    attrs->padding = std::move(conv2d_attrs->padding);
+    attrs->dilation = std::move(conv2d_attrs->dilation);
+    attrs->groups = conv2d_attrs->groups;
+    attrs->channels = std::move(conv2d_attrs->channels);
+    attrs->kernel_size = std::move(conv2d_attrs->kernel_size);
+    attrs->data_layout = std::move(conv2d_attrs->data_layout);
+    attrs->kernel_layout = runtime::String("OHWI");
+    attrs->out_layout = std::move(conv2d_attrs->out_layout);
+    attrs->out_dtype = std::move(conv2d_attrs->out_dtype);
+    *new_attrs = tvm::Attrs{attrs};
+
+    IRModule kernel_module;
+    auto func_body = MakeTranspose(kernel_expr, {Integer(3), Integer(0), Integer(1), Integer(2)});
+    auto kernel_func =
+        Function(FreeVars(func_body), func_body, Type(), FreeTypeVars(func_body, kernel_module));
+    GlobalVar kernel_var("main");
+    kernel_module->Add(kernel_var, kernel_func);
+    kernel_module = relay::transform::FoldConstant()(kernel_module);
+    kernel_func = Downcast<Function>(kernel_module->Lookup("main"));
+    return kernel_func->body;
+  }
+
+  /*!  * \brief Performs weight transpose and substitutes existing constants in the composite
+   *            function for Conv2D with CMSIS-NN Requantize constants */
+  Expr GenerateConv2dRequantConstants(const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* conv2d_call = nullptr;
+    auto* final_call = expr.as<CallNode>();
+    auto* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    auto* requantize_input = requantize_call->args[0].as<CallNode>();
+    auto* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      conv2d_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      conv2d_call = requantize_input;
+    }
+
+    // Transpose weights: HWIO -> OHWI
+    auto* conv2d_attrs = conv2d_call->attrs.as<Conv2DAttrs>();
+    tvm::Attrs new_conv2d_attrs;
+    Expr transposed_kernel =
+        ConvertKernelLayout(conv2d_call->args[1], conv2d_attrs, &new_conv2d_attrs);
+
+    // Obtain input and output scales from Relay's Requantization
+    int64_t out_channels = conv2d_attrs->channels.as<IntImmNode>()->value;
+    float output_scale = GetScalarFromConstant<float>(requantize_call->args[3]);
+    auto input_scales = tvm::relay::qnn::GetFloatVectorFromConstant(requantize_call->args[1]);
+    ICHECK(input_scales.size() == static_cast<size_t>(out_channels));
+
+    // Calculate requantization multiplier and shift
+    Device dev{DLDeviceType::kDLCPU, 0};
+    runtime::NDArray multiplier_nda =
+        runtime::NDArray::Empty({out_channels}, DataType::Int(32), dev);
+    runtime::NDArray shift_nda = runtime::NDArray::Empty({out_channels}, DataType::Int(32), dev);
+    int32_t* multiplier = static_cast<int32_t*>(multiplier_nda->data);
+    int32_t* shift = static_cast<int32_t*>(shift_nda->data);
+    for (int i = 0; i < out_channels; ++i) {
+      double effective_output_scale =
+          static_cast<double>(input_scales[i]) / static_cast<double>(output_scale);
+      std::tie(*(multiplier + i), *(shift + i)) =
+          tvm::relay::qnn::GetFixedPointMultiplierShift(effective_output_scale);
+    }
+
+    // Create constants from requantization multiplier and shift
+    Constant multiplier_const(multiplier_nda);
+    Constant shift_const(shift_nda);
+
+    // Convert scale scalars into Constants
+    // Scales are expected as Constants by following passes
+    Expr weight_scale = conv2d_call->args[5];
+    Expr req_inp_scale = requantize_call->args[1];
+    if (out_channels == 1) {
+      runtime::NDArray weight_scale_nda =
+          runtime::NDArray::Empty({out_channels}, DataType::Float(32), dev);
+      float* weight_scale_p = static_cast<float*>(weight_scale_nda->data);
+      *weight_scale_p = GetScalarFromConstant<float>(weight_scale);
+      weight_scale = Constant(weight_scale_nda);
+
+      runtime::NDArray req_inp_scale_nda =
+          runtime::NDArray::Empty({out_channels}, DataType::Float(32), dev);
+      float* req_inp_scale_p = static_cast<float*>(req_inp_scale_nda->data);
+      *req_inp_scale_p = GetScalarFromConstant<float>(req_inp_scale);
+      req_inp_scale = Constant(req_inp_scale_nda);
+    }
+
+    // Replace existing weights (HWIO) with the transposed ones (OHWI)
+    // Substitute Conv2D weight_zero_point with the CMSIS-NN multiplier
+    // Substitute Requantize input_zero_point with CMSIS-NN shift
+    // Conv2D arguments: data, weight, input_zp, weight_zp, input_sc, weight_sc
+    Array<Expr> conv2d_args = {conv2d_call->args[0], transposed_kernel,    conv2d_call->args[2],
+                               multiplier_const,     conv2d_call->args[4], weight_scale};
+    Call ret_call = Call(conv2d_call->op, conv2d_args, new_conv2d_attrs, {});
+    if (bias_add_call) {
+      ret_call =
+          Call(bias_add_call->op, {ret_call, bias_add_call->args[1]}, bias_add_call->attrs, {});
+    }
+    Array<Expr> requantize_args = {ret_call, req_inp_scale, shift_const, requantize_call->args[3],
+                                   requantize_call->args[4]};
+    ret_call = Call(requantize_call->op, requantize_args, requantize_call->attrs, {});
+    if (clip_call) {
+      ret_call = Call(clip_call->op, {ret_call}, clip_call->attrs, {});
+    }
+    return ret_call;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    auto* global_var = call->op.as<GlobalVarNode>();
+    if (global_var) {
+      // Update to global function call needed because the body changes while
+      // generating new constants
+      Function func = Downcast<Function>(mod_->Lookup(global_var->name_hint));
+      Expr new_body = VisitExpr(func->body);
+      if (!new_body.same_as(func->body)) {
+        Function new_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                                     FreeTypeVars(new_body, mod_), func->attrs);
+        mod_->Update(GetRef<GlobalVar>(global_var), new_func);
+        final_call = Call(GetRef<GlobalVar>(global_var), post_call->args);
+      }
+    }
+
+    // Recreate composite function and corresponding call
+    // Updated composite function contains CMSIS-NN quantized multiplier and shift constants
+    if (call->op.as<FunctionNode>()) {
+      auto* func = call->op.as<FunctionNode>();
+      auto func_name = func->GetAttr<String>(attr::kComposite);
+      if (func_name.defined() && func_name == "cmsisnn.qnn_conv2d") {
+        Expr new_body = GenerateConv2dRequantConstants(func->body);
+        Function new_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                                     FreeTypeVars(new_body, mod_), func->attrs);
+        final_call = Call(new_func, post_call->args);
+      }
+    }
+
+    return final_call;
+  }
+
+ private:
+  IRModule mod_;
+};
+
+IRModule GenerateConstants(IRModule mod) {
+  String func_name;
+  Function func;
+
+  // Introduces CMSIS-NN constants before the call to the external Relay function
+  auto generate_constants = GenerateConstantsMutator(mod);
+  Function main_func = Downcast<Function>(mod->Lookup("main"));
+  auto new_main_body = generate_constants.VisitExpr(main_func->body);
+  if (!new_main_body.same_as(main_func->body)) {
+    auto main_var = mod->GetGlobalVar("main");
+    auto new_main_func = Function(main_func->params, new_main_body, main_func->ret_type,
+                                  main_func->type_params, main_func->attrs);
+    mod->Update(main_var, new_main_func);
+  }
+
+  return mod;
+}
+
+transform::Pass GenerateCMSISNNConstants() {
+  runtime::TypedPackedFunc<IRModule(IRModule, transform::PassContext)> pass_func =
+      [=](IRModule m, transform::PassContext pc) { return GenerateConstants(m); };
+  return tvm::transform::CreateModulePass(pass_func, 0, "GenerateCMSISNNConstants", {});
+}
+
+TVM_REGISTER_GLOBAL("relay.ext.cmsisnn.transform.GenerateCMSISNNConstants").set_body_typed([]() {
+  return GenerateCMSISNNConstants();
+});

Review comment:
       ACK







[GitHub] [tvm] Mousius commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
Mousius commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r732978955



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    // Replace Constant arguments with Vars for ML Operators
+    // Perform this for non-main Call Nodes only
+    if (func_nesting_level_ && call->op.as<OpNode>()) {
+      Array<Expr> new_args;
+      for (auto& arg : post_call->args) {
+        auto* const_arg = arg.as<ConstantNode>();
+        if (const_arg && !const_arg->is_scalar()) {
+          Var var_arg = Var(gen_var_name(), const_arg->tensor_type());
+          new_args.push_back(var_arg);
+          constants_within_function_.push_back(GetRef<Constant>(const_arg));
+        } else {
+          new_args.push_back(arg);
+        }
+      }
+      final_call = Call(call->op, new_args, call->attrs, {});
+    }
+
+    // Since the constants are kicked out of partitioned functions
+    // a new call to global function is needed
+    if (auto* glob_var_node = post_call->op.as<GlobalVarNode>()) {
+      auto glob_var = GetRef<GlobalVar>(glob_var_node);
+      auto glob_func = Downcast<Function>(mod_->Lookup(glob_var));
+      auto new_glob_func = VisitExpr(glob_func);
+      if (!new_glob_func.same_as(glob_func)) {
+        mod_->Update(glob_var, Downcast<Function>(new_glob_func));
+        Array<Expr> new_args = post_call->args;
+        ICHECK(function_to_constants_.find(glob_func) != function_to_constants_.end());
+        for (auto constant : function_to_constants_.at(glob_func)) {
+          new_args.push_back(constant);
+        }
+        final_call = Call(glob_var, new_args);
+      }
+    }
+
+    // Since the constants are kicked out of the local partitioned functions
+    // a new call to local function is needed
+    if (auto* func_node = call->op.as<FunctionNode>()) {
+      Function func = GetRef<Function>(func_node);
+      auto new_func = VisitExpr(func);
+      if (!new_func.same_as(func)) {
+        Array<Expr> new_args = post_call->args;
+        ICHECK(function_to_constants_.find(func) != function_to_constants_.end());
+        for (auto constant : function_to_constants_.at(func)) {
+          constants_within_function_.push_back(constant);
+          Var var_arg = Var(gen_var_name(), constant->tensor_type());
+          new_args.push_back(var_arg);
+        }
+        final_call = Call(new_func, new_args);
+      }
+    }
+
+    return final_call;
+  }
+
+ private:
+  /* \brief Updated module where all calls have replaced constants with new variables */
+  IRModule mod_;
+  /* \brief Maintains mapping of original function to the replaced constants */
+  Map<Function, Array<Constant>> function_to_constants_;
+  /* \brief Constants being kicked out of a function during the function visit */
+  Array<Constant> constants_within_function_;
+  /* \brief Keeps track of variables being created */
+  int var_count_ = 0;
+  /* \brief Keeps track of function scope */
+  int func_nesting_level_ = 0;
+};
+
+/*!  * \brief Kicks out all constants out of the partitioned function into main()  */
+IRModule ExtractConstants(IRModule mod) {
+  String func_name;
+  Function func;
+
+  auto extract_constants = ExtractConstantsMutator(mod);
+  Function main_func = Downcast<Function>(mod->Lookup("main"));
+  auto new_main_body = extract_constants.VisitExpr(main_func->body);
+  if (!new_main_body.same_as(main_func->body)) {
+    auto main_var = mod->GetGlobalVar("main");
+    auto new_main_func = Function(main_func->params, new_main_body, main_func->ret_type,
+                                  main_func->type_params, main_func->attrs);
+    mod->Update(main_var, new_main_func);
+  }
+  return mod;
+}
+
+transform::Pass ExtractConstantsFromPartitionedFunction() {
+  runtime::TypedPackedFunc<IRModule(IRModule, transform::PassContext)> pass_func =
+      [=](IRModule m, transform::PassContext pc) { return ExtractConstants(m); };
+  return tvm::transform::CreateModulePass(pass_func, 0, "ExtractConstantsFromPartitionedFunction",
+                                          {});
+}
+
+TVM_REGISTER_GLOBAL("relay.ext.cmsisnn.transform.ExtractConstantsFromPartitionedFunction")
+    .set_body_typed([]() { return ExtractConstantsFromPartitionedFunction(); });

Review comment:
       ```suggestion
   TVM_REGISTER_GLOBAL("relay.ext.cmsisnn.transform.ExtractConstantsFromPartitionedFunction")
       .set_body_typed(ExtractConstantsFromPartitionedFunction);
   ```

##########
File path: src/relay/backend/contrib/cmsisnn/generate_constants.cc
##########
@@ -0,0 +1,230 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/attrs/transform.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+Expr MakeTranspose(Expr data, Array<Integer> axes);
+namespace contrib {
+namespace cmsisnn {
+
+class GenerateConstantsMutator : public MixedModeMutator {
+ public:
+  explicit GenerateConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  /*!  * \brief Converts Kernel layout from HWIO to OHWI to align to CMSIS-NN requirements */
+  Expr ConvertKernelLayout(Expr kernel_expr, const Conv2DAttrs* conv2d_attrs, Attrs* new_attrs) {
+    auto attrs = make_object<Conv2DAttrs>();
+    attrs->strides = std::move(conv2d_attrs->strides);
+    attrs->padding = std::move(conv2d_attrs->padding);
+    attrs->dilation = std::move(conv2d_attrs->dilation);
+    attrs->groups = conv2d_attrs->groups;
+    attrs->channels = std::move(conv2d_attrs->channels);
+    attrs->kernel_size = std::move(conv2d_attrs->kernel_size);
+    attrs->data_layout = std::move(conv2d_attrs->data_layout);
+    attrs->kernel_layout = runtime::String("OHWI");
+    attrs->out_layout = std::move(conv2d_attrs->out_layout);
+    attrs->out_dtype = std::move(conv2d_attrs->out_dtype);
+    *new_attrs = tvm::Attrs{attrs};
+
+    IRModule kernel_module;
+    auto func_body = MakeTranspose(kernel_expr, {Integer(3), Integer(0), Integer(1), Integer(2)});
+    auto kernel_func =
+        Function(FreeVars(func_body), func_body, Type(), FreeTypeVars(func_body, kernel_module));
+    GlobalVar kernel_var("main");
+    kernel_module->Add(kernel_var, kernel_func);
+    kernel_module = relay::transform::FoldConstant()(kernel_module);
+    kernel_func = Downcast<Function>(kernel_module->Lookup("main"));
+    return kernel_func->body;
+  }
+
+  /*!  * \brief Performs weight transpose and substitutes existing constants in the composite
+   *            function for Conv2D with CMSIS-NN Requantize constants */
+  Expr GenerateConv2dRequantConstants(const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* conv2d_call = nullptr;
+    auto* final_call = expr.as<CallNode>();
+    auto* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    auto* requantize_input = requantize_call->args[0].as<CallNode>();
+    auto* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      conv2d_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      conv2d_call = requantize_input;
+    }
+
+    // Transpose weights: HWIO -> OHWI
+    auto* conv2d_attrs = conv2d_call->attrs.as<Conv2DAttrs>();
+    tvm::Attrs new_conv2d_attrs;
+    Expr transposed_kernel =
+        ConvertKernelLayout(conv2d_call->args[1], conv2d_attrs, &new_conv2d_attrs);
+
+    // Obtain input and output scales from Relay's Requantization
+    int64_t out_channels = conv2d_attrs->channels.as<IntImmNode>()->value;
+    float output_scale = GetScalarFromConstant<float>(requantize_call->args[3]);
+    auto input_scales = tvm::relay::qnn::GetFloatVectorFromConstant(requantize_call->args[1]);
+    ICHECK(input_scales.size() == static_cast<size_t>(out_channels));
+
+    // Calculate requantization multiplier and shift
+    Device dev{DLDeviceType::kDLCPU, 0};
+    runtime::NDArray multiplier_nda =
+        runtime::NDArray::Empty({out_channels}, DataType::Int(32), dev);
+    runtime::NDArray shift_nda = runtime::NDArray::Empty({out_channels}, DataType::Int(32), dev);
+    int32_t* multiplier = static_cast<int32_t*>(multiplier_nda->data);
+    int32_t* shift = static_cast<int32_t*>(shift_nda->data);
+    for (int i = 0; i < out_channels; ++i) {
+      double effective_output_scale =
+          static_cast<double>(input_scales[i]) / static_cast<double>(output_scale);
+      std::tie(*(multiplier + i), *(shift + i)) =
+          tvm::relay::qnn::GetFixedPointMultiplierShift(effective_output_scale);
+    }
+
+    // Create constants from requantization multiplier and shift
+    Constant multiplier_const(multiplier_nda);
+    Constant shift_const(shift_nda);
+
+    // Convert scale scalars into Constants
+    // Scales are expected as Constants by following passes
+    Expr weight_scale = conv2d_call->args[5];
+    Expr req_inp_scale = requantize_call->args[1];
+    if (out_channels == 1) {
+      runtime::NDArray weight_scale_nda =
+          runtime::NDArray::Empty({out_channels}, DataType::Float(32), dev);
+      float* weight_scale_p = static_cast<float*>(weight_scale_nda->data);
+      *weight_scale_p = GetScalarFromConstant<float>(weight_scale);
+      weight_scale = Constant(weight_scale_nda);
+
+      runtime::NDArray req_inp_scale_nda =
+          runtime::NDArray::Empty({out_channels}, DataType::Float(32), dev);
+      float* req_inp_scale_p = static_cast<float*>(req_inp_scale_nda->data);
+      *req_inp_scale_p = GetScalarFromConstant<float>(req_inp_scale);
+      req_inp_scale = Constant(req_inp_scale_nda);
+    }
+
+    // Replace existing weights (HWIO) with the transposed ones (OHWI)
+    // Substitute Conv2D weight_zero_point with the CMSIS-NN multiplier
+    // Substitute Requantize input_zero_point with CMSIS-NN shift
+    // Conv2D arguments: data, weight, input_zp, weight_zp, input_sc, weight_sc
+    Array<Expr> conv2d_args = {conv2d_call->args[0], transposed_kernel,    conv2d_call->args[2],
+                               multiplier_const,     conv2d_call->args[4], weight_scale};
+    Call ret_call = Call(conv2d_call->op, conv2d_args, new_conv2d_attrs, {});
+    if (bias_add_call) {
+      ret_call =
+          Call(bias_add_call->op, {ret_call, bias_add_call->args[1]}, bias_add_call->attrs, {});
+    }
+    Array<Expr> requantize_args = {ret_call, req_inp_scale, shift_const, requantize_call->args[3],
+                                   requantize_call->args[4]};
+    ret_call = Call(requantize_call->op, requantize_args, requantize_call->attrs, {});
+    if (clip_call) {
+      ret_call = Call(clip_call->op, {ret_call}, clip_call->attrs, {});
+    }
+    return ret_call;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    auto* global_var = call->op.as<GlobalVarNode>();
+    if (global_var) {
+      // Update to global function call needed because the body changes while
+      // generating new constants
+      Function func = Downcast<Function>(mod_->Lookup(global_var->name_hint));
+      Expr new_body = VisitExpr(func->body);
+      if (!new_body.same_as(func->body)) {
+        Function new_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                                     FreeTypeVars(new_body, mod_), func->attrs);
+        mod_->Update(GetRef<GlobalVar>(global_var), new_func);
+        final_call = Call(GetRef<GlobalVar>(global_var), post_call->args);
+      }
+    }
+
+    // Recreate composite function and corresponding call
+    // Updated composite function contains CMSIS-NN quantized multiplier and shift constants
+    if (call->op.as<FunctionNode>()) {
+      auto* func = call->op.as<FunctionNode>();
+      auto func_name = func->GetAttr<String>(attr::kComposite);
+      if (func_name.defined() && func_name == "cmsisnn.qnn_conv2d") {
+        Expr new_body = GenerateConv2dRequantConstants(func->body);
+        Function new_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                                     FreeTypeVars(new_body, mod_), func->attrs);
+        final_call = Call(new_func, post_call->args);
+      }
+    }
+
+    return final_call;
+  }
+
+ private:
+  IRModule mod_;
+};
+
+IRModule GenerateConstants(IRModule mod) {
+  String func_name;
+  Function func;
+
+  // Introduces CMSIS-NN constants before the call to the external Relay function
+  auto generate_constants = GenerateConstantsMutator(mod);
+  Function main_func = Downcast<Function>(mod->Lookup("main"));
+  auto new_main_body = generate_constants.VisitExpr(main_func->body);
+  if (!new_main_body.same_as(main_func->body)) {
+    auto main_var = mod->GetGlobalVar("main");
+    auto new_main_func = Function(main_func->params, new_main_body, main_func->ret_type,
+                                  main_func->type_params, main_func->attrs);
+    mod->Update(main_var, new_main_func);
+  }
+
+  return mod;
+}
+
+transform::Pass GenerateCMSISNNConstants() {
+  runtime::TypedPackedFunc<IRModule(IRModule, transform::PassContext)> pass_func =
+      [=](IRModule m, transform::PassContext pc) { return GenerateConstants(m); };
+  return tvm::transform::CreateModulePass(pass_func, 0, "GenerateCMSISNNConstants", {});
+}
+
+TVM_REGISTER_GLOBAL("relay.ext.cmsisnn.transform.GenerateCMSISNNConstants").set_body_typed([]() {
+  return GenerateCMSISNNConstants();
+});

Review comment:
       ```suggestion
   TVM_REGISTER_GLOBAL("relay.ext.cmsisnn.transform.GenerateCMSISNNConstants").set_body_typed(GenerateCMSISNNConstants);
   ```







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r735715740



##########
File path: src/relay/backend/contrib/cmsisnn/generate_constants.cc
##########
@@ -0,0 +1,230 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */

Review comment:
       Done!







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r734678777



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    // Replace Constant arguments with Vars for ML Operators
+    // Perform this for non-main Call Nodes only
+    if (func_nesting_level_ && call->op.as<OpNode>()) {
+      Array<Expr> new_args;
+      for (auto& arg : post_call->args) {
+        auto* const_arg = arg.as<ConstantNode>();
+        if (const_arg && !const_arg->is_scalar()) {
+          Var var_arg = Var(gen_var_name(), const_arg->tensor_type());
+          new_args.push_back(var_arg);
+          constants_within_function_.push_back(GetRef<Constant>(const_arg));
+        } else {
+          new_args.push_back(arg);
+        }
+      }
+      final_call = Call(call->op, new_args, call->attrs, {});
+    }
+
+    // Since the constants are kicked out of partitioned functions
+    // a new call to global function is needed
+    if (auto* glob_var_node = post_call->op.as<GlobalVarNode>()) {
+      auto glob_var = GetRef<GlobalVar>(glob_var_node);
+      auto glob_func = Downcast<Function>(mod_->Lookup(glob_var));
+      auto new_glob_func = VisitExpr(glob_func);
+      if (!new_glob_func.same_as(glob_func)) {
+        mod_->Update(glob_var, Downcast<Function>(new_glob_func));
+        Array<Expr> new_args = post_call->args;
+        ICHECK(function_to_constants_.find(glob_func) != function_to_constants_.end());
+        for (auto constant : function_to_constants_.at(glob_func)) {
+          new_args.push_back(constant);
+        }
+        final_call = Call(glob_var, new_args);
+      }
+    }
+
+    // Since the constants are kicked out of the local partitioned functions
+    // a new call to local function is needed
+    if (auto* func_node = call->op.as<FunctionNode>()) {
+      Function func = GetRef<Function>(func_node);
+      auto new_func = VisitExpr(func);
+      if (!new_func.same_as(func)) {
+        Array<Expr> new_args = post_call->args;
+        ICHECK(function_to_constants_.find(func) != function_to_constants_.end());
+        for (auto constant : function_to_constants_.at(func)) {
+          constants_within_function_.push_back(constant);
+          Var var_arg = Var(gen_var_name(), constant->tensor_type());
+          new_args.push_back(var_arg);
+        }
+        final_call = Call(new_func, new_args);
+      }
+    }
+
+    return final_call;
+  }
+
+ private:
+  /* \brief Updated module where all calls have replaced constants with new variables */
+  IRModule mod_;
+  /* \brief Maintains mapping of original function to the replaced constants */
+  Map<Function, Array<Constant>> function_to_constants_;
+  /* \brief Constants being kicked out of a function during the function visit */
+  Array<Constant> constants_within_function_;
+  /* \brief Keeps track of variables being created */
+  int var_count_ = 0;
+  /* \brief Keeps track of function scope */
+  int func_nesting_level_ = 0;
+};
+
+/*!  * \brief Kicks out all constants out of the partitioned function into main()  */
+IRModule ExtractConstants(IRModule mod) {

Review comment:
       Look at @Mousius's comment below. It looks like the answer is "no", unless you have a suggestion for an alternative implementation.







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r743811104



##########
File path: python/tvm/relay/op/contrib/cmsisnn.py
##########
@@ -52,37 +54,85 @@ def partition_for_cmsisnn(mod, params=None, **opts):
             transform.InferType(),
             transform.MergeComposite(pattern_table()),
             transform.AnnotateTarget("cmsisnn"),
-            transform.MergeCompilerRegions(),

Review comment:
       Although enabling this has no impact on the generated primfunc, we wanted to keep it simple by having a 1-1 mapping from a single external Relay function to a single primfunc.

Review comment:
       Is there a problem with the first representation?

Review comment:
       Enabling this pass gets us the latter representation. Irrespective of the Relay we get post-partitioning, RelayToTIR works on composite functions, each of which is translated into a separate primfunc. So, independent of the pass, at the moment we end up producing multiple '.c' files.
   
   We made the choice of having a separate primfunc per operator to avoid the bookkeeping required in RelayToTIR, at the cost of having to invoke different C functions at runtime.
   
   @Mousius, do you want to add anything else?
   







[GitHub] [tvm] manupa-arm commented on pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#issuecomment-962006927


   @ashutosh-arm, could you retrigger this one? The Docker image is updated now.





[GitHub] [tvm] Mousius commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
Mousius commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r743859261



##########
File path: python/tvm/relay/op/contrib/cmsisnn.py
##########
@@ -52,37 +54,85 @@ def partition_for_cmsisnn(mod, params=None, **opts):
             transform.InferType(),
             transform.MergeComposite(pattern_table()),
             transform.AnnotateTarget("cmsisnn"),
-            transform.MergeCompilerRegions(),

Review comment:
       With https://github.com/apache/tvm/pull/9397 the `Target` will produce a single `IRModule` into a `runtime::Module` rather than creating many. After that, the functions are just wrappers around the CMSIS-NN calls: they essentially create the CMSIS-NN structures and invoke them. That seems like the minimal amount of code for them, short of packing multiple CMSIS-NN API calls into a single PrimFunc, which we didn't intend to do.







[GitHub] [tvm] Mousius commented on pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
Mousius commented on pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#issuecomment-966162931


   > Generally these assertions are derived from assumptions that a user flow will never trigger them and usually are about inputs to a particular component.
   
   I would question why we're adding them if they're in unreachable code paths, but if we do think they're worth adding, I would consider them part of the contract of the internal function, and thus they should have associated tests for the behaviour (throwing an exception is now part of the function's behaviour).
   
   As per [TVM Code Review Guidelines](https://github.com/apache/tvm/blob/main/docs/contribute/code_review.rst#factors-to-consider-about-code-quality), we should add test coverage for anything we contribute. If we're happy that at some point someone may inadvertently change the behaviour of this function, then potentially it's not a necessary line of code and we can apply [the YAGNI principle from extreme programming](https://en.wikipedia.org/wiki/You_aren%27t_gonna_need_it) :smile_cat: 





[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r733882837



##########
File path: src/relay/backend/contrib/cmsisnn/tir_to_runtime.cc
##########
@@ -54,6 +54,162 @@ class CodeGenCMSISNN : public CodeGenC {
   }
 
  private:
+  /*!  * \brief Emit the CMSIS-NN context buffer */
+  void VisitStmt_(const AllocateNode* op) {
+    context_buffer_name_ = op->buffer_var->name_hint;
+    context_buffer_size_ = op->constant_allocation_size();
+    CodeGenC::VisitStmt_(op);
+  }
+
+  /*!  * \brief Emits CMSIS-NN APIs for every call_extern */
+  void VisitExpr_(const CallNode* op, std::ostream& os) {  // NOLINT(*)
+    if (!op->op.same_as(builtin::call_extern())) {
+      return;
+    }
+    std::string cmsis_func_name = op->args[0].as<StringImmNode>()->value;
+    if (cmsis_func_name == "arm_softmax_s8" || cmsis_func_name == "arm_elementwise_mul_s8" ||
+        cmsis_func_name == "arm_elementwise_add_s8") {
+      CodeGenC::VisitExpr_(op, os);
+    } else if (cmsis_func_name == "arm_convolve_wrapper_s8") {
+      EmitConv2D(op);
+    }
+    return;
+  }
+
+  /*!  * \brief Emits cmsis_nn_context struct */
+  std::string EmitCMSISNNContext(std::ostream& os, std::string buf_name, int buf_size) {
+    std::string struct_name = "context";
+    PrintIndent();
+    os << "cmsis_nn_context " << struct_name << "= {" << buf_name << "," << buf_size << "};\n";
+    return struct_name;
+  }
+
+  /*!  * \brief Emits cmsis_nn_conv_params struct */
+  std::string EmitCMSISNNConvParams(std::ostream& os, int32_t input_offset, int32_t output_offset,
+                                    int32_t stride_w, int32_t stride_h, int32_t padding_w,
+                                    int32_t padding_h, int32_t dilation_w, int32_t dilation_h,
+                                    int32_t clip_min, int32_t clip_max) {
+    std::string struct_name = "conv_params";
+    PrintIndent();
+    os << "cmsis_nn_tile stride = {" << stride_w << "," << stride_h << "};\n";
+    PrintIndent();
+    os << "cmsis_nn_tile padding = {" << padding_w << "," << padding_h << "};\n";
+    PrintIndent();
+    os << "cmsis_nn_tile dilation = {" << dilation_w << "," << dilation_h << "};\n";
+    PrintIndent();
+    os << "cmsis_nn_activation activation = {" << clip_min << "," << clip_max << "};\n";
+    PrintIndent();
+    os << "cmsis_nn_conv_params " << struct_name << " = {" << input_offset << ", " << output_offset
+       << ", stride, padding, dilation, activation};\n";
+    return struct_name;
+  }
+
+  /*!  * \brief Emits cmsis_nn_per_channel_quant_params struct */
+  std::string EmitCMSISNNPerChannelQuantParams(std::ostream& os, std::string multiplier,
+                                               std::string shift) {
+    std::string struct_name = "quant_params";
+    PrintIndent();
+    os << "cmsis_nn_per_channel_quant_params " << struct_name << " = {" << multiplier << ", "
+       << shift << "};\n";
+    return struct_name;
+  }
+
+  /*!  * \brief Emits cmsis_nn_dims struct */
+  std::string EmitCMSISNNDims(std::ostream& os, std::string tensor_type, int32_t n, int32_t h,
+                              int32_t w, int32_t c) {
+    std::string struct_name = tensor_type + "_dims";
+    PrintIndent();
+    os << "cmsis_nn_dims " << struct_name << " = {" << n << "," << h << "," << w << "," << c
+       << "};\n";
+    return struct_name;
+  }
+
+  /*!  * \brief Emits CMSIS-NN APIs for every call_extern */
+  void EmitConv2D(const CallNode* op) {
+    static const int max_num_args = 33;
+    std::string cmsis_func_name = op->args[0].as<StringImmNode>()->value;
+
+    bool bias_enabled = false;
+    if (op->args.size() == max_num_args) {
+      bias_enabled = true;
+    }
+
+    auto get_var_name = [](const CallNode* op, int id) {
+      return op->args[id].as<VarNode>()->name_hint.c_str();
+    };
+    auto get_arg_value = [](const CallNode* op, int id) {
+      return op->args[id].as<IntImmNode>()->value;
+    };
+    int arg_id = 0;
+    std::string input_data = get_var_name(op, ++arg_id);
+    std::string filter_data = get_var_name(op, ++arg_id);
+    std::string multiplier = get_var_name(op, ++arg_id);
+    std::string bias_data("0x0");
+    if (bias_enabled) {
+      bias_data = get_var_name(op, ++arg_id);
+    }
+    std::string shift = get_var_name(op, ++arg_id);
+    std::string output_data = get_var_name(op, ++arg_id);
+
+    int input_offset = get_arg_value(op, ++arg_id);
+    int output_offset = get_arg_value(op, ++arg_id);
+    int stride_w = get_arg_value(op, ++arg_id);
+    int stride_h = get_arg_value(op, ++arg_id);
+    int padding_w = get_arg_value(op, ++arg_id);
+    int padding_h = get_arg_value(op, ++arg_id);
+    int dilation_w = get_arg_value(op, ++arg_id);
+    int dilation_h = get_arg_value(op, ++arg_id);
+    int clip_min = get_arg_value(op, ++arg_id);
+    int clip_max = get_arg_value(op, ++arg_id);
+    int input_n = get_arg_value(op, ++arg_id);
+    int input_h = get_arg_value(op, ++arg_id);
+    int input_w = get_arg_value(op, ++arg_id);
+    int input_c = get_arg_value(op, ++arg_id);
+    int filter_n = get_arg_value(op, ++arg_id);
+    int filter_h = get_arg_value(op, ++arg_id);
+    int filter_w = get_arg_value(op, ++arg_id);
+    int filter_c = get_arg_value(op, ++arg_id);
+    int bias_n = get_arg_value(op, ++arg_id);
+    int bias_h = get_arg_value(op, ++arg_id);
+    int bias_w = get_arg_value(op, ++arg_id);
+    int bias_c = get_arg_value(op, ++arg_id);
+    int output_n = get_arg_value(op, ++arg_id);
+    int output_h = get_arg_value(op, ++arg_id);
+    int output_w = get_arg_value(op, ++arg_id);
+    int output_c = get_arg_value(op, ++arg_id);
+
+    // TODO(ashutosh-arm) for mve code, need to look for tir allocate

Review comment:
       Yes, I have removed it now.
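   
   For context, here is a rough sketch of the C that the EmitConv2D path above is shaped to emit for a single conv2d call. This is illustrative only: the function name, buffer name, dimensions, offsets and clip values are placeholders, not actual compiler output, and it assumes the CMSIS-NN `arm_nnfunctions.h` header.
   
       #include "arm_nnfunctions.h"  /* CMSIS-NN API header */
   
       /* Illustrative wrapper body; every numeric value is a placeholder. */
       void conv2d_wrapper_sketch(const int8_t *input_data, const int8_t *filter_data,
                                  int32_t *multiplier, int32_t *shift,
                                  const int32_t *bias_data, int8_t *output_data,
                                  int8_t *context_buffer) {
         cmsis_nn_context context = {context_buffer, 2048};
         cmsis_nn_tile stride = {1, 1};
         cmsis_nn_tile padding = {1, 1};
         cmsis_nn_tile dilation = {1, 1};
         cmsis_nn_activation activation = {-128, 127};
         cmsis_nn_conv_params conv_params = {-128, 0, stride, padding, dilation, activation};
         cmsis_nn_per_channel_quant_params quant_params = {multiplier, shift};
         cmsis_nn_dims input_dims = {1, 28, 28, 3};
         cmsis_nn_dims filter_dims = {8, 3, 3, 3};
         cmsis_nn_dims bias_dims = {1, 1, 1, 8};
         cmsis_nn_dims output_dims = {1, 28, 28, 8};
         arm_convolve_wrapper_s8(&context, &conv_params, &quant_params,
                                 &input_dims, input_data, &filter_dims, filter_data,
                                 &bias_dims, bias_data, &output_dims, output_data);
       }
   
   The argument order follows the arm_convolve_wrapper_s8 signature, and the struct emission order matches the Emit* helpers in the diff.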







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r735526607



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    // Replace Constant arguments with Vars for ML Operators
+    // Perform this for non-main Call Nodes only
+    if (func_nesting_level_ && call->op.as<OpNode>()) {
+      Array<Expr> new_args;
+      for (auto& arg : post_call->args) {
+        auto* const_arg = arg.as<ConstantNode>();
+        if (const_arg && !const_arg->is_scalar()) {
+          Var var_arg = Var(gen_var_name(), const_arg->tensor_type());
+          new_args.push_back(var_arg);
+          constants_within_function_.push_back(GetRef<Constant>(const_arg));
+        } else {
+          new_args.push_back(arg);
+        }
+      }
+      final_call = Call(call->op, new_args, call->attrs, {});
+    }
+
+    // Since the constants are kicked out of partitioned functions
+    // a new call to global function is needed
+    if (auto* glob_var_node = post_call->op.as<GlobalVarNode>()) {
+      auto glob_var = GetRef<GlobalVar>(glob_var_node);
+      auto glob_func = Downcast<Function>(mod_->Lookup(glob_var));
+      auto new_glob_func = VisitExpr(glob_func);
+      if (!new_glob_func.same_as(glob_func)) {
+        mod_->Update(glob_var, Downcast<Function>(new_glob_func));
+        Array<Expr> new_args = post_call->args;
+        ICHECK(function_to_constants_.find(glob_func) != function_to_constants_.end());
+        for (auto constant : function_to_constants_.at(glob_func)) {
+          new_args.push_back(constant);
+        }
+        final_call = Call(glob_var, new_args);
+      }
+    }
+
+    // Since the constants are kicked out of the local partitioned functions
+    // a new call to local function is needed
+    if (auto* func_node = call->op.as<FunctionNode>()) {

Review comment:
       After some thought: except for the VisitExpr(Function) call, there is nothing in common between those two blocks. From a readability point of view, I think it's better to keep them separate.







[GitHub] [tvm] Mousius commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
Mousius commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r736327262



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    // Replace Constant arguments with Vars for ML Operators
+    // Perform this for non-main Call Nodes only
+    if (func_nesting_level_ && call->op.as<OpNode>()) {
+      Array<Expr> new_args;
+      for (auto& arg : post_call->args) {
+        auto* const_arg = arg.as<ConstantNode>();
+        if (const_arg && !const_arg->is_scalar()) {
+          Var var_arg = Var(gen_var_name(), const_arg->tensor_type());
+          new_args.push_back(var_arg);
+          constants_within_function_.push_back(GetRef<Constant>(const_arg));
+        } else {
+          new_args.push_back(arg);
+        }
+      }
+      final_call = Call(call->op, new_args, call->attrs, {});
+    }
+
+    // Since the constants are kicked out of partitioned functions
+    // a new call to global function is needed
+    if (auto* glob_var_node = post_call->op.as<GlobalVarNode>()) {
+      auto glob_var = GetRef<GlobalVar>(glob_var_node);
+      auto glob_func = Downcast<Function>(mod_->Lookup(glob_var));
+      auto new_glob_func = VisitExpr(glob_func);
+      if (!new_glob_func.same_as(glob_func)) {
+        mod_->Update(glob_var, Downcast<Function>(new_glob_func));
+        Array<Expr> new_args = post_call->args;
+        ICHECK(function_to_constants_.find(glob_func) != function_to_constants_.end());

Review comment:
       @manupa-arm my bad actually: I rechecked, and `unordered_set` is hash-based, not tree-based; sorry about that! In general though, for smaller sets, iterating over the entire set can take similar time or even be faster, depending on the complexity of the hashing or tree algorithm.
   
   As for TVM's `Map` vs `unordered_map`, both are hash maps, so I don't believe there'll be any performance difference between them. Internally, it looks like `Map` uses `unordered_map`:
   https://github.com/apache/tvm/blob/75a8fa1fb7011636066c839c6abd5f1c2e9056ba/include/tvm/runtime/container/map.h#L51
   
   And there are pushes elsewhere in the codebase to use `Map` for consistency:
   https://github.com/apache/tvm/blob/75a8fa1fb7011636066c839c6abd5f1c2e9056ba/src/relay/backend/te_compiler.h#L61-L63
   
   Given this pass is only meant for CMSIS-NN, it is bounded by the small number of functions that will fit on an embedded device; that is the scope in which this contribution was made.
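   
   To make the comparison concrete, here is a minimal sketch (not from this PR; names are illustrative) of `tvm::Map` usage, showing the `unordered_map`-like semantics discussed above:
   
       #include <tvm/runtime/container/map.h>
       #include <tvm/runtime/container/string.h>
   
       using tvm::runtime::Map;
       using tvm::runtime::String;
   
       void MapDemo() {
         Map<String, String> table;
         table.Set("qnn.conv2d", "arm_convolve_wrapper_s8");  // copy-on-write insert
         if (table.count("qnn.conv2d")) {
           String kernel = table.at("qnn.conv2d");  // hash-based lookup, like unordered_map::at
           (void)kernel;  // placeholder use; a real pass would record or emit this
         }
       }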







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r733866044



##########
File path: python/tvm/relay/op/contrib/cmsisnn.py
##########
@@ -47,42 +47,93 @@ def partition_for_cmsisnn(mod, params=None, **opts):
     if params:
         mod["main"] = bind_params_by_name(mod["main"], params)
 
+    tvm._ffi._init_api("relay.ext.cmsisnn.transform", __name__)

Review comment:
       Done







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r733874882



##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -43,23 +44,153 @@ class RelayToTIRVisitor : public MixedModeVisitor {
   inline IntImm ToArg(int32_t value) { return IntImm(DataType::Int(32), value); }
 
   void CreatePrimFuncForExtern(Array<tir::Var> func_signature,
-                               tvm::Array<PrimExpr> call_extern_args) {
+                               tvm::Array<PrimExpr> call_extern_args, int context_buffer_size = 0) {
     Map<String, ObjectRef> dict_attrs;
     dict_attrs.Set("global_symbol", func_name_);
     dict_attrs.Set("tir.noalias", Bool(true));
 
     tir::Stmt body = tir::Evaluate(
         tvm::tir::Call(DataType::Int(8), tir::builtin::call_extern(), call_extern_args));
 
+    if (context_buffer_size) {
+      // TODO(@ashutosh-arm) while supporting MVE, we need to move allocation through TVMBAW
+      tir::Var buffer_var("context_buffer", PointerType(PrimType(DataType::Int(8)), "global"));
+      body = tir::Allocate(buffer_var, DataType::Int(8), {context_buffer_size}, tir::const_true(),
+                           body);
+    }
+
     primfunc_ = tir::PrimFunc(func_signature, body, VoidType(), Map<tir::Var, tir::Buffer>(),
                               DictAttrs(dict_attrs));
   }
 
+  void EmitConv2D(const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* conv2d_call = nullptr;
+    auto* final_call = expr.as<CallNode>();
+    auto* final_op = final_call->op.as<OpNode>();

Review comment:
       ACK
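   
   For context, the context-buffer handling in the quoted diff can be read as the following standalone sketch; the construction mirrors the quoted lines, while the helper name is ours:
   
       #include <tvm/ir/type.h>
       #include <tvm/tir/op.h>
       #include <tvm/tir/stmt.h>
       #include <tvm/tir/var.h>
   
       // Wrap a PrimFunc body in a tir::Allocate so the CMSIS-NN kernel
       // receives a scratch ("context") buffer, as done in the diff above.
       tvm::tir::Stmt WrapWithContextBuffer(tvm::tir::Stmt body, int context_buffer_size) {
         using namespace tvm;
         tir::Var buffer_var("context_buffer",
                             PointerType(PrimType(DataType::Int(8)), "global"));
         return tir::Allocate(buffer_var, DataType::Int(8), {context_buffer_size},
                              tir::const_true(), body);
       }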







[GitHub] [tvm] Mousius commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
Mousius commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r734787732



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    // Replace Constant arguments with Vars for ML Operators
+    // Perform this for non-main Call Nodes only
+    if (func_nesting_level_ && call->op.as<OpNode>()) {
+      Array<Expr> new_args;
+      for (auto& arg : post_call->args) {
+        auto* const_arg = arg.as<ConstantNode>();
+        if (const_arg && !const_arg->is_scalar()) {
+          Var var_arg = Var(gen_var_name(), const_arg->tensor_type());
+          new_args.push_back(var_arg);
+          constants_within_function_.push_back(GetRef<Constant>(const_arg));
+        } else {
+          new_args.push_back(arg);
+        }
+      }
+      final_call = Call(call->op, new_args, call->attrs, {});
+    }
+
+    // Since the constants are kicked out of partitioned functions
+    // a new call to global function is needed
+    if (auto* glob_var_node = post_call->op.as<GlobalVarNode>()) {
+      auto glob_var = GetRef<GlobalVar>(glob_var_node);
+      auto glob_func = Downcast<Function>(mod_->Lookup(glob_var));
+      auto new_glob_func = VisitExpr(glob_func);
+      if (!new_glob_func.same_as(glob_func)) {
+        mod_->Update(glob_var, Downcast<Function>(new_glob_func));
+        Array<Expr> new_args = post_call->args;
+        ICHECK(function_to_constants_.find(glob_func) != function_to_constants_.end());

Review comment:
       Given this is capped at the number of functions, do we know whether this meaningfully affects performance?







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r735378465



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    // Replace Constant arguments with Vars for ML Operators
+    // Perform this for non-main Call Nodes only
+    if (func_nesting_level_ && call->op.as<OpNode>()) {
+      Array<Expr> new_args;
+      for (auto& arg : post_call->args) {

Review comment:
       ACK







[GitHub] [tvm] Mousius commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
Mousius commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r734661754



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    // Replace Constant arguments with Vars for ML Operators
+    // Perform this for non-main Call Nodes only
+    if (func_nesting_level_ && call->op.as<OpNode>()) {
+      Array<Expr> new_args;
+      for (auto& arg : post_call->args) {
+        auto* const_arg = arg.as<ConstantNode>();
+        if (const_arg && !const_arg->is_scalar()) {
+          Var var_arg = Var(gen_var_name(), const_arg->tensor_type());
+          new_args.push_back(var_arg);
+          constants_within_function_.push_back(GetRef<Constant>(const_arg));
+        } else {
+          new_args.push_back(arg);
+        }
+      }
+      final_call = Call(call->op, new_args, call->attrs, {});
+    }
+
+    // Since the constants are kicked out of partitioned functions
+    // a new call to global function is needed
+    if (auto* glob_var_node = post_call->op.as<GlobalVarNode>()) {
+      auto glob_var = GetRef<GlobalVar>(glob_var_node);
+      auto glob_func = Downcast<Function>(mod_->Lookup(glob_var));
+      auto new_glob_func = VisitExpr(glob_func);
+      if (!new_glob_func.same_as(glob_func)) {
+        mod_->Update(glob_var, Downcast<Function>(new_glob_func));
+        Array<Expr> new_args = post_call->args;
+        ICHECK(function_to_constants_.find(glob_func) != function_to_constants_.end());
+        for (auto constant : function_to_constants_.at(glob_func)) {
+          new_args.push_back(constant);
+        }
+        final_call = Call(glob_var, new_args);
+      }
+    }
+
+    // Since the constants are kicked out of the local partitioned functions

Review comment:
       Derive, distill, select? 







[GitHub] [tvm] areusch commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
areusch commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r743906641



##########
File path: tests/python/contrib/test_cmsisnn/test_extract_constants.py
##########
@@ -0,0 +1,175 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""CMSIS-NN integration tests: extract_constants pass"""
+import itertools
+import math
+import numpy as np
+import pytest
+import tvm
+from tvm import relay
+
+from utils import (
+    make_module,
+    count_num_calls,
+    get_range_for_dtype_str,
+    get_same_padding,
+    get_conv2d_qnn_params,
+    make_qnn_relu,
+)
+
+tvm._ffi._init_api("relay.ext.cmsisnn.transform", __name__)
+
+
+class CheckFunctionsForConstants(tvm.relay.ExprVisitor):
+    def __init__(self):
+        super().__init__()
+        self.num_constants_ = 0
+
+    def visit_call(self, call):
+        super().visit_call(call)
+        for arg in call.args:
+            if isinstance(arg, relay.Constant) and arg.data.numpy().ndim > 0:
+                self.num_constants_ += 1
+
+    def visit_function(self, func):
+        super().visit_function(func)
+        assert self.num_constants_ == 0, "Functions should not have constant arguments in Calls"
+
+
+def set_external_func_attr(func, compiler, ext_symbol):
+    func = func.with_attr("Primitive", tvm.tir.IntImm("int32", 1))
+    func = func.with_attr("Compiler", compiler)
+    func = func.with_attr("global_symbol", ext_symbol)
+    return func
+
+
+def test_external_function():
+    y0_data = np.random.uniform(0, 1, (8, 8)).astype("float32")
+    x0 = relay.var("x0", shape=(8, 8))
+    y0_const = relay.const(y0_data, "float32")
+    z0 = x0 + y0_const
+    ef = relay.Function([x0], z0, relay.TensorType((8, 8), "float32"))
+    ev = relay.GlobalVar("external_function")
+    ef = set_external_func_attr(ef, "external_compiler", ev.name_hint)
+
+    x = relay.var("x", shape=(8, 8))
+    c = relay.Call(ev, [x])
+    mf = relay.Function([x], c, relay.TensorType((8, 8), "float32"))
+    mv = relay.GlobalVar("main")
+
+    mod = tvm.IRModule()
+    mod[ev] = ef
+    mod[mv] = mf
+
+    mod = ExtractConstantsFromPartitionedFunction()(mod)
+    CheckFunctionsForConstants().visit_function(mod[ev])
+    relay.transform.InferType()(mod)
+
+
+def test_nested_function():
+    y1_data = np.random.uniform(0, 1, (8, 8)).astype("float32")
+    x1 = relay.var("x1", shape=(8, 8))
+    y1_const = relay.const(y1_data, "float32")
+    z1 = x1 + y1_const
+    w1 = z1 * relay.const(5.0, "float32")
+    lf = relay.Function([x1], w1, relay.TensorType((8, 8), "float32"))
+
+    x0 = relay.var("x0", shape=(8, 8))
+    c0 = relay.Call(lf, [x0])
+    ef = relay.Function([x0], c0, relay.TensorType((8, 8), "float32"))
+
+    x = relay.var("x", shape=(8, 8))
+    ev = relay.GlobalVar("external_function")
+    ef = set_external_func_attr(ef, "external_compiler", ev.name_hint)
+    c = relay.Call(ev, [x])
+    mf = relay.Function([x], c, relay.TensorType((8, 8), "float32"))
+    mv = relay.GlobalVar("main")
+
+    mod = tvm.IRModule()
+    mod[ev] = ef
+    mod[mv] = mf
+
+    mod = ExtractConstantsFromPartitionedFunction()(mod)
+    CheckFunctionsForConstants().visit_function(mod[ev])
+    relay.transform.InferType()(mod)
+
+
+def test_multiple_functions():
+    y20_data = np.random.uniform(0, 1, (8, 8)).astype("float32")
+    x20 = relay.var("x20", shape=(8, 8))
+    y20_const = relay.const(y20_data, "float32")
+    z20 = x20 + y20_const
+    f20 = relay.Function([x20], z20, relay.TensorType((8, 8), "float32"))
+
+    y21_data = np.random.uniform(0, 1, (8, 8)).astype("float32")
+    x21 = relay.var("x21", shape=(8, 8))
+    y21_const = relay.const(y21_data, "float32")
+    z21 = x21 + y21_const
+    f21 = relay.Function([x21], z21, relay.TensorType((8, 8), "float32"))
+
+    x10 = relay.var("x10", shape=(8, 8))
+    c10 = relay.Call(f20, [x10])
+    c11 = relay.Call(f21, [c10])
+    ef = relay.Function([x10], c11, relay.TensorType((8, 8), "float32"))
+
+    x0 = relay.var("x0", shape=(8, 8))
+    ev = relay.GlobalVar("external_function")
+    ef = set_external_func_attr(ef, "external_compiler", ev.name_hint)
+    c = relay.Call(ev, [x0])
+    mf = relay.Function([x0], c, relay.TensorType((8, 8), "float32"))
+    mv = relay.GlobalVar("main")
+
+    mod = tvm.IRModule()
+    mod[ev] = ef
+    mod[mv] = mf
+
+    mod = ExtractConstantsFromPartitionedFunction()(mod)
+    CheckFunctionsForConstants().visit_function(mod[ev])
+    relay.transform.InferType()(mod)

Review comment:
       Should we assert anything here? Same question as above.

##########
File path: src/relay/backend/contrib/cmsisnn/tir_to_runtime.cc
##########
@@ -54,6 +54,162 @@ class CodeGenCMSISNN : public CodeGenC {
   }
 
  private:
+  /*!  * \brief Emit the CMSIS-NN context buffer */
+  void VisitStmt_(const AllocateNode* op) {
+    context_buffer_name_ = op->buffer_var->name_hint;
+    context_buffer_size_ = op->constant_allocation_size();
+    CodeGenC::VisitStmt_(op);
+  }
+
+  /*!  * \brief Emits CMSIS-NN APIs for every call_extern */
+  void VisitExpr_(const CallNode* op, std::ostream& os) {  // NOLINT(*)
+    if (!op->op.same_as(builtin::call_extern())) {
+      return;
+    }
+    std::string cmsis_func_name = op->args[0].as<StringImmNode>()->value;
+    if (cmsis_func_name == "arm_softmax_s8" || cmsis_func_name == "arm_elementwise_mul_s8" ||
+        cmsis_func_name == "arm_elementwise_add_s8") {
+      CodeGenC::VisitExpr_(op, os);
+    } else if (cmsis_func_name == "arm_convolve_wrapper_s8") {
+      EmitConv2D(op);
+    }
+    return;
+  }
+
+  /*!  * \brief Emits cmsis_nn_context struct */
+  std::string EmitCMSISNNContext(std::ostream& os, std::string buf_name, int buf_size) {
+    std::string struct_name = "context";
+    PrintIndent();
+    os << "cmsis_nn_context " << struct_name << "= {" << buf_name << "," << buf_size << "};\n";
+    return struct_name;
+  }
+
+  /*!  * \brief Emits cmsis_nn_conv_params struct */
+  std::string EmitCMSISNNConvParams(std::ostream& os, int32_t input_offset, int32_t output_offset,
+                                    int32_t stride_w, int32_t stride_h, int32_t padding_w,
+                                    int32_t padding_h, int32_t dilation_w, int32_t dilation_h,
+                                    int32_t clip_min, int32_t clip_max) {
+    std::string struct_name = "conv_params";
+    PrintIndent();
+    os << "cmsis_nn_tile stride = {" << stride_w << "," << stride_h << "};\n";
+    PrintIndent();
+    os << "cmsis_nn_tile padding = {" << padding_w << "," << padding_h << "};\n";
+    PrintIndent();
+    os << "cmsis_nn_tile dilation = {" << dilation_w << "," << dilation_h << "};\n";
+    PrintIndent();
+    os << "cmsis_nn_activation activation = {" << clip_min << "," << clip_max << "};\n";
+    PrintIndent();
+    os << "cmsis_nn_conv_params " << struct_name << " = {" << input_offset << ", " << output_offset
+       << ", stride, padding, dilation, activation};\n";
+    return struct_name;
+  }
+
+  /*!  * \brief Emits cmsis_nn_per_channel_quant_params struct */
+  std::string EmitCMSISNNPerChannelQuantParams(std::ostream& os, std::string multiplier,
+                                               std::string shift) {
+    std::string struct_name = "quant_params";
+    PrintIndent();
+    os << "cmsis_nn_per_channel_quant_params " << struct_name << " = {" << multiplier << ", "
+       << shift << "};\n";
+    return struct_name;
+  }
+
+  /*!  * \brief Emits cmsis_nn_dims struct */
+  std::string EmitCMSISNNDims(std::ostream& os, std::string tensor_type, int32_t n, int32_t h,
+                              int32_t w, int32_t c) {
+    std::string struct_name = tensor_type + "_dims";
+    PrintIndent();
+    os << "cmsis_nn_dims " << struct_name << " = {" << n << "," << h << "," << w << "," << c
+       << "};\n";
+    return struct_name;
+  }
+
+  /*!  * \brief Emits CMSIS-NN APIs for every call_extern */
+  void EmitConv2D(const CallNode* op) {
+    static const int max_num_args = 33;
+    std::string cmsis_func_name = op->args[0].as<StringImmNode>()->value;
+
+    bool bias_enabled = false;
+    if (op->args.size() == max_num_args) {
+      bias_enabled = true;
+    }
+
+    auto get_var_name = [](const CallNode* op, int id) {
+      return op->args[id].as<VarNode>()->name_hint.c_str();
+    };
+    auto get_arg_value = [](const CallNode* op, int id) {
+      return op->args[id].as<IntImmNode>()->value;
+    };
+    int arg_id = 0;
+    std::string input_data = get_var_name(op, ++arg_id);
+    std::string filter_data = get_var_name(op, ++arg_id);
+    std::string multiplier = get_var_name(op, ++arg_id);
+    std::string bias_data("0x0");
+    if (bias_enabled) {
+      bias_data = get_var_name(op, ++arg_id);
+    }
+    std::string shift = get_var_name(op, ++arg_id);
+    std::string output_data = get_var_name(op, ++arg_id);
+
+    int input_offset = get_arg_value(op, ++arg_id);

Review comment:
       Is there a way to use the encoding here?







[GitHub] [tvm] manupa-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r743818992



##########
File path: python/tvm/relay/op/contrib/cmsisnn.py
##########
@@ -52,37 +54,85 @@ def partition_for_cmsisnn(mod, params=None, **opts):
             transform.InferType(),
             transform.MergeComposite(pattern_table()),
             transform.AnnotateTarget("cmsisnn"),
-            transform.MergeCompilerRegions(),

Review comment:
       The way the current BYOC works, this will create a source file per operator, and an additional level of indirection will be present for each call, i.e.
   
   main_func <--> ext_func1 <--> cmsis_func1
   main_func <--> ext_func2 <--> cmsis_func2
   main_func <--> ext_func3 <--> cmsis_func3
   
   vs
   
   main_func <--> ext_func <--> cmsis_func1, cmsis_func2, cmsis_func3

Review comment:
       Code size -- due to additional external functions? And more translation units produced? Each ext function will become its own .c file, right?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@tvm.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [tvm] manupa-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r743818992



##########
File path: python/tvm/relay/op/contrib/cmsisnn.py
##########
@@ -52,37 +54,85 @@ def partition_for_cmsisnn(mod, params=None, **opts):
             transform.InferType(),
             transform.MergeComposite(pattern_table()),
             transform.AnnotateTarget("cmsisnn"),
-            transform.MergeCompilerRegions(),

Review comment:
       The way the current BYOC works, this will create a source per operator and also additional set of indirection will be there for each call.
   
   i.e.
   main_func <--> ext_func <--> cmsis_func
   main_func <--> ext_func <--> cmsis_func
   main_func <--> ext_func <--> cmsis_func
   
   vs
   
   main_func <--> ext_func <--> cmsis_func1, cmsis_func2, cmsis_func3

##########
File path: python/tvm/relay/op/contrib/cmsisnn.py
##########
@@ -52,37 +54,85 @@ def partition_for_cmsisnn(mod, params=None, **opts):
             transform.InferType(),
             transform.MergeComposite(pattern_table()),
             transform.AnnotateTarget("cmsisnn"),
-            transform.MergeCompilerRegions(),

Review comment:
       The way the current BYOC works, this will create a source per operator and also additional set of indirection will be there for each call.
   
   i.e.
   main_func <--> ext_func <--> cmsis_func1
   main_func <--> ext_func <--> cmsis_func2
   main_func <--> ext_func <--> cmsis_func3
   
   vs
   
   main_func <--> ext_func <--> cmsis_func1, cmsis_func2, cmsis_func3

##########
File path: python/tvm/relay/op/contrib/cmsisnn.py
##########
@@ -52,37 +54,85 @@ def partition_for_cmsisnn(mod, params=None, **opts):
             transform.InferType(),
             transform.MergeComposite(pattern_table()),
             transform.AnnotateTarget("cmsisnn"),
-            transform.MergeCompilerRegions(),

Review comment:
       Code size ? and more translation units produced ? each ext function will become its own .c file right ?

##########
File path: python/tvm/relay/op/contrib/cmsisnn.py
##########
@@ -52,37 +54,85 @@ def partition_for_cmsisnn(mod, params=None, **opts):
             transform.InferType(),
             transform.MergeComposite(pattern_table()),
             transform.AnnotateTarget("cmsisnn"),
-            transform.MergeCompilerRegions(),

Review comment:
       Code size -- due to additional external function ? and more translation units produced ? each ext function will become its own .c file right ?

##########
File path: python/tvm/relay/op/contrib/cmsisnn.py
##########
@@ -52,37 +54,85 @@ def partition_for_cmsisnn(mod, params=None, **opts):
             transform.InferType(),
             transform.MergeComposite(pattern_table()),
             transform.AnnotateTarget("cmsisnn"),
-            transform.MergeCompilerRegions(),

Review comment:
       Code size -- due to the additional external functions? And more translation units produced? Each ext function will become its own .c file, right?

##########
File path: python/tvm/relay/op/contrib/cmsisnn.py
##########
@@ -52,37 +54,85 @@ def partition_for_cmsisnn(mod, params=None, **opts):
             transform.InferType(),
             transform.MergeComposite(pattern_table()),
             transform.AnnotateTarget("cmsisnn"),
-            transform.MergeCompilerRegions(),

Review comment:
       The way the current BYOC works, this will create a source file per operator, and an additional level of indirection will be introduced for each call.
   
   i.e.
   main_func <--> ext_func1 <--> cmsis_func1
   main_func <--> ext_func2 <--> cmsis_func2
   main_func <--> ext_func3 <--> cmsis_func3
   
   vs
   
   main_func <--> ext_func <--> cmsis_func1, cmsis_func2, cmsis_func3







[GitHub] [tvm] manupa-arm merged pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm merged pull request #9331:
URL: https://github.com/apache/tvm/pull/9331


   





[GitHub] [tvm] manupa-arm commented on pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#issuecomment-966106435


   > @manupa-arm / @ashutosh-arm in general if we're adding things that raise exceptions as part of their interface we should be testing them.
   
   I agree for user-facing exceptions. What about internal assertions that are not meant to be triggered in a user execution flow and only fire when a "developer" is changing the compilation stack?
   
   Generally these assertions are derived from the assumption that a user flow will never trigger them, and they are usually about the inputs to a particular component.
   
   Having a test that uses another assertion to check that an assertion on an input is working only seems to give confidence that the caller can pass in an argument that triggers the assertion. Ideally, a release build should remove these internal assertions, as they are only dev-facing.
   
   Let me know if you still think it is worthwhile adding a test for a dev-facing assertion, i.e. one that only gives us confidence that a component can pass in an input violating an assumption the developer made that a user flow will never trigger. If so, we might want to check the assertion in this PR (here or in a follow-up).
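
For concreteness, a test of that kind would look roughly like the sketch below. It is hypothetical: `build_non_4d_module` is an assumed helper, and it assumes the ICHECK (e.g. the one guarding 4-D shapes in relay_to_tir.cc) surfaces as a `tvm.TVMError` during lowering:

```python
import pytest
import tvm
from tvm import relay

def test_internal_icheck_fires():
    # Hypothetical helper producing a CMSIS-NN partitioned module whose
    # tensor shapes are not 4-D, violating the developer assumption.
    mod = build_non_4d_module()
    # The ICHECK message is "Supports only CMSIS-NN shapes of dimension 4."
    with pytest.raises(tvm.TVMError, match="dimension 4"):
        relay.build(mod, target="c")
```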





[GitHub] [tvm] manupa-arm commented on pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#issuecomment-966258058


   Alright, thanks for the views! Let's not block the PR on this -- it is certainly a high-bandwidth discussion to be had.
   I think this PR can go in. @ashutosh-arm, would you be able to fix the conflicts so we can merge this?





[GitHub] [tvm] manupa-arm commented on pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#issuecomment-966204840


   >  if you genuinely disagree feel free to merge without test coverage 
   
   My position is to keep the assert (as originally added by @ashutosh-arm -- not a request for a change). I only oppose removing it on the basis that it is not tested. I'm happy to do it in a follow-up if that's what @ashutosh-arm wants to do here.
   
   > this has happened on other PRs I've reviewed.
   
   Hence the questioning here: this exact point was raised elsewhere but not here. In other places we committed to follow up if that is the direction we want to take. cc: @ekalda @mbaret. I'm pretty sure that if we agree on this point we will make the necessary changes.
   
   > working on the assumption we want the code to continue working for the foreseeable future
   
   I'm pretty sure we all want this -- I am just being lenient on internal asserts, that is all.
   
   





[GitHub] [tvm] manupa-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r744674174



##########
File path: python/tvm/relay/op/contrib/cmsisnn.py
##########
@@ -52,37 +54,85 @@ def partition_for_cmsisnn(mod, params=None, **opts):
             transform.InferType(),
             transform.MergeComposite(pattern_table()),
             transform.AnnotateTarget("cmsisnn"),
-            transform.MergeCompilerRegions(),

Review comment:
       Thanks, @Mousius! As long as there is no overhead from indirection and multiple source files, we should be good. :)







[GitHub] [tvm] manupa-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r742694156



##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -43,23 +44,149 @@ class RelayToTIRVisitor : public MixedModeVisitor {
   inline IntImm ToArg(int32_t value) { return IntImm(DataType::Int(32), value); }
 
   void CreatePrimFuncForExtern(Array<tir::Var> func_signature,
-                               tvm::Array<PrimExpr> call_extern_args) {
+                               tvm::Array<PrimExpr> call_extern_args, int context_buffer_size = 0) {
     Map<String, ObjectRef> dict_attrs;
     dict_attrs.Set("global_symbol", func_name_);
     dict_attrs.Set("tir.noalias", Bool(true));
 
     tir::Stmt body = tir::Evaluate(
         tvm::tir::Call(DataType::Int(8), tir::builtin::call_extern(), call_extern_args));
 
+    if (context_buffer_size) {
+      tir::Var buffer_var("context_buffer", PointerType(PrimType(DataType::Int(8)), "global"));
+      body = tir::Allocate(buffer_var, DataType::Int(8), {context_buffer_size}, tir::const_true(),
+                           body);
+    }
+
     primfunc_ = tir::PrimFunc(func_signature, body, VoidType(), Map<tir::Var, tir::Buffer>(),
                               DictAttrs(dict_attrs));
   }
 
+  Array<PrimExpr> CMSISNNDimensions(const Array<PrimExpr>& shape) {
+    ICHECK(shape.size() == 4) << "Supports only CMSIS-NN shapes of dimension 4.";

Review comment:
       Do we need to test these or not? I'm not sure now. @Mousius?

##########
File path: src/relay/backend/contrib/cmsisnn/generate_constants.cc
##########
@@ -0,0 +1,230 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/attrs/transform.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+Expr MakeTranspose(Expr data, Array<Integer> axes);
+namespace contrib {
+namespace cmsisnn {
+
+class GenerateConstantsMutator : public MixedModeMutator {
+ public:
+  explicit GenerateConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  /*!  * \brief Converts Kernel layout from HWIO to OHWI to align to CMSIS-NN requirements */
+  Expr ConvertKernelLayout(Expr kernel_expr, const Conv2DAttrs* conv2d_attrs, Attrs* new_attrs) {
+    auto attrs = make_object<Conv2DAttrs>();
+    attrs->strides = std::move(conv2d_attrs->strides);
+    attrs->padding = std::move(conv2d_attrs->padding);
+    attrs->dilation = std::move(conv2d_attrs->dilation);
+    attrs->groups = conv2d_attrs->groups;
+    attrs->channels = std::move(conv2d_attrs->channels);
+    attrs->kernel_size = std::move(conv2d_attrs->kernel_size);
+    attrs->data_layout = std::move(conv2d_attrs->data_layout);
+    attrs->kernel_layout = runtime::String("OHWI");
+    attrs->out_layout = std::move(conv2d_attrs->out_layout);
+    attrs->out_dtype = std::move(conv2d_attrs->out_dtype);
+    *new_attrs = tvm::Attrs{attrs};
+
+    IRModule kernel_module;
+    auto func_body = MakeTranspose(kernel_expr, {Integer(3), Integer(0), Integer(1), Integer(2)});
+    auto kernel_func =
+        Function(FreeVars(func_body), func_body, Type(), FreeTypeVars(func_body, kernel_module));
+    GlobalVar kernel_var("main");
+    kernel_module->Add(kernel_var, kernel_func);
+    kernel_module = relay::transform::FoldConstant()(kernel_module);
+    kernel_func = Downcast<Function>(kernel_module->Lookup("main"));
+    return kernel_func->body;
+  }
+
+  /*!  * \brief Performs weight transpose and substitutes existing constants in the composite
+   *            function for Conv2D with CMSIS-NN Requantize constants */
+  Expr GenerateConv2dRequantConstants(const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* conv2d_call = nullptr;
+    auto* final_call = expr.as<CallNode>();
+    auto* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    auto* requantize_input = requantize_call->args[0].as<CallNode>();
+    auto* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      conv2d_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      conv2d_call = requantize_input;
+    }
+
+    // Transpose weights: HWIO -> OHWI
+    auto* conv2d_attrs = conv2d_call->attrs.as<Conv2DAttrs>();
+    tvm::Attrs new_conv2d_attrs;
+    Expr transposed_kernel =
+        ConvertKernelLayout(conv2d_call->args[1], conv2d_attrs, &new_conv2d_attrs);
+
+    // Obtain input and output scales from Relay's Requantization
+    int64_t out_channels = conv2d_attrs->channels.as<IntImmNode>()->value;
+    float output_scale = GetScalarFromConstant<float>(requantize_call->args[3]);
+    auto input_scales = tvm::relay::qnn::GetFloatVectorFromConstant(requantize_call->args[1]);
+    ICHECK(input_scales.size() == static_cast<size_t>(out_channels));
+
+    // Calculate requantization multiplier and shift
+    Device dev{DLDeviceType::kDLCPU, 0};
+    runtime::NDArray multiplier_nda =
+        runtime::NDArray::Empty({out_channels}, DataType::Int(32), dev);
+    runtime::NDArray shift_nda = runtime::NDArray::Empty({out_channels}, DataType::Int(32), dev);
+    int32_t* multiplier = static_cast<int32_t*>(multiplier_nda->data);
+    int32_t* shift = static_cast<int32_t*>(shift_nda->data);
+    for (int i = 0; i < out_channels; ++i) {
+      double effective_output_scale =
+          static_cast<double>(input_scales[i]) / static_cast<double>(output_scale);
+      std::tie(*(multiplier + i), *(shift + i)) =
+          tvm::relay::qnn::GetFixedPointMultiplierShift(effective_output_scale);
+    }
+
+    // Create constants from requantization multiplier and shift
+    Constant multiplier_const(multiplier_nda);
+    Constant shift_const(shift_nda);
+
+    // Convert scale scalars into Constants
+    // Scales are expected as Constants by following passes
+    Expr weight_scale = conv2d_call->args[5];
+    Expr req_inp_scale = requantize_call->args[1];
+    if (out_channels == 1) {
+      runtime::NDArray weight_scale_nda =
+          runtime::NDArray::Empty({out_channels}, DataType::Float(32), dev);
+      float* weight_scale_p = static_cast<float*>(weight_scale_nda->data);
+      *weight_scale_p = GetScalarFromConstant<float>(weight_scale);
+      weight_scale = Constant(weight_scale_nda);
+
+      runtime::NDArray req_inp_scale_nda =
+          runtime::NDArray::Empty({out_channels}, DataType::Float(32), dev);
+      float* req_inp_scale_p = static_cast<float*>(req_inp_scale_nda->data);
+      *req_inp_scale_p = GetScalarFromConstant<float>(req_inp_scale);
+      req_inp_scale = Constant(req_inp_scale_nda);
+    }
+
+    // Replace existing weights (HWIO) with the transposed ones (OHWI)
+    // Substitute Conv2D weight_zero_point with the CMSIS-NN multiplier
+    // Substitute Requantize input_zero_point with CMSIS-NN shift
+    // Conv2D arguments: data, weight, input_zp, weight_zp, input_sc, weight_sc
+    Array<Expr> conv2d_args = {conv2d_call->args[0], transposed_kernel,    conv2d_call->args[2],
+                               multiplier_const,     conv2d_call->args[4], weight_scale};
+    Call ret_call = Call(conv2d_call->op, conv2d_args, new_conv2d_attrs, {});
+    if (bias_add_call) {
+      ret_call =
+          Call(bias_add_call->op, {ret_call, bias_add_call->args[1]}, bias_add_call->attrs, {});
+    }
+    Array<Expr> requantize_args = {ret_call, req_inp_scale, shift_const, requantize_call->args[3],
+                                   requantize_call->args[4]};
+    ret_call = Call(requantize_call->op, requantize_args, requantize_call->attrs, {});
+    if (clip_call) {
+      ret_call = Call(clip_call->op, {ret_call}, clip_call->attrs, {});
+    }
+    return ret_call;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    auto* global_var = call->op.as<GlobalVarNode>();
+    if (global_var) {
+      // Update to global function call needed because the body changes while
+      // generating new constants
+      Function func = Downcast<Function>(mod_->Lookup(global_var->name_hint));
+      Expr new_body = VisitExpr(func->body);
+      if (!new_body.same_as(func->body)) {
+        Function new_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                                     FreeTypeVars(new_body, mod_), func->attrs);
+        mod_->Update(GetRef<GlobalVar>(global_var), new_func);
+        final_call = Call(GetRef<GlobalVar>(global_var), post_call->args);
+      }
+    }
+
+    // Recreate composite function and corresponding call
+    // Updated composite function contains CMSIS-NN quantized multiplier and shift constants
+    if (call->op.as<FunctionNode>()) {

Review comment:
       Ack
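
As context for the hunk above: the per-channel requantization constants come from decomposing each effective scale (the requantize input scale divided by the output scale) into a fixed-point multiplier and shift. A minimal Python sketch of what `GetFixedPointMultiplierShift` presumably computes, assuming the usual Q31 convention so that `scale ~= multiplier * 2**(shift - 31)`:

```python
import math

def fixed_point_multiplier_shift(scale):
    # Decompose scale = m * 2**shift with m in [0.5, 1), then store m
    # as a Q31 integer.
    if scale == 0.0:
        return 0, 0
    m, shift = math.frexp(scale)
    multiplier = int(round(m * (1 << 31)))
    if multiplier == (1 << 31):  # rounding pushed m up to 1.0
        multiplier //= 2
        shift += 1
    return multiplier, shift
```

Each output channel's effective scale is run through this decomposition, which is why the pass materializes one multiplier constant and one shift constant of length `out_channels`.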

##########
File path: python/tvm/relay/op/contrib/cmsisnn.py
##########
@@ -52,37 +54,85 @@ def partition_for_cmsisnn(mod, params=None, **opts):
             transform.InferType(),
             transform.MergeComposite(pattern_table()),
             transform.AnnotateTarget("cmsisnn"),
-            transform.MergeCompilerRegions(),

Review comment:
       Why are we not running this ?

##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    // Replace Constant arguments with Vars for ML Operators
+    // Perform this for non-main Call Nodes only
+    if (func_nesting_level_ && call->op.as<OpNode>()) {
+      Array<Expr> new_args;
+      for (auto& arg : post_call->args) {
+        auto* const_arg = arg.as<ConstantNode>();
+        if (const_arg && !const_arg->is_scalar()) {
+          Var var_arg = Var(gen_var_name(), const_arg->tensor_type());
+          new_args.push_back(var_arg);
+          constants_within_function_.push_back(GetRef<Constant>(const_arg));
+        } else {
+          new_args.push_back(arg);
+        }
+      }
+      final_call = Call(call->op, new_args, call->attrs, {});
+    }
+
+    // Since the constants are kicked out of partitioned functions
+    // a new call to global function is needed
+    if (auto* glob_var_node = post_call->op.as<GlobalVarNode>()) {
+      auto glob_var = GetRef<GlobalVar>(glob_var_node);
+      auto glob_func = Downcast<Function>(mod_->Lookup(glob_var));
+      auto new_glob_func = VisitExpr(glob_func);
+      if (!new_glob_func.same_as(glob_func)) {
+        mod_->Update(glob_var, Downcast<Function>(new_glob_func));
+        Array<Expr> new_args = post_call->args;
+        ICHECK(function_to_constants_.find(glob_func) != function_to_constants_.end());
+        for (auto constant : function_to_constants_.at(glob_func)) {
+          new_args.push_back(constant);
+        }
+        final_call = Call(glob_var, new_args);
+      }
+    }
+
+    // Since the constants are kicked out of the local partitioned functions
+    // a new call to local function is needed
+    if (auto* func_node = call->op.as<FunctionNode>()) {

Review comment:
       Ack







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r733863452



##########
File path: python/tvm/relay/op/contrib/cmsisnn.py
##########
@@ -47,42 +47,93 @@ def partition_for_cmsisnn(mod, params=None, **opts):
     if params:
         mod["main"] = bind_params_by_name(mod["main"], params)
 
+    tvm._ffi._init_api("relay.ext.cmsisnn.transform", __name__)

Review comment:
       Makes sense.







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r733864218



##########
File path: tests/python/contrib/test_cmsisnn/test_conv2d.py
##########
@@ -0,0 +1,303 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""CMSIS-NN integration tests: Conv2D"""
+import itertools
+import numpy as np
+import pytest
+import tvm
+from tvm import relay
+from tvm.relay.op.contrib import cmsisnn
+
+
+from tests.python.relay.aot.aot_test_utils import (
+    AOTTestModel,
+    AOT_CORSTONE300_RUNNER,
+    AOT_DEFAULT_RUNNER,
+    generate_ref_data,
+    compile_and_run,
+)
+from utils import (
+    skip_if_no_reference_system,
+    make_module,
+    count_num_calls,
+    get_range_for_dtype_str,
+    get_same_padding,
+    get_conv2d_qnn_params,
+    make_qnn_relu,
+)
+
+
+def make_model(
+    shape,
+    kernel_shape,
+    input_zp,
+    input_sc,
+    kernel_zp,
+    kernel_sc,
+    output_zp,
+    output_sc,
+    padding,
+    strides,
+    dilation,
+    groups,
+    dtype,
+    kernel_dtype,
+    out_channels,
+    weight_format,
+    enable_bias,
+    relu_type,
+):
+    """Return a model and any parameters it may have"""
+    h_index = weight_format.index("H")
+    w_index = weight_format.index("W")
+    kernel_h = kernel_shape[h_index]
+    kernel_w = kernel_shape[w_index]
+    a = relay.var("in0", shape=shape, dtype=dtype)
+    p = (0, 0, 0, 0)
+    if padding == "SAME":
+        p = get_same_padding((shape[1], shape[2]), (kernel_h, kernel_w), dilation, strides)
+        a = relay.nn.pad(
+            a,
+            pad_width=[(0, 0), (p[0], p[2]), (p[1], p[3]), (0, 0)],
+            pad_value=input_zp,
+            pad_mode="constant",
+        )
+        shape = (shape[0], shape[1] + p[0] + p[2], shape[2] + p[1] + p[3], shape[3])
+
+    weight_shape = (kernel_h, kernel_w, shape[3] // groups, out_channels)
+    w = tvm.nd.array(
+        np.random.randint(
+            np.iinfo(kernel_dtype).min,
+            high=np.iinfo(kernel_dtype).max,
+            size=weight_shape,
+            dtype=kernel_dtype,
+        )
+    )
+    weights = relay.const(w, kernel_dtype)
+    conv = relay.qnn.op.conv2d(
+        a,
+        weights,
+        input_zero_point=relay.const(input_zp, "int32"),
+        kernel_zero_point=relay.const(kernel_zp, "int32"),
+        input_scale=relay.const(input_sc, "float32"),
+        kernel_scale=relay.const(kernel_sc, "float32"),
+        kernel_size=(kernel_h, kernel_w),
+        data_layout="NHWC",
+        kernel_layout=weight_format,
+        dilation=dilation,
+        strides=strides,
+        groups=groups,
+        channels=out_channels,
+        padding=p,
+        out_dtype="int32",
+    )
+    b = tvm.nd.array(np.random.randint(0, high=10, size=(out_channels,), dtype="int32"))
+    bc = relay.const(b, "int32")

Review comment:
       Changed it to bias_const.
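
As context for the hunk above, `get_same_padding` presumably implements TF-style SAME padding; a minimal per-dimension sketch of that calculation (the helper is assumed to combine two such results into the (top, left, bottom, right) tuple used for `pad_width`):

```python
import math

def same_padding_1d(in_size, kernel, dilation, stride):
    # TF-style SAME padding for one spatial dimension:
    # the output size must be ceil(in_size / stride).
    dilated_kernel = (kernel - 1) * dilation + 1
    out_size = math.ceil(in_size / stride)
    total = max((out_size - 1) * stride + dilated_kernel - in_size, 0)
    return total // 2, total - total // 2  # (pad_before, pad_after)
```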

##########
File path: tests/python/contrib/test_cmsisnn/test_conv2d.py
##########
@@ -0,0 +1,303 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""CMSIS-NN integration tests: Conv2D"""
+import itertools
+import numpy as np
+import pytest
+import tvm
+from tvm import relay
+from tvm.relay.op.contrib import cmsisnn
+
+
+from tests.python.relay.aot.aot_test_utils import (
+    AOTTestModel,
+    AOT_CORSTONE300_RUNNER,
+    AOT_DEFAULT_RUNNER,
+    generate_ref_data,
+    compile_and_run,
+)
+from utils import (
+    skip_if_no_reference_system,
+    make_module,
+    count_num_calls,
+    get_range_for_dtype_str,
+    get_same_padding,
+    get_conv2d_qnn_params,
+    make_qnn_relu,
+)
+
+
+def make_model(
+    shape,
+    kernel_shape,
+    input_zp,
+    input_sc,
+    kernel_zp,
+    kernel_sc,
+    output_zp,
+    output_sc,
+    padding,
+    strides,
+    dilation,
+    groups,
+    dtype,
+    kernel_dtype,
+    out_channels,
+    weight_format,
+    enable_bias,
+    relu_type,
+):
+    """Return a model and any parameters it may have"""
+    h_index = weight_format.index("H")
+    w_index = weight_format.index("W")
+    kernel_h = kernel_shape[h_index]
+    kernel_w = kernel_shape[w_index]
+    a = relay.var("in0", shape=shape, dtype=dtype)
+    p = (0, 0, 0, 0)
+    if padding == "SAME":
+        p = get_same_padding((shape[1], shape[2]), (kernel_h, kernel_w), dilation, strides)
+        a = relay.nn.pad(
+            a,
+            pad_width=[(0, 0), (p[0], p[2]), (p[1], p[3]), (0, 0)],
+            pad_value=input_zp,
+            pad_mode="constant",
+        )
+        shape = (shape[0], shape[1] + p[0] + p[2], shape[2] + p[1] + p[3], shape[3])
+
+    weight_shape = (kernel_h, kernel_w, shape[3] // groups, out_channels)
+    w = tvm.nd.array(
+        np.random.randint(
+            np.iinfo(kernel_dtype).min,
+            high=np.iinfo(kernel_dtype).max,
+            size=weight_shape,
+            dtype=kernel_dtype,
+        )
+    )
+    weights = relay.const(w, kernel_dtype)
+    conv = relay.qnn.op.conv2d(
+        a,
+        weights,
+        input_zero_point=relay.const(input_zp, "int32"),
+        kernel_zero_point=relay.const(kernel_zp, "int32"),
+        input_scale=relay.const(input_sc, "float32"),
+        kernel_scale=relay.const(kernel_sc, "float32"),
+        kernel_size=(kernel_h, kernel_w),
+        data_layout="NHWC",
+        kernel_layout=weight_format,
+        dilation=dilation,
+        strides=strides,
+        groups=groups,
+        channels=out_channels,
+        padding=p,
+        out_dtype="int32",
+    )
+    b = tvm.nd.array(np.random.randint(0, high=10, size=(out_channels,), dtype="int32"))
+    bc = relay.const(b, "int32")
+    bias = conv
+    if enable_bias:
+        bias = relay.nn.bias_add(conv, bc, axis=3)
+    requant_input_sc = [sc * input_sc for sc in kernel_sc]
+    req = relay.qnn.op.requantize(
+        bias,
+        relay.const(requant_input_sc, "float32"),
+        relay.const(0, "int32"),
+        relay.const(output_sc, "float32"),
+        relay.const(output_zp, "int32"),
+        out_dtype=dtype,
+    )
+    relu = make_qnn_relu(req, relu_type, output_sc, output_zp, dtype)
+    params = {"w": w, "b": b}
+    return relu, params
+
+
+@tvm.testing.requires_cmsisnn
+@pytest.mark.parametrize("ifm_shape", [(1, 28, 28, 12), (1, 64, 100, 4)])
+@pytest.mark.parametrize("kernel_size", [(3, 3)])
+@pytest.mark.parametrize("padding", ["SAME", "VALID"])
+@pytest.mark.parametrize("strides, dilation", [((2, 2), (1, 1)), ((1, 1), (1, 1))])
+@pytest.mark.parametrize("enable_bias", [True, False])
+@pytest.mark.parametrize("relu_type", ["NONE", "RELU"])
+@pytest.mark.parametrize(
+    "in_zp, in_sc, k_sc, out_channels",
+    [(10, 0.0128, [0.11, 0.22], 2), (-64, 1, [1, 0.0256, 1.37], 3)],
+)
+def test_op_int8(
+    ifm_shape,
+    kernel_size,
+    padding,
+    strides,
+    dilation,
+    enable_bias,
+    relu_type,
+    in_zp,
+    in_sc,
+    k_sc,
+    out_channels,
+):
+    interface_api = "c"
+    use_unpacked_api = True
+    test_runner = AOT_CORSTONE300_RUNNER
+
+    k_zp = 0

Review comment:
       Ah! The reason they are cryptic is that they were derived from another target, but that's no excuse for not changing them.







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r733877110



##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -43,23 +44,153 @@ class RelayToTIRVisitor : public MixedModeVisitor {
   inline IntImm ToArg(int32_t value) { return IntImm(DataType::Int(32), value); }
 
   void CreatePrimFuncForExtern(Array<tir::Var> func_signature,
-                               tvm::Array<PrimExpr> call_extern_args) {
+                               tvm::Array<PrimExpr> call_extern_args, int context_buffer_size = 0) {
     Map<String, ObjectRef> dict_attrs;
     dict_attrs.Set("global_symbol", func_name_);
     dict_attrs.Set("tir.noalias", Bool(true));
 
     tir::Stmt body = tir::Evaluate(
         tvm::tir::Call(DataType::Int(8), tir::builtin::call_extern(), call_extern_args));
 
+    if (context_buffer_size) {
+      // TODO(@ashutosh-arm) while supporting MVE, we need to move allocation through TVMBAW
+      tir::Var buffer_var("context_buffer", PointerType(PrimType(DataType::Int(8)), "global"));
+      body = tir::Allocate(buffer_var, DataType::Int(8), {context_buffer_size}, tir::const_true(),
+                           body);
+    }
+
     primfunc_ = tir::PrimFunc(func_signature, body, VoidType(), Map<tir::Var, tir::Buffer>(),
                               DictAttrs(dict_attrs));
   }
 
+  void EmitConv2D(const Expr& expr) {
+    const CallNode* clip_call = nullptr;
+    const CallNode* requantize_call = nullptr;
+    const CallNode* bias_add_call = nullptr;
+    const CallNode* conv2d_call = nullptr;
+    auto* final_call = expr.as<CallNode>();
+    auto* final_op = final_call->op.as<OpNode>();
+    if (final_op->name == "clip") {
+      clip_call = final_call;
+      requantize_call = clip_call->args[0].as<CallNode>();
+    } else {
+      requantize_call = final_call;
+    }
+    auto* requantize_input = requantize_call->args[0].as<CallNode>();
+    auto* requantize_input_op = requantize_input->op.as<OpNode>();
+    if (requantize_input_op->name == "nn.bias_add") {
+      bias_add_call = requantize_input;
+      conv2d_call = bias_add_call->args[0].as<CallNode>();
+    } else {
+      conv2d_call = requantize_input;
+    }
+
+    // TIR variables are created in the order they appear in the Relay partitioned function
+    // %1 = qnn.conv2d(%input, %weight_const_0, input_zero_point_scalar,
+    //                 %cmsisnn_multiplier_const_1, %input_scale_scalar, %weight_scale_const_2)
+    // %2 = nn.bias_add(%1, %bias_const_3, axis=3)
+    // %3 = qnn.requantize(%2, %input_scale_const_4, %cmsisnn_shift_const_5,
+    //                     %output_scale_scalar, %output_zero_point_scalar)
+    // clip(%3, a_min=%min_scalar, a_max=%max_scalar)
+    auto in_var = tir::Var("input", DataType::Handle(8));
+    auto const_var0 = tir::Var("filter", DataType::Handle(8));         // weight
+    auto const_var1 = tir::Var("multiplier", DataType::Handle(32));    // quant multiplier
+    auto const_var2 = tir::Var("filter_scale", DataType::Handle(32));  // weight scale
+    auto const_var3 = tir::Var("bias", DataType::Handle(32));          // bias
+    auto const_var4 = tir::Var("input_scale", DataType::Handle(32));   // input_scale * weight_scale
+    auto const_var5 = tir::Var("shift", DataType::Handle(32));         // quant shift
+    auto out_var = tir::Var("output", DataType::Handle(8));
+
+    // Individual arguments to the structs arguments of the CMSIS-NN API are filled into call_extern
+    // https://github.com/ARM-software/CMSIS_5/blob/def6f800f95661eb3451d317f7d0dde504f6020d/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c#L50
+
+    // prepare cmsis_nn_conv_params
+    auto* conv2d_attrs = conv2d_call->attrs.as<Conv2DAttrs>();
+    int32_t input_offset = -GetScalarFromConstant<int32_t>(conv2d_call->args[2]);
+    int32_t output_offset = GetScalarFromConstant<int32_t>(requantize_call->args[4]);
+    int32_t stride_w = qnn::get_const_int(conv2d_attrs->strides[1]);
+    int32_t stride_h = qnn::get_const_int(conv2d_attrs->strides[0]);
+    int32_t padding_w = qnn::get_const_int(conv2d_attrs->padding[1]);
+    int32_t padding_h = qnn::get_const_int(conv2d_attrs->padding[0]);
+    int32_t dilation_w = qnn::get_const_int(conv2d_attrs->dilation[1]);
+    int32_t dilation_h = qnn::get_const_int(conv2d_attrs->dilation[0]);
+    int32_t clip_min, clip_max;
+    if (clip_call) {
+      auto* clip_attrs = clip_call->attrs.as<ClipAttrs>();
+      clip_min = clip_attrs->a_min;
+      clip_max = clip_attrs->a_max;
+    } else {
+      clip_min = -128;
+      clip_max = 127;
+    }
+
+    // cmsis_nn_dims *input_dims
+    auto input_shape = conv2d_call->args[0]->type_as<TensorTypeNode>()->shape;
+    int32_t input_n = qnn::get_const_int(input_shape[0]);
+    int32_t input_h = qnn::get_const_int(input_shape[1]);
+    int32_t input_w = qnn::get_const_int(input_shape[2]);
+    int32_t input_c = qnn::get_const_int(input_shape[3]);
+
+    // cmsis_nn_dims *filter_dims (OHWI)
+    auto filter_shape = conv2d_call->args[1]->type_as<TensorTypeNode>()->shape;
+    int32_t filter_n = qnn::get_const_int(filter_shape[0]);
+    int32_t filter_h = qnn::get_const_int(filter_shape[1]);
+    int32_t filter_w = qnn::get_const_int(filter_shape[2]);
+    int32_t filter_c = qnn::get_const_int(filter_shape[3]);
+
+    // cmsis_nn_dims *bias_dims
+    int32_t bias_n = 1;
+    int32_t bias_h = 1;
+    int32_t bias_w = 1;
+    int32_t bias_c = qnn::get_const_int(filter_shape[0]);
+
+    // cmsis_nn_dims *output_dims
+    auto output_shape = conv2d_call->type_as<TensorTypeNode>()->shape;
+    int32_t output_n = qnn::get_const_int(output_shape[0]);
+    int32_t output_h = qnn::get_const_int(output_shape[1]);
+    int32_t output_w = qnn::get_const_int(output_shape[2]);
+    int32_t output_c = qnn::get_const_int(output_shape[3]);
+
+    tvm::Array<PrimExpr> call_ext_args = {tir::StringImm("arm_convolve_wrapper_s8"), in_var,
+                                          const_var0, const_var1};
+    if (bias_add_call) {
+      call_ext_args.push_back(const_var3);
+    }
+    call_ext_args.push_back(const_var5);
+    call_ext_args.push_back(out_var);
+
+    tvm::Array<PrimExpr> scalar_args = {
+        ToArg(input_offset), ToArg(output_offset), ToArg(stride_w),   ToArg(stride_h),
+        ToArg(padding_w),    ToArg(padding_h),     ToArg(dilation_w), ToArg(dilation_h),
+        ToArg(clip_min),     ToArg(clip_max),      ToArg(input_n),    ToArg(input_h),
+        ToArg(input_w),      ToArg(input_c),       ToArg(filter_n),   ToArg(filter_h),
+        ToArg(filter_w),     ToArg(filter_c),      ToArg(bias_n),     ToArg(bias_h),
+        ToArg(bias_w),       ToArg(bias_c),        ToArg(output_n),   ToArg(output_h),
+        ToArg(output_w),     ToArg(output_c),
+    };
+
+    call_ext_args = tvm::runtime::Concat(call_ext_args, scalar_args);
+
+    Array<tir::Var> func_signature{in_var, const_var0, const_var1, const_var2};
+    if (bias_add_call) {
+      func_signature.push_back(const_var3);
+    }
+    func_signature.push_back(const_var4);
+    func_signature.push_back(const_var5);
+    func_signature.push_back(out_var);
+
+    // https://github.com/ARM-software/CMSIS_5/blob/d788fd583984388553391de18afd8b4d2a146868/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s8.c#L367
+    size_t context_buffer_size = (2 * input_c * filter_w * filter_h) * (int32_t)sizeof(int16_t);
+
+    CreatePrimFuncForExtern(func_signature, call_ext_args, context_buffer_size);
+  }
+
   void EmitSoftMax(const Expr& expr) {
     auto* quantize_call = expr.as<CallNode>();
     auto* softmax_call = quantize_call->args[0].as<CallNode>();
     auto* dequant_call = softmax_call->args[0].as<CallNode>();
-    const float quant_scale = GetScalarFromConstant<float>(dequant_call->args[1]);
+    auto* scale_const = dequant_call->args[1].as<ConstantNode>();

Review comment:
       ACK
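
For reference, the context buffer sizing in the hunk above reduces to this arithmetic (a direct Python restatement of the same formula):

```python
def conv2d_context_buffer_bytes(input_c, filter_w, filter_h):
    # Room for two int16 im2col columns, each holding
    # input_c * filter_w * filter_h values (sizeof(int16_t) == 2).
    return 2 * input_c * filter_w * filter_h * 2
```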







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r733879032



##########
File path: src/relay/backend/contrib/cmsisnn/relay_to_tir.cc
##########
@@ -223,19 +353,25 @@ class RelayToTIRVisitor : public MixedModeVisitor {
 
     auto comp_name = func->GetAttr<String>(attr::kComposite);
     if (comp_name.defined()) {
-      if (comp_name == "cmsisnn.quantized_softmax") {
+      if (comp_name == "cmsisnn.qnn_conv2d") {
+        EmitConv2D(func->body);
+      }
+      if (comp_name == "cmsisnn.qnn_softmax") {
         EmitSoftMax(func->body);
       }
-      if (comp_name == "cmsisnn.quantized_mul") {
+      if (comp_name == "cmsisnn.qnn_mul") {
         EmitMul(func->body);
       }
-      if (comp_name == "cmsisnn.quantized_add") {
+      if (comp_name == "cmsisnn.qnn_add") {
         EmitAdd(func->body);
       }
     }
   }
 
  public:
+  int32_t kScaledDiffIntegerBits = 5;

Review comment:
       Nope. Moved some of them under private. 







[GitHub] [tvm] manupa-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r743818992



##########
File path: python/tvm/relay/op/contrib/cmsisnn.py
##########
@@ -52,37 +54,85 @@ def partition_for_cmsisnn(mod, params=None, **opts):
             transform.InferType(),
             transform.MergeComposite(pattern_table()),
             transform.AnnotateTarget("cmsisnn"),
-            transform.MergeCompilerRegions(),

Review comment:
       The way the current BYOC works, this will create a source file per operator, and an additional level of indirection will be introduced for each call.
   
   i.e.
   main_func <--> ext_func1 <--> cmsis_func1
   main_func <--> ext_func2 <--> cmsis_func2
   main_func <--> ext_func3 <--> cmsis_func3
   
   vs
   
   main_func <--> ext_func <--> cmsis_func1, cmsis_func2, cmsis_func3







[GitHub] [tvm] manupa-arm commented on pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#issuecomment-966187544


   > I would question why we're adding them if they're in unreachable code paths, but if we do think they're worth adding I would consider them part of the contract of the internal function and thus should have associated tests for the behaviour (throwing an exception is now part of the functions behaviour).
   
   There is a fine line between defining a failing internal assert as a "behaviour" of the function and removing the assert just because it is not executed in the present organization of the components.
   
   > If we're happy that at some point someone may inadvertently change the behaviour of this function, then potentially it's not a necessary line of code and we can apply the YAGNI principle from extreme programming
   
   Enforcing YAGNI to remove internal asserts will discourage developers from using asserts just because the execution path is not exercised today.
   
   In summary, I would support having internal assertions that make it easier for another developer to change this code or re-use some of it somewhere else. **If it means we need to add a borderline-meaningful test that asserts that the assertion inside the code is triggered, let's do that.**
   
   I guess what you are saying is we need a test for the above ICHECK in this PR?
   
   
   
   
   





[GitHub] [tvm] areusch commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
areusch commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r743906641



##########
File path: tests/python/contrib/test_cmsisnn/test_extract_constants.py
##########
@@ -0,0 +1,175 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""CMSIS-NN integration tests: extract_constants pass"""
+import itertools
+import math
+import numpy as np
+import pytest
+import tvm
+from tvm import relay
+
+from utils import (
+    make_module,
+    count_num_calls,
+    get_range_for_dtype_str,
+    get_same_padding,
+    get_conv2d_qnn_params,
+    make_qnn_relu,
+)
+
+tvm._ffi._init_api("relay.ext.cmsisnn.transform", __name__)
+
+
+class CheckFunctionsForConstants(tvm.relay.ExprVisitor):
+    def __init__(self):
+        super().__init__()
+        self.num_constants_ = 0
+
+    def visit_call(self, call):
+        super().visit_call(call)
+        for arg in call.args:
+            if isinstance(arg, relay.Constant) and arg.data.numpy().ndim > 0:
+                self.num_constants_ += 1
+
+    def visit_function(self, func):
+        super().visit_function(func)
+        assert self.num_constants_ == 0, "Functions should not have constant arguments in Calls"
+
+
+def set_external_func_attr(func, compiler, ext_symbol):
+    func = func.with_attr("Primitive", tvm.tir.IntImm("int32", 1))
+    func = func.with_attr("Compiler", compiler)
+    func = func.with_attr("global_symbol", ext_symbol)
+    return func
+
+
+def test_external_function():
+    y0_data = np.random.uniform(0, 1, (8, 8)).astype("float32")
+    x0 = relay.var("x0", shape=(8, 8))
+    y0_const = relay.const(y0_data, "float32")
+    z0 = x0 + y0_const
+    ef = relay.Function([x0], z0, relay.TensorType((8, 8), "float32"))
+    ev = relay.GlobalVar("external_function")
+    ef = set_external_func_attr(ef, "external_compiler", ev.name_hint)
+
+    x = relay.var("x", shape=(8, 8))
+    c = relay.Call(ev, [x])
+    mf = relay.Function([x], c, relay.TensorType((8, 8), "float32"))
+    mv = relay.GlobalVar("main")
+
+    mod = tvm.IRModule()
+    mod[ev] = ef
+    mod[mv] = mf
+
+    mod = ExtractConstantsFromPartitionedFunction()(mod)
+    CheckFunctionsForConstants().visit_function(mod[ev])
+    relay.transform.InferType()(mod)
+
+
+def test_nested_function():
+    y1_data = np.random.uniform(0, 1, (8, 8)).astype("float32")
+    x1 = relay.var("x1", shape=(8, 8))
+    y1_const = relay.const(y1_data, "float32")
+    z1 = x1 + y1_const
+    w1 = z1 * relay.const(5.0, "float32")
+    lf = relay.Function([x1], w1, relay.TensorType((8, 8), "float32"))
+
+    x0 = relay.var("x0", shape=(8, 8))
+    c0 = relay.Call(lf, [x0])
+    ef = relay.Function([x0], c0, relay.TensorType((8, 8), "float32"))
+
+    x = relay.var("x", shape=(8, 8))
+    ev = relay.GlobalVar("external_function")
+    ef = set_external_func_attr(ef, "external_compiler", ev.name_hint)
+    c = relay.Call(ev, [x])
+    mf = relay.Function([x], c, relay.TensorType((8, 8), "float32"))
+    mv = relay.GlobalVar("main")
+
+    mod = tvm.IRModule()
+    mod[ev] = ef
+    mod[mv] = mf
+
+    mod = ExtractConstantsFromPartitionedFunction()(mod)
+    CheckFunctionsForConstants().visit_function(mod[ev])
+    relay.transform.InferType()(mod)
+
+
+def test_multiple_functions():
+    y20_data = np.random.uniform(0, 1, (8, 8)).astype("float32")
+    x20 = relay.var("x20", shape=(8, 8))
+    y20_const = relay.const(y20_data, "float32")
+    z20 = x20 + y20_const
+    f20 = relay.Function([x20], z20, relay.TensorType((8, 8), "float32"))
+
+    y21_data = np.random.uniform(0, 1, (8, 8)).astype("float32")
+    x21 = relay.var("x21", shape=(8, 8))
+    y21_const = relay.const(y21_data, "float32")
+    z21 = x21 + y21_const
+    f21 = relay.Function([x21], z21, relay.TensorType((8, 8), "float32"))
+
+    x10 = relay.var("x10", shape=(8, 8))
+    c10 = relay.Call(f20, [x10])
+    c11 = relay.Call(f21, [c10])
+    ef = relay.Function([x10], c11, relay.TensorType((8, 8), "float32"))
+
+    x0 = relay.var("x0", shape=(8, 8))
+    ev = relay.GlobalVar("external_function")
+    ef = set_external_func_attr(ef, "external_compiler", ev.name_hint)
+    c = relay.Call(ev, [x0])
+    mf = relay.Function([x0], c, relay.TensorType((8, 8), "float32"))
+    mv = relay.GlobalVar("main")
+
+    mod = tvm.IRModule()
+    mod[ev] = ef
+    mod[mv] = mf
+
+    mod = ExtractConstantsFromPartitionedFunction()(mod)
+    CheckFunctionsForConstants().visit_function(mod[ev])
+    relay.transform.InferType()(mod)

Review comment:
       should we assert anything here? same question above
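
One possible answer (a hypothetical sketch, not part of the PR): since the pass appends every extracted constant to the call of the external function, the tests could assert on the call site in `main`:

```python
def assert_constants_hoisted(mod, ext_var, num_constants):
    # Hypothetical check: after ExtractConstantsFromPartitionedFunction,
    # main's call should carry the original input plus the hoisted
    # constants as extra arguments.
    call = mod["main"].body
    assert call.op.name_hint == ext_var.name_hint
    assert len(call.args) == 1 + num_constants
```

e.g. `assert_constants_hoisted(mod, ev, 1)` for the single-constant cases above.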

##########
File path: src/relay/backend/contrib/cmsisnn/tir_to_runtime.cc
##########
@@ -54,6 +54,162 @@ class CodeGenCMSISNN : public CodeGenC {
   }
 
  private:
+  /*!  * \brief Emit the CMSIS-NN context buffer */
+  void VisitStmt_(const AllocateNode* op) {
+    context_buffer_name_ = op->buffer_var->name_hint;
+    context_buffer_size_ = op->constant_allocation_size();
+    CodeGenC::VisitStmt_(op);
+  }
+
+  /*!  * \brief Emits CMSIS-NN APIs for every call_extern */
+  void VisitExpr_(const CallNode* op, std::ostream& os) {  // NOLINT(*)
+    if (!op->op.same_as(builtin::call_extern())) {
+      return;
+    }
+    std::string cmsis_func_name = op->args[0].as<StringImmNode>()->value;
+    if (cmsis_func_name == "arm_softmax_s8" || cmsis_func_name == "arm_elementwise_mul_s8" ||
+        cmsis_func_name == "arm_elementwise_add_s8") {
+      CodeGenC::VisitExpr_(op, os);
+    } else if (cmsis_func_name == "arm_convolve_wrapper_s8") {
+      EmitConv2D(op);
+    }
+    return;
+  }
+
+  /*!  * \brief Emits cmsis_nn_context struct */
+  std::string EmitCMSISNNContext(std::ostream& os, std::string buf_name, int buf_size) {
+    std::string struct_name = "context";
+    PrintIndent();
+    os << "cmsis_nn_context " << struct_name << "= {" << buf_name << "," << buf_size << "};\n";
+    return struct_name;
+  }
+
+  /*!  * \brief Emits cmsis_nn_conv_params struct */
+  std::string EmitCMSISNNConvParams(std::ostream& os, int32_t input_offset, int32_t output_offset,
+                                    int32_t stride_w, int32_t stride_h, int32_t padding_w,
+                                    int32_t padding_h, int32_t dilation_w, int32_t dilation_h,
+                                    int32_t clip_min, int32_t clip_max) {
+    std::string struct_name = "conv_params";
+    PrintIndent();
+    os << "cmsis_nn_tile stride = {" << stride_w << "," << stride_h << "};\n";
+    PrintIndent();
+    os << "cmsis_nn_tile padding = {" << padding_w << "," << padding_h << "};\n";
+    PrintIndent();
+    os << "cmsis_nn_tile dilation = {" << dilation_w << "," << dilation_h << "};\n";
+    PrintIndent();
+    os << "cmsis_nn_activation activation = {" << clip_min << "," << clip_max << "};\n";
+    PrintIndent();
+    os << "cmsis_nn_conv_params " << struct_name << " = {" << input_offset << ", " << output_offset
+       << ", stride, padding, dilation, activation};\n";
+    return struct_name;
+  }
+
+  /*!  * \brief Emits cmsis_nn_per_channel_quant_params struct */
+  std::string EmitCMSISNNPerChannelQuantParams(std::ostream& os, std::string multiplier,
+                                               std::string shift) {
+    std::string struct_name = "quant_params";
+    PrintIndent();
+    os << "cmsis_nn_per_channel_quant_params " << struct_name << " = {" << multiplier << ", "
+       << shift << "};\n";
+    return struct_name;
+  }
+
+  /*!  * \brief Emits cmsis_nn_dims struct */
+  std::string EmitCMSISNNDims(std::ostream& os, std::string tensor_type, int32_t n, int32_t h,
+                              int32_t w, int32_t c) {
+    std::string struct_name = tensor_type + "_dims";
+    PrintIndent();
+    os << "cmsis_nn_dims " << struct_name << " = {" << n << "," << h << "," << w << "," << c
+       << "};\n";
+    return struct_name;
+  }
+
+  /*!  * \brief Emits CMSIS-NN APIs for every call_extern */
+  void EmitConv2D(const CallNode* op) {
+    static const int max_num_args = 33;
+    std::string cmsis_func_name = op->args[0].as<StringImmNode>()->value;
+
+    bool bias_enabled = false;
+    if (op->args.size() == max_num_args) {
+      bias_enabled = true;
+    }
+
+    auto get_var_name = [](const CallNode* op, int id) {
+      return op->args[id].as<VarNode>()->name_hint.c_str();
+    };
+    auto get_arg_value = [](const CallNode* op, int id) {
+      return op->args[id].as<IntImmNode>()->value;
+    };
+    int arg_id = 0;
+    std::string input_data = get_var_name(op, ++arg_id);
+    std::string filter_data = get_var_name(op, ++arg_id);
+    std::string multiplier = get_var_name(op, ++arg_id);
+    std::string bias_data("0x0");
+    if (bias_enabled) {
+      bias_data = get_var_name(op, ++arg_id);
+    }
+    std::string shift = get_var_name(op, ++arg_id);
+    std::string output_data = get_var_name(op, ++arg_id);
+
+    int input_offset = get_arg_value(op, ++arg_id);

Review comment:
       is there a way to use the encoding here?
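
One option would be to name the positional layout once and index through it. A hypothetical sketch mirroring the scalar-argument order assembled in relay_to_tir.cc (offsets, strides, padding, dilation, clip bounds, then NHWC dims of input, filter, bias and output):

```python
from collections import namedtuple

# Hypothetical named layout for the 26 trailing scalar arguments.
ConvScalarArgs = namedtuple("ConvScalarArgs", [
    "input_offset", "output_offset",
    "stride_w", "stride_h", "padding_w", "padding_h",
    "dilation_w", "dilation_h", "clip_min", "clip_max",
    "input_n", "input_h", "input_w", "input_c",
    "filter_n", "filter_h", "filter_w", "filter_c",
    "bias_n", "bias_h", "bias_w", "bias_c",
    "output_n", "output_h", "output_w", "output_c",
])

# args = ConvScalarArgs(*scalar_values)  # then args.input_offset, etc.
```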







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r743811104



##########
File path: python/tvm/relay/op/contrib/cmsisnn.py
##########
@@ -52,37 +54,85 @@ def partition_for_cmsisnn(mod, params=None, **opts):
             transform.InferType(),
             transform.MergeComposite(pattern_table()),
             transform.AnnotateTarget("cmsisnn"),
-            transform.MergeCompilerRegions(),

Review comment:
       Enabling this has no impact on the generated primfunc, but I wanted to keep it simple by having a 1-1 mapping from a single external Relay function to a single primfunc.







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r734653298



##########
File path: python/tvm/relay/op/contrib/cmsisnn.py
##########
@@ -47,42 +47,93 @@ def partition_for_cmsisnn(mod, params=None, **opts):
     if params:
         mod["main"] = bind_params_by_name(mod["main"], params)
 
+    tvm._ffi._init_api("relay.ext.cmsisnn.transform", __name__)
+
     seq = tvm.transform.Sequential(
         [
             transform.InferType(),
             transform.MergeComposite(pattern_table()),
             transform.AnnotateTarget("cmsisnn"),
-            transform.MergeCompilerRegions(),
             transform.PartitionGraph(),
+            GenerateCMSISNNConstants(),
+            ExtractConstantsFromPartitionedFunction(),
+            transform.InferType(),
         ]
     )
-
     return seq(mod)
 
 
 @register_pattern_table("cmsisnn")
 def pattern_table():
     """Get the cmsisnn compiler pattern table."""
 
-    def softmax_pattern():
+    def qnn_softmax_pattern():
+        """Create pattern for quantized softmax"""
         pattern = is_op("qnn.dequantize")(wildcard(), is_constant(), is_constant())
         pattern = is_op("nn.softmax")(pattern)
         pattern = is_op("qnn.quantize")(pattern, is_constant(), is_constant())
         return pattern
 
-    def check_quantized_softmax(extract):
+    def check_qnn_softmax(pattern):
         """Check if softmax is supported by CMSIS-NN."""
-        dequantize_call = extract.args[0].args[0]
-        scale = extract.args[1].data.numpy().item(0)
-        zero_point = extract.args[2].data.numpy().item(0)
+        dequantize_call = pattern.args[0].args[0]
+        scale = pattern.args[1].data.numpy().item(0)
+        zero_point = pattern.args[2].data.numpy().item(0)
 
         # check for dtypes of quantize and dequantize
         return (
             (scale == 1.0 / 256 and zero_point == -128)
-            and extract.attrs.out_dtype == "int8"
+            and pattern.attrs.out_dtype == "int8"
             and dequantize_call.args[0].checked_type.dtype == "int8"
         )
 
+    def qnn_conv2d_pattern():
+        """Create pattern for qnn.conv2D with optional fused relu."""
+        qnn_conv2d = is_op("qnn.conv2d")(
+            wildcard(), is_constant(), is_constant(), is_constant(), is_constant(), is_constant()
+        ).has_attr({"kernel_layout": "HWIO"})
+        bias_add = is_op("nn.bias_add")(qnn_conv2d, is_constant())
+        req = is_op("qnn.requantize")(
+            qnn_conv2d | bias_add, is_constant(), is_constant(), is_constant(), is_constant()
+        )
+        clip_or_req = req.optional(is_op("clip"))
+        return clip_or_req
+
+    def check_qnn_conv2d(pattern):
+        """Check if the Conv2D is supported by CMSIS-NN."""
+        if str(pattern.op.name) == "clip":
+            relu = pattern
+            requantize = relu.args[0]
+        else:
+            requantize = pattern
+        requantize_input = requantize.args[0]
+        bias_add = None
+        bias_dtype = "int32"
+        if str(requantize_input.op.name) == "nn.bias_add":
+            bias_add = requantize_input
+            conv2d = bias_add.args[0]
+            bias_dtype = bias_add.args[1].checked_type.dtype
+        else:
+            conv2d = requantize_input
+        conv2d_input = conv2d.args[0]
+        conv2d_weight = conv2d.args[1]
+
+        # kernel zero_point should be 0
+        kernel_zp = conv2d.args[3].data.numpy()
+        kernel_zp = [kernel_zp] if kernel_zp.ndim == 0 else kernel_zp
+
+        return (
+            conv2d.attrs.kernel_layout == "HWIO"

Review comment:
       Yes, in the end we convert it into OHWI, but at the moment only from HWIO. I assumed that frontends always convert Conv2D kernel layouts to HWIO. I rechecked `Conv2DRel`: that does not seem to be the case.
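
   To make the layout relationship concrete, here is a small standalone sketch (a hypothetical helper, not code from this PR) of the HWIO -> OHWI kernel conversion, which is just a transpose with axes (3, 0, 1, 2):

   ```c++
   #include <cstdint>
   #include <vector>

   // ohwi[o][h][w][i] = hwio[h][w][i][o] for an H x W x I x O kernel,
   // stored flat in row-major order.
   std::vector<int8_t> HWIOToOHWI(const std::vector<int8_t>& hwio,
                                  int H, int W, int I, int O) {
     std::vector<int8_t> ohwi(hwio.size());
     for (int h = 0; h < H; ++h)
       for (int w = 0; w < W; ++w)
         for (int i = 0; i < I; ++i)
           for (int o = 0; o < O; ++o)
             ohwi[((o * H + h) * W + w) * I + i] =
                 hwio[((h * W + w) * I + i) * O + o];
     return ohwi;
   }
   ```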







[GitHub] [tvm] manupa-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r734824425



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    // Replace Constant arguments with Vars for ML Operators
+    // Perform this for non-main Call Nodes only
+    if (func_nesting_level_ && call->op.as<OpNode>()) {
+      Array<Expr> new_args;
+      for (auto& arg : post_call->args) {
+        auto* const_arg = arg.as<ConstantNode>();
+        if (const_arg && !const_arg->is_scalar()) {
+          Var var_arg = Var(gen_var_name(), const_arg->tensor_type());
+          new_args.push_back(var_arg);
+          constants_within_function_.push_back(GetRef<Constant>(const_arg));
+        } else {
+          new_args.push_back(arg);
+        }
+      }
+      final_call = Call(call->op, new_args, call->attrs, {});
+    }
+
+    // Since the constants are kicked out of partitioned functions
+    // a new call to global function is needed
+    if (auto* glob_var_node = post_call->op.as<GlobalVarNode>()) {
+      auto glob_var = GetRef<GlobalVar>(glob_var_node);
+      auto glob_func = Downcast<Function>(mod_->Lookup(glob_var));
+      auto new_glob_func = VisitExpr(glob_func);
+      if (!new_glob_func.same_as(glob_func)) {
+        mod_->Update(glob_var, Downcast<Function>(new_glob_func));
+        Array<Expr> new_args = post_call->args;
+        ICHECK(function_to_constants_.find(glob_func) != function_to_constants_.end());

Review comment:
       Is it used with `.find` as well?







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r735526607



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    // Replace Constant arguments with Vars for ML Operators
+    // Perform this for non-main Call Nodes only
+    if (func_nesting_level_ && call->op.as<OpNode>()) {
+      Array<Expr> new_args;
+      for (auto& arg : post_call->args) {
+        auto* const_arg = arg.as<ConstantNode>();
+        if (const_arg && !const_arg->is_scalar()) {
+          Var var_arg = Var(gen_var_name(), const_arg->tensor_type());
+          new_args.push_back(var_arg);
+          constants_within_function_.push_back(GetRef<Constant>(const_arg));
+        } else {
+          new_args.push_back(arg);
+        }
+      }
+      final_call = Call(call->op, new_args, call->attrs, {});
+    }
+
+    // Since the constants are kicked out of partitioned functions
+    // a new call to global function is needed
+    if (auto* glob_var_node = post_call->op.as<GlobalVarNode>()) {
+      auto glob_var = GetRef<GlobalVar>(glob_var_node);
+      auto glob_func = Downcast<Function>(mod_->Lookup(glob_var));
+      auto new_glob_func = VisitExpr(glob_func);
+      if (!new_glob_func.same_as(glob_func)) {
+        mod_->Update(glob_var, Downcast<Function>(new_glob_func));
+        Array<Expr> new_args = post_call->args;
+        ICHECK(function_to_constants_.find(glob_func) != function_to_constants_.end());
+        for (auto constant : function_to_constants_.at(glob_func)) {
+          new_args.push_back(constant);
+        }
+        final_call = Call(glob_var, new_args);
+      }
+    }
+
+    // Since the constants are kicked out of the local partitioned functions
+    // a new call to local function is needed
+    if (auto* func_node = call->op.as<FunctionNode>()) {

Review comment:
       After some thought: except for the `VisitExpr(Function)` call, there is nothing in common between those two blocks. From a readability point of view, it's better to keep them separate.







[GitHub] [tvm] manupa-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r735463335



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    // Replace Constant arguments with Vars for ML Operators
+    // Perform this for non-main Call Nodes only
+    if (func_nesting_level_ && call->op.as<OpNode>()) {
+      Array<Expr> new_args;
+      for (auto& arg : post_call->args) {
+        auto* const_arg = arg.as<ConstantNode>();
+        if (const_arg && !const_arg->is_scalar()) {
+          Var var_arg = Var(gen_var_name(), const_arg->tensor_type());
+          new_args.push_back(var_arg);
+          constants_within_function_.push_back(GetRef<Constant>(const_arg));
+        } else {
+          new_args.push_back(arg);
+        }
+      }
+      final_call = Call(call->op, new_args, call->attrs, {});
+    }
+
+    // Since the constants are kicked out of partitioned functions
+    // a new call to global function is needed
+    if (auto* glob_var_node = post_call->op.as<GlobalVarNode>()) {
+      auto glob_var = GetRef<GlobalVar>(glob_var_node);
+      auto glob_func = Downcast<Function>(mod_->Lookup(glob_var));
+      auto new_glob_func = VisitExpr(glob_func);
+      if (!new_glob_func.same_as(glob_func)) {
+        mod_->Update(glob_var, Downcast<Function>(new_glob_func));
+        Array<Expr> new_args = post_call->args;
+        ICHECK(function_to_constants_.find(glob_func) != function_to_constants_.end());

Review comment:
       Oops, it seems we are discussing a Map here rather than an Array of it.
   Can't we use unordered_map then?
   
   My reasoning is about using .find(..) with an Array/Map -- I'm not sure Array/Map is commonly used like that, especially when it is an internal data structure that is not exposed to Python.
   
   https://github.com/apache/tvm/blob/df2bcfbd239c486fbad1b2ddf335389cb6c35fb7/src/tir/transforms/loop_partition.cc#L331
   
   https://github.com/apache/tvm/blob/df2bcfbd239c486fbad1b2ddf335389cb6c35fb7/src/relay/transforms/fuse_ops.cc#L854
   
   https://github.com/apache/tvm/blob/df2bcfbd239c486fbad1b2ddf335389cb6c35fb7/src/relay/transforms/fuse_ops.cc#L1008
   
   So, two questions here from a code quality perspective:
   
   @ashutosh-arm, if the number of partitioned functions is bounded (preferably by a constant), this might not have a measurable effect. Is that the case -- say, for a large network with many operators? If so, it may be fine to overlook this.
   
   @Mousius, hmmm, the tree is used to make the .find(..) faster, right?
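
   For reference, a sketch of the std::unordered_map alternative, assuming TVM's ObjectPtrHash/ObjectPtrEqual functors are used to key on object identity:

   ```c++
   #include <unordered_map>

   #include <tvm/relay/expr.h>

   using tvm::relay::Constant;
   using tvm::relay::Function;
   using tvm::runtime::Array;
   using tvm::runtime::ObjectPtrHash;
   using tvm::runtime::ObjectPtrEqual;

   // Keyed on pointer identity of the Function, so lookups are amortized O(1)
   // instead of a linear scan over the container.
   std::unordered_map<Function, Array<Constant>, ObjectPtrHash, ObjectPtrEqual>
       function_to_constants;

   // Usage:
   //   auto it = function_to_constants.find(glob_func);
   //   if (it != function_to_constants.end()) { /* use it->second */ }
   ```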







[GitHub] [tvm] manupa-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r736311645



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();
+    }
+    return final_func;
+  }
+
+  Expr Rewrite_(const CallNode* call, const Expr& post) final {
+    Expr final_call = post;
+    auto* post_call = post.as<CallNode>();
+    if (post_call == nullptr) {
+      return final_call;
+    }
+
+    // Replace Constant arguments with Vars for ML Operators
+    // Perform this for non-main Call Nodes only
+    if (func_nesting_level_ && call->op.as<OpNode>()) {
+      Array<Expr> new_args;
+      for (auto& arg : post_call->args) {
+        auto* const_arg = arg.as<ConstantNode>();
+        if (const_arg && !const_arg->is_scalar()) {
+          Var var_arg = Var(gen_var_name(), const_arg->tensor_type());
+          new_args.push_back(var_arg);
+          constants_within_function_.push_back(GetRef<Constant>(const_arg));
+        } else {
+          new_args.push_back(arg);
+        }
+      }
+      final_call = Call(call->op, new_args, call->attrs, {});
+    }
+
+    // Since the constants are kicked out of partitioned functions
+    // a new call to global function is needed
+    if (auto* glob_var_node = post_call->op.as<GlobalVarNode>()) {
+      auto glob_var = GetRef<GlobalVar>(glob_var_node);
+      auto glob_func = Downcast<Function>(mod_->Lookup(glob_var));
+      auto new_glob_func = VisitExpr(glob_func);
+      if (!new_glob_func.same_as(glob_func)) {
+        mod_->Update(glob_var, Downcast<Function>(new_glob_func));
+        Array<Expr> new_args = post_call->args;
+        ICHECK(function_to_constants_.find(glob_func) != function_to_constants_.end());

Review comment:
       @Leo-arm, any thoughts on using this working assumption?







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r734679648



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;

Review comment:
       Yeah, I didn't like keeping a free variable to track scope like that. Thanks for the alternative.
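
   A minimal sketch of one common shape for such an alternative, assuming an RAII guard is what is meant here (the PR may have settled on something different):

   ```c++
   // Sketch only: increments the nesting level on entry and restores it on
   // every exit path, so the counter cannot leak on early returns.
   class ScopedNestingLevel {
    public:
     explicit ScopedNestingLevel(int* level) : level_(level) { ++(*level_); }
     ~ScopedNestingLevel() { --(*level_); }

    private:
     int* level_;
   };

   // Usage inside VisitExpr_(const FunctionNode* func):
   //   ScopedNestingLevel scope(&func_nesting_level_);
   //   auto new_body = VisitExpr(func->body);
   ```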







[GitHub] [tvm] ashutosh-arm commented on a change in pull request #9331: [4/10] Code generation for Conv2D via CMSIS-NN

Posted by GitBox <gi...@apache.org>.
ashutosh-arm commented on a change in pull request #9331:
URL: https://github.com/apache/tvm/pull/9331#discussion_r734738825



##########
File path: src/relay/backend/contrib/cmsisnn/extract_constants.cc
##########
@@ -0,0 +1,158 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/runtime/ndarray.h>
+
+#include "../../../qnn/utils.h"
+#include "../../../transforms/pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace cmsisnn {
+
+class ExtractConstantsMutator : public MixedModeMutator {
+ public:
+  explicit ExtractConstantsMutator(IRModule& mod) : mod_(mod) {}
+
+ private:
+  String gen_var_name() { return "tvm_var_extract_const_" + std::to_string(var_count_++); }
+
+  Expr VisitExpr_(const FunctionNode* func) final {
+    Function final_func = GetRef<Function>(func);
+    ++func_nesting_level_;
+    auto new_body = VisitExpr(func->body);
+    --func_nesting_level_;
+    if (!new_body.same_as(func->body)) {
+      final_func = Function(FreeVars(new_body), new_body, func->ret_type,
+                            FreeTypeVars(new_body, mod_), func->attrs);
+      function_to_constants_.Set(GetRef<Function>(func), constants_within_function_);
+      constants_within_function_.clear();

Review comment:
       Done!



