Posted to commits@tvm.apache.org by GitBox <gi...@apache.org> on 2021/12/20 21:11:43 UTC

[GitHub] [tvm] jacobbohlin opened a new pull request #9778: [microNPU][2c] Add performance modelling to cascader

jacobbohlin opened a new pull request #9778:
URL: https://github.com/apache/tvm/pull/9778


   RFC: apache/tvm-rfcs#37
   Issue: #9429
   
   NOTE: This PR builds on top of #9469 and #9471 and therefore includes those changes. It will remain a draft until both dependencies are merged.
   
   The algorithm described in the RFC uses two metrics for Pareto culling: performance and memory usage. This commit addresses the former and introduces the basis of performance estimation for the Parts. It also includes performance estimation code specific to ethosu_conv2d.
   
   The output of the performance model is only meant to be consumed by the cascader.
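   
   To illustrate the idea (not the cascader's actual API; the Candidate and ParetoCull names below are hypothetical), two-metric Pareto culling keeps a candidate only if no other candidate is at least as good on both estimated cycles and memory usage and strictly better on one of them. A minimal C++ sketch:
   
       // Hypothetical sketch of two-metric Pareto culling; not the cascader's
       // real data structures. A candidate is dominated when another candidate
       // is no worse on both metrics and strictly better on at least one.
       #include <cstdint>
       #include <vector>
       
       struct Candidate {
         int64_t cycles;        // estimated performance cost
         int64_t memory_usage;  // estimated peak memory in bytes
       };
       
       std::vector<Candidate> ParetoCull(const std::vector<Candidate>& candidates) {
         std::vector<Candidate> kept;
         for (const auto& c : candidates) {
           bool dominated = false;
           for (const auto& other : candidates) {
             bool no_worse = other.cycles <= c.cycles && other.memory_usage <= c.memory_usage;
             bool better = other.cycles < c.cycles || other.memory_usage < c.memory_usage;
             if (no_worse && better) {
               dominated = true;
               break;
             }
           }
           if (!dominated) kept.push_back(c);
         }
         return kept;
       }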


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@tvm.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



[GitHub] [tvm] mbaret commented on a change in pull request #9778: [microNPU][2c] Add performance modelling to cascader

Posted by GitBox <gi...@apache.org>.
mbaret commented on a change in pull request #9778:
URL: https://github.com/apache/tvm/pull/9778#discussion_r783103861



##########
File path: src/contrib/ethosu/cascader/parts/ethosu.cc
##########
@@ -32,84 +35,155 @@ namespace contrib {
 namespace ethosu {
 namespace cascader {
 
-const std::vector<int> EthosuPartNode::GetBlockShape(const StripeConfig& output_stripe_config,
-                                                     bool is_rollling) {
-  std::vector<int> block_shape;
-  for (int axis : output_stripe_config->GetShape()) {
-    block_shape.push_back(std::min(axis, 4));
-  }
-  return block_shape;
-}
+const std::vector<int64_t> EthosuPartNode::GetBytesRead(const std::vector<int> block_shape,
+                                                        const std::vector<int> full_shape) {
+  std::vector<int64_t> bytes_per_input(propagators_.size(), 0);
 
-const std::vector<int> EthosuPartNode::GetBlockInputBytes_(const std::vector<int>& block_shape) {
-  std::vector<int> bytes_per_input;
-  std::vector<float> strides;
   std::vector<int> order;
   std::vector<int> stripes;
   std::vector<int> offset;
+  std::vector<float> strides;
   for (size_t i = 0; i < block_shape.size(); i++) {
-    strides.push_back(1.0);
     order.push_back(1);
-    stripes.push_back(1);
+    stripes.push_back(round_up_divide(full_shape[i], block_shape[i]));
     offset.push_back(0);
+    strides.push_back(static_cast<float>(block_shape[i]));
   }
-  StripeConfig output_block_config(block_shape, block_shape, strides, order, stripes, offset);
+
+  StripeConfig output_block_config(block_shape, full_shape, strides, order, stripes, offset);
   auto input_block_configs = CalculateInputStripeConfigs(output_block_config);
+
+  int i = 0;
   for (const auto& input_block_config : input_block_configs) {
-    bytes_per_input.push_back(mul_reduce(input_block_config->GetShape()));
+    std::map<std::vector<int>, int> input_blocks = CountStripes(input_block_config, false);
+
+    for (const auto& block : input_blocks) {
+      bytes_per_input[i] += mul_reduce(block.first) * block.second;
+    }
+    i++;
   }
+
+  if (weight_tensor_idx_ != -1) {
+    bytes_per_input[weight_tensor_idx_] *= (stripes[height_idx_] * stripes[width_idx_]);
+  }
+
   return bytes_per_input;
 }
 
+const BlockConfig EthosuPartNode::GetBlockConfig(const StripeConfig& output_stripe_config) {
+  BlockConfig best_block_config;
+  float best_cost = std::numeric_limits<float>::infinity();
+  std::vector<int> output_stripe_shape = output_stripe_config->GetShape();
+
+  for (const auto& block_config : valid_block_configs_) {
+    std::vector<int> output_block = block_config->GetOutputBlockShape();
+
+    std::vector<int64_t> bytes_per_input = GetBytesRead(output_block, output_stripe_shape);
+    bytes_per_input[0] *= subkernels_;
+
+    // Calculate bytes read per output element
+    float relative_cost =
+        (bytes_per_input[0] + bytes_per_input[1]) / mul_reduce(output_stripe_shape);
+
+    // Single buffering hardware optimization
+    if (mul_reduce(output_stripe_shape) <= 2 * mul_reduce(output_block)) {
+      relative_cost /= 2;
+    }
+
+    if (relative_cost < best_cost) {
+      best_block_config = block_config;
+      best_cost = relative_cost;
+    }
+  }
+
+  return best_block_config;
+}
+
 const PerformanceInfo EthosuPartNode::GetPerformanceInfo(const StripeConfig& output_stripe_config,
-                                                         bool is_rolling) {
-  std::vector<int> block_shape = GetBlockShape(output_stripe_config, is_rolling);
-  std::vector<int> bytes_per_input = GetBlockInputBytes_(block_shape);
-  int bytes_per_output = mul_reduce(block_shape);
-  int num_blocks = 1;
+                                                         BufferMode buffer_mode) {
+  BlockConfig block_config = GetBlockConfig(output_stripe_config);
+  std::vector<int> block_shape = block_config->GetOutputBlockShape();
+
+  std::vector<int64_t> bytes_per_input =
+      GetBytesRead(block_shape, output_stripe_config->GetShape());
+
+  int elements_per_block = mul_reduce(block_shape);
+  int bytes_per_output = elements_per_block;
+  float num_blocks = 1.0f;
   for (size_t i = 0; i < block_shape.size(); i++) {
-    if (!is_rolling) {
-      num_blocks *= output_stripe_config->GetShape()[i] * output_stripe_config->GetStripes()[i] /
+    if (buffer_mode == BufferMode::RECOMPUTE) {
+      num_blocks *= static_cast<float>(output_stripe_config->GetShape()[i] *
+                                       output_stripe_config->GetStripes()[i]) /
                     block_shape[i];
     } else {
-      num_blocks *= output_stripe_config->GetExtent()[i] / block_shape[i];
+      num_blocks *= static_cast<float>(output_stripe_config->GetExtent()[i]) / block_shape[i];
     }
   }

Review comment:
       Just to mention that this logic is a placeholder and will be replaced in a later patch.







[GitHub] [tvm] manupa-arm commented on pull request #9778: [microNPU][2c] Add performance modelling to cascader

Posted by GitBox <gi...@apache.org>.
manupa-arm commented on pull request #9778:
URL: https://github.com/apache/tvm/pull/9778#issuecomment-1014711284


   Thanks! @jacobbohlin @mbaret 





[GitHub] [tvm] mbaret commented on pull request #9778: [microNPU][2c] Add performance modelling to cascader

Posted by GitBox <gi...@apache.org>.
mbaret commented on pull request #9778:
URL: https://github.com/apache/tvm/pull/9778#issuecomment-1011149778


   Just a quick note on the test coverage of this feature. The results of the performance model are not explicitly tested against the FVP because we don’t have performance instrumentation available in CI. We will, however, be testing this component downstream, where such instrumentation is available.





[GitHub] [tvm] manupa-arm merged pull request #9778: [microNPU][2c] Add performance modelling to cascader

Posted by GitBox <gi...@apache.org>.
manupa-arm merged pull request #9778:
URL: https://github.com/apache/tvm/pull/9778


   





[GitHub] [tvm] mbaret commented on pull request #9778: [microNPU][2c] Add performance modelling to cascader

Posted by GitBox <gi...@apache.org>.
mbaret commented on pull request #9778:
URL: https://github.com/apache/tvm/pull/9778#issuecomment-1014704238


   cc @manupa-arm, could you take a look and merge if everything's OK? Thanks





[GitHub] [tvm] jacobbohlin commented on pull request #9778: [microNPU][2c] Add performance modelling to cascader

Posted by GitBox <gi...@apache.org>.
jacobbohlin commented on pull request #9778:
URL: https://github.com/apache/tvm/pull/9778#issuecomment-998273707


   @mbaret @manupa-arm @NicolaLancellotti

