You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2022/05/18 06:09:44 UTC

[GitHub] [incubator-mxnet] anko-intel commented on a diff in pull request #21004: broadcast_like CPU optimization

anko-intel commented on code in PR #21004:
URL: https://github.com/apache/incubator-mxnet/pull/21004#discussion_r860672535


##########
src/operator/tensor/broadcast_reduce_op.h:
##########
@@ -1640,7 +1728,7 @@ void LpNormCompute(const nnvm::NodeAttrs& attrs,
   }
 #else
   const std::string& red = param.ord == 1 ? "red::sum{}" : "red::nrm2{}";
-  const std::string& op  = param.ord == 1 ? "abs" : "identity";
+  const std::string& op = param.ord == 1 ? "abs" : "identity";

Review Comment:
   Did you format the code with clang-format? It looks strange that it breaks the previously good alignment.



##########
src/operator/tensor/broadcast_reduce_op.h:
##########
@@ -1354,6 +1354,79 @@ struct direct_copy {
   }
 };
 
+template <typename IType, typename OType>
+void BroadcastCPU(const OpContext& ctx,
+                  const std::vector<TBlob>& inputs,
+                  const std::vector<OpReqType>& req,
+                  const std::vector<TBlob>& outputs,
+                  const mxnet::TShape& src_shape,
+                  const mxnet::TShape& dst_shape,
+                  ShapeAndStride aux_data) {
+  using namespace mshadow;
+  using namespace mshadow::expr;
+  using namespace mxnet_op;
+  constexpr size_t ELEMENTS_THRESHOLD = 256;
+  Stream<cpu>* s                      = ctx.get_stream<cpu>();
+
+  std::vector<size_t> elements_to_copy(aux_data.num_broadcast_axes);
+  std::vector<size_t> preaxis_dims(aux_data.num_broadcast_axes);
+  for (int ax = 0; ax < aux_data.num_broadcast_axes; ax++) {
+    index_t axis = aux_data.axes[ax];
+
+    elements_to_copy[ax] = 1;
+    for (int i = axis + 1; i < dst_shape.ndim(); i++) {
+      elements_to_copy[ax] *= dst_shape[i];
+    }
+
+    preaxis_dims[ax] = src_shape[0];
+    for (int i = 1; i < axis; i++) {
+      preaxis_dims[ax] *= src_shape[i];
+    }
+  }
+
+  // there is no need to check further axis' elements to copy as it for sure will be larger
+  if (elements_to_copy[0] < ELEMENTS_THRESHOLD || !std::is_same<IType, OType>::value) {
+    IType* src = static_cast<IType*>(inputs[0].dptr_);
+    OType* dst = static_cast<OType*>(outputs[0].dptr_);
+
+    const int ndim = dst_shape.ndim() == 2 ? 2 : MXNET_SPECIAL_MAX_NDIM;
+    Kernel<broadcast_kernel_cpu<mshadow_op::identity>, cpu>::Launch(
+        s, src_shape.Size(), src, dst, aux_data, req[0], ndim);
+
+  } else {
+    IType* src = static_cast<IType*>(inputs[0].dptr_);
+    IType* dst = static_cast<IType*>(outputs[0].dptr_);
+    // broadcast axis independently with result reusage
+    const int omp_threads = mxnet::engine::OpenMP::Get()->GetRecommendedOMPThreadCount();
+    for (int ax = 0; ax < aux_data.num_broadcast_axes; ax++) {
+      index_t axis     = aux_data.axes[ax];
+      size_t bcast_dim = dst_shape[axis];
+
+#pragma omp parallel num_threads(omp_threads)
+      {
+        // start from the end to avoid overwriting values when src == dst
+        for (int i = preaxis_dims[ax] - 1; i >= 0; i--) {
+#pragma omp for
+          for (int j = bcast_dim - 1; j >= 0; j--) {
+#pragma GCC diagnostic push
+#if __GNUC__ >= 8
+#pragma GCC diagnostic ignored "-Wclass-memaccess"
+#endif
+            std::memcpy(dst + (elements_to_copy[ax] * (j + i * bcast_dim)),
+                        src + (elements_to_copy[ax] * i),
+                        elements_to_copy[ax] * sizeof(IType));
+#pragma GCC diagnostic pop
+          }
+        }
+      }
+      // when first of broadcastable axis is broadcasted,
+      // run same algorithm for next brodcast axis with 'new' input
+      // this is why loops are iterating from the end
+      src = dst;
+    }
+  }
+}
+
 /**
  * When CPU context is used the no. of kernel launches are equal to

Review Comment:
   Is this comment still valid?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@mxnet.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org