You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2020/04/28 15:57:50 UTC

[GitHub] [incubator-mxnet] access2rohit commented on a change in pull request #18168: [WIP]separate GPU kernel for broadcast_axis

access2rohit commented on a change in pull request #18168:
URL: https://github.com/apache/incubator-mxnet/pull/18168#discussion_r416731516



##########
File path: src/operator/tensor/broadcast_reduce_op.h
##########
@@ -1077,6 +1077,58 @@ struct broadcast_kernel {
   }
 };
 
+template<typename OP>
+struct broadcast_kernel_gpu {
+  template<typename IType, typename OType>
+  MSHADOW_XINLINE static void Map(int32_t i,
+                                  IType *input,
+                                  OType *output,
+                                  const int32_t *input_shape,
+                                  const int32_t *output_shape,
+                                  const int32_t *in_stride,
+                                  const int32_t *out_stride,
+                                  const OpReqType req,
+                                  const int32_t ndim) {
+    int32_t idx = i;
+    int32_t in_idx = i;
+    for (int32_t iter = ndim - 1; iter >= 0; --iter) {
+      int32_t dim_idx = idx % output_shape[iter];
+      if (input_shape[iter] != 1) {
+        in_idx += dim_idx * (in_stride[iter] - out_stride[iter]);
+      } else {
+        in_idx -= dim_idx * out_stride[iter];
+      }
+      idx /= output_shape[iter];
+    }
+    KERNEL_ASSIGN(output[i], req, OP::Map(input[in_idx]));
+  }
+};
+
+template<int req>
+struct compute_offset {
+  MSHADOW_XINLINE static void Map(int32_t i,
+                                  int32_t *in_stride,
+                                  int32_t *out_stride,

Review comment:
       Large tensor support applies only to CPUs, not to GPUs. To mitigate the performance regression, the GPU kernel is explicitly made 32-bit.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org