You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2018/05/15 13:20:51 UTC

[GitHub] pengzhao-intel commented on a change in pull request #10311: [MXNET-107]Fused GRU implementation for CPU

pengzhao-intel commented on a change in pull request #10311: [MXNET-107]Fused GRU implementation for CPU
URL: https://github.com/apache/incubator-mxnet/pull/10311#discussion_r188281801
 
 

 ##########
 File path: src/operator/rnn_impl.h
 ##########
 @@ -454,4 +457,803 @@ void LstmBackward(DType* ws,
     dy_ptr = dx.dptr_;
   }
 }
+
+template<typename DType>
+void GruForwardInferenceSingleLayer(DType* ws,
+                                    DType* tmp_buf,
+                                    bool state_outputs,
+                                    const int D,
+                                    const int T,
+                                    const int N,
+                                    const int I,
+                                    const int H,
+                                    const Tensor<cpu, 2, DType> &x,
+                                    const Tensor<cpu, 2, DType> &hx,
+                                    DType* wx_ptr,
+                                    DType* wh_ptr,
+                                    DType* bx_ptr,
+                                    DType* bh_ptr,
+                                    DType* y_ptr,
+                                    DType* hy_ptr) {
+  DType* ht = y_ptr;
+  DType* ht_1 = y_ptr;
+  DType* back_ht_1 = y_ptr + (T-1) * N * H * D + H;
+  DType* back_ht = back_ht_1;
+  DType* gemmC1  = ws;              // [D, T, N, 3 * H]
+  DType* gemmC2  = gemmC1 + D * T * N * 3 * H;  // N * 3 * H
+  DType* rt = gemmC2 + N * 3 * H;
+  DType* zt = rt + N * H;
+  DType* nt = zt + N * H;
+  DType* back_wx_ptr = wx_ptr + I * 3 * H + H * 3 * H;
+  DType* back_wh_ptr = wh_ptr + I * 3 * H + H * 3 * H;
+  DType* back_bx_ptr = (bx_ptr != NULL)? bx_ptr + 3 * H * 2 : NULL;
+  DType* back_bh_ptr = (bh_ptr != NULL)? bh_ptr + 3 * H * 2: NULL;
+  DType* back_gemmC1 = gemmC1 + T * N * 3 * H;
+  DType* gemmC1_t = gemmC1;
+
+  const Tensor<cpu, 2, DType> wx(wx_ptr, Shape2(H * 3, I));
+  const Tensor<cpu, 2, DType> wh(wh_ptr, Shape2(H * 3, H));
+  const Tensor<cpu, 2, DType> bx(bx_ptr, Shape2(3, H));
+  const Tensor<cpu, 2, DType> bh(bh_ptr, Shape2(3, H));
+  const Tensor<cpu, 2, DType> back_wx(back_wx_ptr, Shape2(H * 3, I));
+  const Tensor<cpu, 2, DType> back_wh(back_wh_ptr, Shape2(H * 3, H));
+  const Tensor<cpu, 2, DType> back_bx(back_bx_ptr, Shape2(3, H));
+  const Tensor<cpu, 2, DType> back_bh(back_bh_ptr, Shape2(3, H));
+
+  if (D == UNIDIRECT) {
+    #pragma omp parallel for
+    for (int i = 0; i < N; i++)
+      for (int j = 0; j < H; j++) {
+        y_ptr[i * H + j] = hx[i][j];
+      }
+  } else {
+    #pragma omp parallel for
 
 Review comment:
   Get thread number by `mxnet::engine::OpenMP::Get()->GetRecommendedOMPThreadCount()`. 
   Please change all omp by this way :)

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services