Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2017/11/07 22:08:15 UTC

[GitHub] cjolivier01 commented on a change in pull request #8579: Automatic OMP operator tuning based upon kernel operation workload

URL: https://github.com/apache/incubator-mxnet/pull/8579#discussion_r149518876
 
 

 ##########
 File path: src/operator/mxnet_op.h
 ##########
 @@ -288,55 +291,84 @@ struct op_with_req {
   }
 };
 
-/*!
- * \brief Set to immediate scalar value kernel
- * \tparam val Scalar immediate
- */
-template<int val>
-struct set_to_int {
-  // mxnet_op version (when used directly with Kernel<>::Launch()) */
-  template<typename DType>
-  MSHADOW_XINLINE static void Map(int i, DType* out) {
-    out[i] = DType(val);
-  }
-  // mshadow_op version (when used with op_with_req<>)
-  MSHADOW_XINLINE static int Map() {
-    return val;
-  }
-};
 
-/*! \brief Special-case kernel shortcut for setting to zero */
-using set_zero = set_to_int<0>;
+/*! \brief Kernel operator wrapper used for tuning data */
+template<typename Operation, typename DType>
+struct tuned_op : public Operation {
+  static size_t workload_;       // nanos per operation * Tuner's WORKLOAD_COUNT
+  // the decision implementation
+  // TODO(cjolivier01): For more complex kernels, add a shape parameter version (diff LaunchEx)
+  static int UseOMP(size_t N, size_t thread_count);
+};
 
 template<typename OP, typename xpu>
 struct Kernel;
 
-
 template<typename OP>
 struct Kernel<OP, cpu> {
+  /*! \brief Launch CPU kernel */
   template<typename ...Args>
-  inline static void Launch(mshadow::Stream<cpu> *s, const int N, Args... args) {
+  inline static void Launch(mshadow::Stream<cpu> *, const int N, Args... args) {
 #ifdef _OPENMP
-    const int omp_cores = Engine::Get()->num_omp_threads_per_worker();
-    if (omp_cores <= 1) {
+    const int omp_threads = engine::OpenMP::Get()->GetRecommendedOMPThreadCount();
+    if (omp_threads < 2) {
       // Zero means not to use OMP, but don't interfere with external OMP behavior
       for (int i = 0; i < N; ++i) {
         OP::Map(i, args...);
       }
     } else {
-      #pragma omp parallel for num_threads(omp_cores)
+      #pragma omp parallel for num_threads(omp_threads)
       for (int i = 0; i < N; ++i) {
         OP::Map(i, args...);
       }
     }
 #else
     for (int i = 0; i < N; ++i) {
+      OP::Map(i, args...);
+    }
+#endif
+  }
+
+  /*! \brief Launch CPU kernel which has OMP tuning data available.
+   * When using this for a new kernel op, add declaration and tuning objects to
+   * operator_tune.cc
+   */
+  template<typename BasicOperation, typename DType, typename ...Args>
+  static void LaunchEx(mshadow::Stream<cpu> *, const int N, DType *dest, Args... args) {
+#ifdef _OPENMP
+    const int omp_threads = engine::OpenMP::Get()->GetRecommendedOMPThreadCount();
+    if (omp_threads < 2 || !tuned_op<BasicOperation, DType>::UseOMP(N, omp_threads)) {
+      // Zero means not to use OMP, but don't interfere with external OMP behavior
+      for (int i = 0; i < N; ++i) {
+        OP::Map(i, dest, args...);
+      }
+    } else {
+      #pragma omp parallel for num_threads(omp_threads)
+      for (int i = 0; i < N; ++i) {
+        OP::Map(i, dest, args...);
+      }
+    }
+#else
+    for (int i = 0; i < N; ++i) {
+      OP::Map(i, args...);
 
 Review comment:
   dest not added here
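
  For reference, a minimal sketch of the change the comment asks for, assuming
  the serial (non-OpenMP) fallback of LaunchEx should forward dest just like
  the two OpenMP branches above it:

    #else
        for (int i = 0; i < N; ++i) {
          OP::Map(i, dest, args...);  // pass dest in the serial fallback as well
        }
    #endif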

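  The UseOMP decision itself is only declared in this hunk. A hypothetical
  sketch of one way such a cost model could be written (the overhead constant
  and the cost formula below are illustrative assumptions, not the PR's actual
  implementation):

    template<typename Operation, typename DType>
    int tuned_op<Operation, DType>::UseOMP(size_t N, size_t thread_count) {
      // Illustrative assumptions: workload_ holds the measured per-element
      // serial cost (scaled by the tuner's WORKLOAD_COUNT), and kOMPOverhead
      // is a made-up constant for thread fork/join cost in the same units.
      static const size_t kOMPOverhead = 1 << 14;
      const size_t serial_cost   = N * workload_;
      const size_t parallel_cost = kOMPOverhead + serial_cost / thread_count;
      return serial_cost > parallel_cost ? 1 : 0;
    }

  Under a model like this, small N stays serial because the fork/join overhead
  dominates, and the crossover point shifts automatically as workload_ is
  re-measured per operator.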
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services