You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2017/11/02 20:35:58 UTC

[GitHub] cjolivier01 closed pull request #8475: Move OpenMP calculations into its own class

cjolivier01 closed pull request #8475: Move OpenMP calculations into its own class
URL: https://github.com/apache/incubator-mxnet/pull/8475
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/include/mxnet/engine.h b/include/mxnet/engine.h
index 30aecbdd8c..4048d5a1a3 100644
--- a/include/mxnet/engine.h
+++ b/include/mxnet/engine.h
@@ -272,11 +272,6 @@ class MXNET_API Engine {
    * \return Number of OMP threads that should be used per worker
    */
   virtual int num_omp_threads_per_worker() const = 0;
-
-  /*! \brief Set the number of OMP threads that should be used per worker
-   * \param num_threads_per_worker Number of OMP threads to be used per worker
-   */
-  virtual void set_num_omp_threads_per_worker(int num_omp_threads_per_worker) = 0;
 };  // class Engine
 #endif  // DMLC_USE_CXX11
 }  // namespace mxnet
diff --git a/src/engine/naive_engine.cc b/src/engine/naive_engine.cc
index a31101c353..7e3554ab1c 100644
--- a/src/engine/naive_engine.cc
+++ b/src/engine/naive_engine.cc
@@ -26,7 +26,7 @@
 #include <thread>
 #include "./engine_impl.h"
 #include "./profiler.h"
-#include "threaded_engine.h"
+#include "./openmp.h"
 
 namespace mxnet {
 namespace engine {
@@ -47,7 +47,6 @@ class NaiveEngine final : public Engine {
   };
 
   NaiveEngine() {
-    set_num_omp_threads_per_worker(ThreadedEngine::DefaultOMPThreadsPerWorker());
   }
   // virtual destructor
   virtual ~NaiveEngine() {
@@ -193,14 +192,7 @@ class NaiveEngine final : public Engine {
    * \return Number of OMP threads that should be used per worker
    */
   int num_omp_threads_per_worker() const override {
-    return num_omp_threads_per_worker_;
-  }
-
-  /*! \brief Set the number of OMP threads that should be used per worker
-   * \param num_threads_per_worker Number of OMP threads to be used per worker
-   */
-  void set_num_omp_threads_per_worker(int num_threads_per_worker) override {
-    num_omp_threads_per_worker_ = num_threads_per_worker;
+    return OpenMP::Get()->GetRecommendedOMPThreadCount();
   }
 
  private:
@@ -218,8 +210,6 @@ class NaiveEngine final : public Engine {
   mshadow::Stream<cpu> cpu_stream_;
   // GPU streams
   std::vector<mshadow::Stream<gpu>*> streams_;
-  /*! \brief Number of OMP threads to be used per worker */
-  int num_omp_threads_per_worker_{0};
 };  // class NaiveEngine
 
 
diff --git a/src/engine/openmp.cc b/src/engine/openmp.cc
new file mode 100644
index 0000000000..a605f977b6
--- /dev/null
+++ b/src/engine/openmp.cc
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <dmlc/omp.h>
+#include <dmlc/base.h>
+#include <dmlc/parameter.h>
+#include <climits>
+#include "./openmp.h"
+
+namespace mxnet {
+namespace engine {
+
+#if defined(__i386__) || defined(_M_X86) || defined(_M_X64) || defined(__x86_64__)
+#define ARCH_IS_INTEL_X86
+#endif
+
+OpenMP *OpenMP::Get() {
+  static OpenMP openMP;
+  return &openMP;
+}
+
+OpenMP::OpenMP()
+  : omp_num_threads_set_in_environment(dmlc::GetEnv("OMP_NUM_THREADS", INT_MIN) == INT_MIN) {
+#ifdef _OPENMP
+  if (!omp_num_threads_set_in_environment) {
+    omp_set_nested(true);
+    omp_set_dynamic(false);
+  }
+  const int max = dmlc::GetEnv("MXNET_OMP_MAX_THREADS", INT_MIN);
+  if (max != INT_MIN) {
+    omp_thread_max_ = max;
+  } else {
+#ifdef ARCH_IS_INTEL_X86
+    omp_thread_max_ = omp_get_num_procs() >> 1;
+#endif
+  }
+#else
+  enabled_ = false;
+  omp_thread_max_ = 1;
+#endif
+}
+
+int OpenMP::GetRecommendedOMPThreadCount() const {
+#ifdef _OPENMP
+  if (omp_num_threads_set_in_environment) {
+    return omp_get_max_threads();
+  }
+  if (enabled_) {
+#ifdef ARCH_IS_INTEL_X86
+    // x86 does hyperthreading, but do to cache issues, it's faster to only use # true CPUs
+    const int thread_count = omp_get_max_threads() >> 1;
+#else
+    const int thread_count = omp_get_max_threads();
+#endif
+    if (!omp_thread_max_ || thread_count < omp_thread_max_) {
+      return thread_count;
+    }
+    return omp_thread_max_;
+  }
+  return 1;
+#else
+  return 1;
+#endif
+}
+
+}  // namespace engine
+}  // namespace mxnet
+
diff --git a/src/engine/openmp.h b/src/engine/openmp.h
new file mode 100644
index 0000000000..8b995a6357
--- /dev/null
+++ b/src/engine/openmp.h
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#ifndef MXNET_ENGINE_OPENMP_H_
+#define MXNET_ENGINE_OPENMP_H_
+
+namespace mxnet {
+namespace engine {
+
+/*! \brief OpenMP wrapper and management class
+ *         This class manages a layer on top of the OMP implementation and does not
+ *         interact bidirectionally with the OMP implementation for all behaviors
+ *         (i.e. it's meant to be use explicitly for explicit arguments to omp pragmas
+ *         without affecting the behavior when no arguments are given)
+ */
+class OpenMP {
+ public:
+  OpenMP();
+
+  /*!
+   * \brief Get the recommended number of OMP threads to use given the current context
+   * \return Recommended number of OMP threads to use in a parallel operation
+   */
+  int GetRecommendedOMPThreadCount() const;
+
+  /*!
+   * \brief Set whether clients of this class receive pro-OMP behavior guidance
+   * \param enabled Set to 'true' if this class should provide OMP behavior
+   */
+  void set_enabled(bool enabled) { enabled_ = enabled; }
+  bool enabled() const { return enabled_; }
+
+  /*!
+   * \brief Set maximum number of threads to be used in an OMP region
+   * \param thread_max Maximum number of threads to be used in an OMP region
+   */
+  void set_thread_max(int thread_max) { omp_thread_max_ = thread_max; }
+  /*!
+   * \brief Maximum number of threads to be used in an OMP region
+   * \return Maximum number of threads
+   */
+  int thread_max() const { return omp_thread_max_; }
+
+  /*!
+   * \brief Get the OpenMP object's singleton pointer
+   * \return
+   */
+  static OpenMP *Get();
+
+ private:
+  /*!
+   * \brief Whether OpenMP layer is enabled (use more then one thread).  Independent of OMP library
+   *        behavior
+   */
+  volatile bool enabled_ = true;
+  /*!
+   * \brief Maximum number of threads for any OMP region
+   */
+  volatile int omp_thread_max_ = 0;
+  /*!
+   * \brief Whether OMP_NUM_THREADS was set in the environment.  If it is, we fall back to
+   *        the OMP's implementation's handling of that environment variable
+   */
+  const bool omp_num_threads_set_in_environment;
+};
+
+}  // namespace engine
+}  // namespace mxnet
+
+#endif  // MXNET_ENGINE_OPENMP_H_
diff --git a/src/engine/thread_pool.h b/src/engine/thread_pool.h
index b6fe3c2d5d..a4c1e3321a 100644
--- a/src/engine/thread_pool.h
+++ b/src/engine/thread_pool.h
@@ -57,8 +57,8 @@ class ThreadPool {
 
     /*! \brief Signal event upon destruction, even for exceptions (RAII) */
     struct SetReadyOnDestroy {
-      explicit inline SetReadyOnDestroy(std::shared_ptr<SimpleEvent> event)
-        : event_(event) {
+      explicit inline SetReadyOnDestroy(std::shared_ptr<SimpleEvent> *event)
+        : event_(*event) {
       }
       inline ~SetReadyOnDestroy() {
         if (event_) {
diff --git a/src/engine/threaded_engine.h b/src/engine/threaded_engine.h
index a4f61affc5..3cf6653712 100644
--- a/src/engine/threaded_engine.h
+++ b/src/engine/threaded_engine.h
@@ -38,6 +38,7 @@
 #include <thread>
 #include "./engine_impl.h"
 #include "./profiler.h"
+#include "./openmp.h"
 #include "../common/object_pool.h"
 
 namespace mxnet {
@@ -287,7 +288,6 @@ class ThreadedEngine : public Engine {
     objpool_var_ref_    = common::ObjectPool<ThreadedVar>::_GetSharedRef();
 
     /*! \brief Set default OMP threads per kernel worker to default */
-    set_num_omp_threads_per_worker(DefaultOMPThreadsPerWorker());
   }
   ~ThreadedEngine() {
     {
@@ -387,15 +387,7 @@ class ThreadedEngine : public Engine {
    * \return Number of OMP threads that should be used per worker
    */
   int num_omp_threads_per_worker() const override {
-    return num_omp_threads_per_worker_;
-  }
-
-  /*! \brief Set the number of OMP threads that should be used per worker
-   * \param num_threads_per_worker Number of OMP threads to be used per worker
-   * TODO(cjolivier01) Dynamically adjust based upon number of concurrent CPU jobs
-   */
-  void set_num_omp_threads_per_worker(int num_omp_threads_per_worker) override {
-    num_omp_threads_per_worker_ = num_omp_threads_per_worker;
+    return OpenMP::Get()->GetRecommendedOMPThreadCount();
   }
 
  private:
@@ -444,9 +436,6 @@ class ThreadedEngine : public Engine {
   std::shared_ptr<common::ObjectPool<VersionedVarBlock> > objpool_varblk_ref_;
   std::shared_ptr<common::ObjectPool<ThreadedVar> >       objpool_var_ref_;
 
-  /*! \brief Number of OMP threads to be used per worker */
-  int num_omp_threads_per_worker_{1};
-
   /*!
    * \brief Disallow copy construction and assignment.
    */
diff --git a/src/engine/threaded_engine_perdevice.cc b/src/engine/threaded_engine_perdevice.cc
index fe5a874024..60868797a6 100644
--- a/src/engine/threaded_engine_perdevice.cc
+++ b/src/engine/threaded_engine_perdevice.cc
@@ -114,7 +114,7 @@ class ThreadedEnginePerDevice : public ThreadedEngine {
           auto ptr =
           gpu_copy_workers_.Get(ctx.dev_id, [this, ctx, is_copy, nthread]() {
             // Signify to kernel that GPU is being used,  no Kernel Launch OMP (temporary behavior)
-            Engine::Get()->set_num_omp_threads_per_worker(1);
+            OpenMP::Get()->set_enabled(false);
             auto blk = new ThreadWorkerBlock<kCopyQueue>();
               blk->pool.reset(new ThreadPool(
                 nthread,
@@ -134,7 +134,7 @@ class ThreadedEnginePerDevice : public ThreadedEngine {
         } else {
           auto ptr = gpu_normal_workers_.Get(ctx.dev_id, [this, ctx, is_copy, nthread]() {
             // Signify to kernel that GPU is being used,  no Kernel Launch OMP (temporary behavior)
-            Engine::Get()->set_num_omp_threads_per_worker(1);
+            OpenMP::Get()->set_enabled(false);
               auto blk = new ThreadWorkerBlock<kWorkerQueue>();
               blk->pool.reset(new ThreadPool(
                 nthread,
@@ -196,7 +196,7 @@ class ThreadedEnginePerDevice : public ThreadedEngine {
 #if MXNET_USE_CUDA
     mshadow::Stream<gpu> *stream;
     do {
-      ThreadPool::SimpleEvent::SetReadyOnDestroy setReady(ready_event);
+      ThreadPool::SimpleEvent::SetReadyOnDestroy setReady(&ready_event);
       // allocate stream
       mshadow::SetDevice<gpu>(ctx.dev_id);
       if (is_copy_worker) {
diff --git a/src/nnvm/legacy_op_util.cc b/src/nnvm/legacy_op_util.cc
index 6e60178008..e5d1d1c8de 100644
--- a/src/nnvm/legacy_op_util.cc
+++ b/src/nnvm/legacy_op_util.cc
@@ -30,8 +30,10 @@ class ParsedOpProp {
   std::vector<std::string> outputs;
   // initializer
   void Init(const NodeAttrs& attrs) {
-    std::vector<std::pair<std::string, std::string> > kwargs(
-        attrs.dict.begin(), attrs.dict.end());
+    // For performance, do a reserve first and then copy attrs.dict
+    std::vector<std::pair<std::string, std::string> > kwargs;
+    kwargs.reserve(attrs.dict.size());
+    kwargs.insert(kwargs.end(), attrs.dict.begin(), attrs.dict.end());
     try {
       ptr->Init(kwargs);
     } catch (const dmlc::ParamError& e) {


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services