You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by cj...@apache.org on 2017/11/04 03:28:44 UTC

[incubator-mxnet] branch master updated: Move OpenMP calculations into its own class (#8475)

This is an automated email from the ASF dual-hosted git repository.

cjolivier01 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
     new 90ba62a  Move OpenMP calculations into its own class (#8475)
90ba62a is described below

commit 90ba62a9eae9061d3763d594a552a367e550fe4f
Author: Chris Olivier <cj...@gmail.com>
AuthorDate: Fri Nov 3 20:28:41 2017 -0700

    Move OpenMP calculations into its own class (#8475)
    
    * Different calls into OMP class
    
    * Copy from 'tuner' branch
    
    * Fix for _OPENMP not defined, class still exists
    
    * fix bracket
    
    * Added more comments
    
    * fix condition when CSRNDArray is used in NDArrayIter with shuffle=True (#8374)
    
    * fix condition when CSRNDArray is used in NDArrayIter with shuffle=True
    
    * fix indent
    
    * add unittest for invalid inputs
    
    * replace AssertErr with NotImplErr
    
    * update comment
    
    * trigger ci
    
    * Fix urllib bug affecting Python 3.x in the finetune tutorial (#8229)
    
    * changed url references from dmlc to apache/incubator-mxnet
    
    * added python 3 support for dataset download with urllib
    
    * added python 3 support for dataset download with urllib
    
    * removed unintended edits
    
    * Build fix, add more documentation
    
    * lint
    
    * commiting v12 changess (#8478)
    
    * Simplified unary/binary math operators (#8361) (#8361)
    
    * bump up version (#8488)
    
    * fix makenonlossgrad bug (#8508)
    
    * fix expand_dims if axis< 0 (#8489)
    
    * fix expand_dims if axis< 0
    
    * Update test_operator.py
    
    * Trigger build
---
 include/mxnet/engine.h                  |  5 --
 src/engine/naive_engine.cc              | 14 +-----
 src/engine/openmp.cc                    | 83 ++++++++++++++++++++++++++++++++
 src/engine/openmp.h                     | 85 +++++++++++++++++++++++++++++++++
 src/engine/thread_pool.h                |  4 +-
 src/engine/threaded_engine.h            | 15 +-----
 src/engine/threaded_engine_perdevice.cc |  6 +--
 src/nnvm/legacy_op_util.cc              |  6 ++-
 8 files changed, 181 insertions(+), 37 deletions(-)

diff --git a/include/mxnet/engine.h b/include/mxnet/engine.h
index 30aecbd..4048d5a 100644
--- a/include/mxnet/engine.h
+++ b/include/mxnet/engine.h
@@ -272,11 +272,6 @@ class MXNET_API Engine {
    * \return Number of OMP threads that should be used per worker
    */
   virtual int num_omp_threads_per_worker() const = 0;
-
-  /*! \brief Set the number of OMP threads that should be used per worker
-   * \param num_threads_per_worker Number of OMP threads to be used per worker
-   */
-  virtual void set_num_omp_threads_per_worker(int num_omp_threads_per_worker) = 0;
 };  // class Engine
 #endif  // DMLC_USE_CXX11
 }  // namespace mxnet
diff --git a/src/engine/naive_engine.cc b/src/engine/naive_engine.cc
index a31101c..7e3554a 100644
--- a/src/engine/naive_engine.cc
+++ b/src/engine/naive_engine.cc
@@ -26,7 +26,7 @@
 #include <thread>
 #include "./engine_impl.h"
 #include "./profiler.h"
-#include "threaded_engine.h"
+#include "./openmp.h"
 
 namespace mxnet {
 namespace engine {
@@ -47,7 +47,6 @@ class NaiveEngine final : public Engine {
   };
 
   NaiveEngine() {
-    set_num_omp_threads_per_worker(ThreadedEngine::DefaultOMPThreadsPerWorker());
   }
   // virtual destructor
   virtual ~NaiveEngine() {
@@ -193,14 +192,7 @@ class NaiveEngine final : public Engine {
    * \return Number of OMP threads that should be used per worker
    */
   int num_omp_threads_per_worker() const override {
-    return num_omp_threads_per_worker_;
-  }
-
-  /*! \brief Set the number of OMP threads that should be used per worker
-   * \param num_threads_per_worker Number of OMP threads to be used per worker
-   */
-  void set_num_omp_threads_per_worker(int num_threads_per_worker) override {
-    num_omp_threads_per_worker_ = num_threads_per_worker;
+    return OpenMP::Get()->GetRecommendedOMPThreadCount();
   }
 
  private:
@@ -218,8 +210,6 @@ class NaiveEngine final : public Engine {
   mshadow::Stream<cpu> cpu_stream_;
   // GPU streams
   std::vector<mshadow::Stream<gpu>*> streams_;
-  /*! \brief Number of OMP threads to be used per worker */
-  int num_omp_threads_per_worker_{0};
 };  // class NaiveEngine
 
 
diff --git a/src/engine/openmp.cc b/src/engine/openmp.cc
new file mode 100644
index 0000000..a605f97
--- /dev/null
+++ b/src/engine/openmp.cc
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <dmlc/omp.h>
+#include <dmlc/base.h>
+#include <dmlc/parameter.h>
+#include <climits>
+#include "./openmp.h"
+
+namespace mxnet {
+namespace engine {
+
+#if defined(__i386__) || defined(_M_X86) || defined(_M_X64) || defined(__x86_64__)
+#define ARCH_IS_INTEL_X86
+#endif
+
+OpenMP *OpenMP::Get() {
+  static OpenMP openMP;
+  return &openMP;
+}
+
+OpenMP::OpenMP()
+  : omp_num_threads_set_in_environment(dmlc::GetEnv("OMP_NUM_THREADS", INT_MIN) == INT_MIN) {
+#ifdef _OPENMP
+  if (!omp_num_threads_set_in_environment) {
+    omp_set_nested(true);
+    omp_set_dynamic(false);
+  }
+  const int max = dmlc::GetEnv("MXNET_OMP_MAX_THREADS", INT_MIN);
+  if (max != INT_MIN) {
+    omp_thread_max_ = max;
+  } else {
+#ifdef ARCH_IS_INTEL_X86
+    omp_thread_max_ = omp_get_num_procs() >> 1;
+#endif
+  }
+#else
+  enabled_ = false;
+  omp_thread_max_ = 1;
+#endif
+}
+
+int OpenMP::GetRecommendedOMPThreadCount() const {
+#ifdef _OPENMP
+  if (omp_num_threads_set_in_environment) {
+    return omp_get_max_threads();
+  }
+  if (enabled_) {
+#ifdef ARCH_IS_INTEL_X86
+    // x86 does hyperthreading, but do to cache issues, it's faster to only use # true CPUs
+    const int thread_count = omp_get_max_threads() >> 1;
+#else
+    const int thread_count = omp_get_max_threads();
+#endif
+    if (!omp_thread_max_ || thread_count < omp_thread_max_) {
+      return thread_count;
+    }
+    return omp_thread_max_;
+  }
+  return 1;
+#else
+  return 1;
+#endif
+}
+
+}  // namespace engine
+}  // namespace mxnet
+
diff --git a/src/engine/openmp.h b/src/engine/openmp.h
new file mode 100644
index 0000000..8b995a6
--- /dev/null
+++ b/src/engine/openmp.h
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#ifndef MXNET_ENGINE_OPENMP_H_
+#define MXNET_ENGINE_OPENMP_H_
+
+namespace mxnet {
+namespace engine {
+
+/*! \brief OpenMP wrapper and management class
+ *         This class manages a layer on top of the OMP implementation and does not
+ *         interact bidirectionally with the OMP implementation for all behaviors
+ *         (i.e. it's meant to be use explicitly for explicit arguments to omp pragmas
+ *         without affecting the behavior when no arguments are given)
+ */
+class OpenMP {
+ public:
+  OpenMP();
+
+  /*!
+   * \brief Get the recommended number of OMP threads to use given the current context
+   * \return Recommended number of OMP threads to use in a parallel operation
+   */
+  int GetRecommendedOMPThreadCount() const;
+
+  /*!
+   * \brief Set whether clients of this class receive pro-OMP behavior guidance
+   * \param enabled Set to 'true' if this class should provide OMP behavior
+   */
+  void set_enabled(bool enabled) { enabled_ = enabled; }
+  bool enabled() const { return enabled_; }
+
+  /*!
+   * \brief Set maximum number of threads to be used in an OMP region
+   * \param thread_max Maximum number of threads to be used in an OMP region
+   */
+  void set_thread_max(int thread_max) { omp_thread_max_ = thread_max; }
+  /*!
+   * \brief Maximum number of threads to be used in an OMP region
+   * \return Maximum number of threads
+   */
+  int thread_max() const { return omp_thread_max_; }
+
+  /*!
+   * \brief Get the OpenMP object's singleton pointer
+   * \return
+   */
+  static OpenMP *Get();
+
+ private:
+  /*!
+   * \brief Whether OpenMP layer is enabled (use more then one thread).  Independent of OMP library
+   *        behavior
+   */
+  volatile bool enabled_ = true;
+  /*!
+   * \brief Maximum number of threads for any OMP region
+   */
+  volatile int omp_thread_max_ = 0;
+  /*!
+   * \brief Whether OMP_NUM_THREADS was set in the environment.  If it is, we fall back to
+   *        the OMP's implementation's handling of that environment variable
+   */
+  const bool omp_num_threads_set_in_environment;
+};
+
+}  // namespace engine
+}  // namespace mxnet
+
+#endif  // MXNET_ENGINE_OPENMP_H_
diff --git a/src/engine/thread_pool.h b/src/engine/thread_pool.h
index b6fe3c2..a4c1e33 100644
--- a/src/engine/thread_pool.h
+++ b/src/engine/thread_pool.h
@@ -57,8 +57,8 @@ class ThreadPool {
 
     /*! \brief Signal event upon destruction, even for exceptions (RAII) */
     struct SetReadyOnDestroy {
-      explicit inline SetReadyOnDestroy(std::shared_ptr<SimpleEvent> event)
-        : event_(event) {
+      explicit inline SetReadyOnDestroy(std::shared_ptr<SimpleEvent> *event)
+        : event_(*event) {
       }
       inline ~SetReadyOnDestroy() {
         if (event_) {
diff --git a/src/engine/threaded_engine.h b/src/engine/threaded_engine.h
index a4f61af..3cf6653 100644
--- a/src/engine/threaded_engine.h
+++ b/src/engine/threaded_engine.h
@@ -38,6 +38,7 @@
 #include <thread>
 #include "./engine_impl.h"
 #include "./profiler.h"
+#include "./openmp.h"
 #include "../common/object_pool.h"
 
 namespace mxnet {
@@ -287,7 +288,6 @@ class ThreadedEngine : public Engine {
     objpool_var_ref_    = common::ObjectPool<ThreadedVar>::_GetSharedRef();
 
     /*! \brief Set default OMP threads per kernel worker to default */
-    set_num_omp_threads_per_worker(DefaultOMPThreadsPerWorker());
   }
   ~ThreadedEngine() {
     {
@@ -387,15 +387,7 @@ class ThreadedEngine : public Engine {
    * \return Number of OMP threads that should be used per worker
    */
   int num_omp_threads_per_worker() const override {
-    return num_omp_threads_per_worker_;
-  }
-
-  /*! \brief Set the number of OMP threads that should be used per worker
-   * \param num_threads_per_worker Number of OMP threads to be used per worker
-   * TODO(cjolivier01) Dynamically adjust based upon number of concurrent CPU jobs
-   */
-  void set_num_omp_threads_per_worker(int num_omp_threads_per_worker) override {
-    num_omp_threads_per_worker_ = num_omp_threads_per_worker;
+    return OpenMP::Get()->GetRecommendedOMPThreadCount();
   }
 
  private:
@@ -444,9 +436,6 @@ class ThreadedEngine : public Engine {
   std::shared_ptr<common::ObjectPool<VersionedVarBlock> > objpool_varblk_ref_;
   std::shared_ptr<common::ObjectPool<ThreadedVar> >       objpool_var_ref_;
 
-  /*! \brief Number of OMP threads to be used per worker */
-  int num_omp_threads_per_worker_{1};
-
   /*!
    * \brief Disallow copy construction and assignment.
    */
diff --git a/src/engine/threaded_engine_perdevice.cc b/src/engine/threaded_engine_perdevice.cc
index fe5a874..6086879 100644
--- a/src/engine/threaded_engine_perdevice.cc
+++ b/src/engine/threaded_engine_perdevice.cc
@@ -114,7 +114,7 @@ class ThreadedEnginePerDevice : public ThreadedEngine {
           auto ptr =
           gpu_copy_workers_.Get(ctx.dev_id, [this, ctx, is_copy, nthread]() {
             // Signify to kernel that GPU is being used,  no Kernel Launch OMP (temporary behavior)
-            Engine::Get()->set_num_omp_threads_per_worker(1);
+            OpenMP::Get()->set_enabled(false);
             auto blk = new ThreadWorkerBlock<kCopyQueue>();
               blk->pool.reset(new ThreadPool(
                 nthread,
@@ -134,7 +134,7 @@ class ThreadedEnginePerDevice : public ThreadedEngine {
         } else {
           auto ptr = gpu_normal_workers_.Get(ctx.dev_id, [this, ctx, is_copy, nthread]() {
             // Signify to kernel that GPU is being used,  no Kernel Launch OMP (temporary behavior)
-            Engine::Get()->set_num_omp_threads_per_worker(1);
+            OpenMP::Get()->set_enabled(false);
               auto blk = new ThreadWorkerBlock<kWorkerQueue>();
               blk->pool.reset(new ThreadPool(
                 nthread,
@@ -196,7 +196,7 @@ class ThreadedEnginePerDevice : public ThreadedEngine {
 #if MXNET_USE_CUDA
     mshadow::Stream<gpu> *stream;
     do {
-      ThreadPool::SimpleEvent::SetReadyOnDestroy setReady(ready_event);
+      ThreadPool::SimpleEvent::SetReadyOnDestroy setReady(&ready_event);
       // allocate stream
       mshadow::SetDevice<gpu>(ctx.dev_id);
       if (is_copy_worker) {
diff --git a/src/nnvm/legacy_op_util.cc b/src/nnvm/legacy_op_util.cc
index 6e60178..e5d1d1c 100644
--- a/src/nnvm/legacy_op_util.cc
+++ b/src/nnvm/legacy_op_util.cc
@@ -30,8 +30,10 @@ class ParsedOpProp {
   std::vector<std::string> outputs;
   // initializer
   void Init(const NodeAttrs& attrs) {
-    std::vector<std::pair<std::string, std::string> > kwargs(
-        attrs.dict.begin(), attrs.dict.end());
+    // For performance, do a reserve first and then copy attrs.dict
+    std::vector<std::pair<std::string, std::string> > kwargs;
+    kwargs.reserve(attrs.dict.size());
+    kwargs.insert(kwargs.end(), attrs.dict.begin(), attrs.dict.end());
     try {
       ptr->Init(kwargs);
     } catch (const dmlc::ParamError& e) {

-- 
To stop receiving notification emails like this one, please contact
['"commits@mxnet.apache.org" <co...@mxnet.apache.org>'].