You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by cj...@apache.org on 2017/11/04 03:28:44 UTC
[incubator-mxnet] branch master updated: Move OpenMP calculations
into its own class (#8475)
This is an automated email from the ASF dual-hosted git repository.
cjolivier01 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new 90ba62a Move OpenMP calculations into its own class (#8475)
90ba62a is described below
commit 90ba62a9eae9061d3763d594a552a367e550fe4f
Author: Chris Olivier <cj...@gmail.com>
AuthorDate: Fri Nov 3 20:28:41 2017 -0700
Move OpenMP calculations into its own class (#8475)
* Different calls into OMP class
* Copy from 'tuner' branch
* Fix for _OPENMP not defined, class still exists
* fix bracket
* Added more comments
* fix condition when CSRNDArray is used in NDArrayIter with shuffle=True (#8374)
* fix condition when CSRNDArray is used in NDArrayIter with shuffle=True
* fix indent
* add unittest for invalid inputs
* replace AssertErr with NotImplErr
* update comment
* trigger ci
* Fix urllib bug affecting Python 3.x in the finetune tutorial (#8229)
* changed url references from dmlc to apache/incubator-mxnet
* added python 3 support for dataset download with urllib
* added python 3 support for dataset download with urllib
* removed unintended edits
* Build fix, add more documentation
* lint
* commiting v12 changess (#8478)
* Simplified unary/binary math operators (#8361) (#8361)
* bump up version (#8488)
* fix makenonlossgrad bug (#8508)
* fix expand_dims if axis< 0 (#8489)
* fix expand_dims if axis< 0
* Update test_operator.py
* Trigger build
---
include/mxnet/engine.h | 5 --
src/engine/naive_engine.cc | 14 +-----
src/engine/openmp.cc | 83 ++++++++++++++++++++++++++++++++
src/engine/openmp.h | 85 +++++++++++++++++++++++++++++++++
src/engine/thread_pool.h | 4 +-
src/engine/threaded_engine.h | 15 +-----
src/engine/threaded_engine_perdevice.cc | 6 +--
src/nnvm/legacy_op_util.cc | 6 ++-
8 files changed, 181 insertions(+), 37 deletions(-)
diff --git a/include/mxnet/engine.h b/include/mxnet/engine.h
index 30aecbd..4048d5a 100644
--- a/include/mxnet/engine.h
+++ b/include/mxnet/engine.h
@@ -272,11 +272,6 @@ class MXNET_API Engine {
* \return Number of OMP threads that should be used per worker
*/
virtual int num_omp_threads_per_worker() const = 0;
-
- /*! \brief Set the number of OMP threads that should be used per worker
- * \param num_threads_per_worker Number of OMP threads to be used per worker
- */
- virtual void set_num_omp_threads_per_worker(int num_omp_threads_per_worker) = 0;
}; // class Engine
#endif // DMLC_USE_CXX11
} // namespace mxnet
diff --git a/src/engine/naive_engine.cc b/src/engine/naive_engine.cc
index a31101c..7e3554a 100644
--- a/src/engine/naive_engine.cc
+++ b/src/engine/naive_engine.cc
@@ -26,7 +26,7 @@
#include <thread>
#include "./engine_impl.h"
#include "./profiler.h"
-#include "threaded_engine.h"
+#include "./openmp.h"
namespace mxnet {
namespace engine {
@@ -47,7 +47,6 @@ class NaiveEngine final : public Engine {
};
NaiveEngine() {
- set_num_omp_threads_per_worker(ThreadedEngine::DefaultOMPThreadsPerWorker());
}
// virtual destructor
virtual ~NaiveEngine() {
@@ -193,14 +192,7 @@ class NaiveEngine final : public Engine {
* \return Number of OMP threads that should be used per worker
*/
int num_omp_threads_per_worker() const override {
- return num_omp_threads_per_worker_;
- }
-
- /*! \brief Set the number of OMP threads that should be used per worker
- * \param num_threads_per_worker Number of OMP threads to be used per worker
- */
- void set_num_omp_threads_per_worker(int num_threads_per_worker) override {
- num_omp_threads_per_worker_ = num_threads_per_worker;
+ return OpenMP::Get()->GetRecommendedOMPThreadCount();
}
private:
@@ -218,8 +210,6 @@ class NaiveEngine final : public Engine {
mshadow::Stream<cpu> cpu_stream_;
// GPU streams
std::vector<mshadow::Stream<gpu>*> streams_;
- /*! \brief Number of OMP threads to be used per worker */
- int num_omp_threads_per_worker_{0};
}; // class NaiveEngine
diff --git a/src/engine/openmp.cc b/src/engine/openmp.cc
new file mode 100644
index 0000000..a605f97
--- /dev/null
+++ b/src/engine/openmp.cc
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <dmlc/omp.h>
+#include <dmlc/base.h>
+#include <dmlc/parameter.h>
+#include <climits>
+#include "./openmp.h"
+
+namespace mxnet {
+namespace engine {
+
+#if defined(__i386__) || defined(_M_X86) || defined(_M_X64) || defined(__x86_64__)
+#define ARCH_IS_INTEL_X86
+#endif
+
+OpenMP *OpenMP::Get() {
+ static OpenMP openMP;
+ return &openMP;
+}
+
+OpenMP::OpenMP()
+ : omp_num_threads_set_in_environment(dmlc::GetEnv("OMP_NUM_THREADS", INT_MIN) == INT_MIN) {
+#ifdef _OPENMP
+ if (!omp_num_threads_set_in_environment) {
+ omp_set_nested(true);
+ omp_set_dynamic(false);
+ }
+ const int max = dmlc::GetEnv("MXNET_OMP_MAX_THREADS", INT_MIN);
+ if (max != INT_MIN) {
+ omp_thread_max_ = max;
+ } else {
+#ifdef ARCH_IS_INTEL_X86
+ omp_thread_max_ = omp_get_num_procs() >> 1;
+#endif
+ }
+#else
+ enabled_ = false;
+ omp_thread_max_ = 1;
+#endif
+}
+
+int OpenMP::GetRecommendedOMPThreadCount() const {
+#ifdef _OPENMP
+ if (omp_num_threads_set_in_environment) {
+ return omp_get_max_threads();
+ }
+ if (enabled_) {
+#ifdef ARCH_IS_INTEL_X86
+ // x86 does hyperthreading, but do to cache issues, it's faster to only use # true CPUs
+ const int thread_count = omp_get_max_threads() >> 1;
+#else
+ const int thread_count = omp_get_max_threads();
+#endif
+ if (!omp_thread_max_ || thread_count < omp_thread_max_) {
+ return thread_count;
+ }
+ return omp_thread_max_;
+ }
+ return 1;
+#else
+ return 1;
+#endif
+}
+
+} // namespace engine
+} // namespace mxnet
+
diff --git a/src/engine/openmp.h b/src/engine/openmp.h
new file mode 100644
index 0000000..8b995a6
--- /dev/null
+++ b/src/engine/openmp.h
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#ifndef MXNET_ENGINE_OPENMP_H_
+#define MXNET_ENGINE_OPENMP_H_
+
+namespace mxnet {
+namespace engine {
+
+/*! \brief OpenMP wrapper and management class
+ * This class manages a layer on top of the OMP implementation and does not
+ * interact bidirectionally with the OMP implementation for all behaviors
+ * (i.e. it's meant to be use explicitly for explicit arguments to omp pragmas
+ * without affecting the behavior when no arguments are given)
+ */
+class OpenMP {
+ public:
+ OpenMP();
+
+ /*!
+ * \brief Get the recommended number of OMP threads to use given the current context
+ * \return Recommended number of OMP threads to use in a parallel operation
+ */
+ int GetRecommendedOMPThreadCount() const;
+
+ /*!
+ * \brief Set whether clients of this class receive pro-OMP behavior guidance
+ * \param enabled Set to 'true' if this class should provide OMP behavior
+ */
+ void set_enabled(bool enabled) { enabled_ = enabled; }
+ bool enabled() const { return enabled_; }
+
+ /*!
+ * \brief Set maximum number of threads to be used in an OMP region
+ * \param thread_max Maximum number of threads to be used in an OMP region
+ */
+ void set_thread_max(int thread_max) { omp_thread_max_ = thread_max; }
+ /*!
+ * \brief Maximum number of threads to be used in an OMP region
+ * \return Maximum number of threads
+ */
+ int thread_max() const { return omp_thread_max_; }
+
+ /*!
+ * \brief Get the OpenMP object's singleton pointer
+ * \return
+ */
+ static OpenMP *Get();
+
+ private:
+ /*!
+ * \brief Whether OpenMP layer is enabled (use more then one thread). Independent of OMP library
+ * behavior
+ */
+ volatile bool enabled_ = true;
+ /*!
+ * \brief Maximum number of threads for any OMP region
+ */
+ volatile int omp_thread_max_ = 0;
+ /*!
+ * \brief Whether OMP_NUM_THREADS was set in the environment. If it is, we fall back to
+ * the OMP's implementation's handling of that environment variable
+ */
+ const bool omp_num_threads_set_in_environment;
+};
+
+} // namespace engine
+} // namespace mxnet
+
+#endif // MXNET_ENGINE_OPENMP_H_
diff --git a/src/engine/thread_pool.h b/src/engine/thread_pool.h
index b6fe3c2..a4c1e33 100644
--- a/src/engine/thread_pool.h
+++ b/src/engine/thread_pool.h
@@ -57,8 +57,8 @@ class ThreadPool {
/*! \brief Signal event upon destruction, even for exceptions (RAII) */
struct SetReadyOnDestroy {
- explicit inline SetReadyOnDestroy(std::shared_ptr<SimpleEvent> event)
- : event_(event) {
+ explicit inline SetReadyOnDestroy(std::shared_ptr<SimpleEvent> *event)
+ : event_(*event) {
}
inline ~SetReadyOnDestroy() {
if (event_) {
diff --git a/src/engine/threaded_engine.h b/src/engine/threaded_engine.h
index a4f61af..3cf6653 100644
--- a/src/engine/threaded_engine.h
+++ b/src/engine/threaded_engine.h
@@ -38,6 +38,7 @@
#include <thread>
#include "./engine_impl.h"
#include "./profiler.h"
+#include "./openmp.h"
#include "../common/object_pool.h"
namespace mxnet {
@@ -287,7 +288,6 @@ class ThreadedEngine : public Engine {
objpool_var_ref_ = common::ObjectPool<ThreadedVar>::_GetSharedRef();
/*! \brief Set default OMP threads per kernel worker to default */
- set_num_omp_threads_per_worker(DefaultOMPThreadsPerWorker());
}
~ThreadedEngine() {
{
@@ -387,15 +387,7 @@ class ThreadedEngine : public Engine {
* \return Number of OMP threads that should be used per worker
*/
int num_omp_threads_per_worker() const override {
- return num_omp_threads_per_worker_;
- }
-
- /*! \brief Set the number of OMP threads that should be used per worker
- * \param num_threads_per_worker Number of OMP threads to be used per worker
- * TODO(cjolivier01) Dynamically adjust based upon number of concurrent CPU jobs
- */
- void set_num_omp_threads_per_worker(int num_omp_threads_per_worker) override {
- num_omp_threads_per_worker_ = num_omp_threads_per_worker;
+ return OpenMP::Get()->GetRecommendedOMPThreadCount();
}
private:
@@ -444,9 +436,6 @@ class ThreadedEngine : public Engine {
std::shared_ptr<common::ObjectPool<VersionedVarBlock> > objpool_varblk_ref_;
std::shared_ptr<common::ObjectPool<ThreadedVar> > objpool_var_ref_;
- /*! \brief Number of OMP threads to be used per worker */
- int num_omp_threads_per_worker_{1};
-
/*!
* \brief Disallow copy construction and assignment.
*/
diff --git a/src/engine/threaded_engine_perdevice.cc b/src/engine/threaded_engine_perdevice.cc
index fe5a874..6086879 100644
--- a/src/engine/threaded_engine_perdevice.cc
+++ b/src/engine/threaded_engine_perdevice.cc
@@ -114,7 +114,7 @@ class ThreadedEnginePerDevice : public ThreadedEngine {
auto ptr =
gpu_copy_workers_.Get(ctx.dev_id, [this, ctx, is_copy, nthread]() {
// Signify to kernel that GPU is being used, no Kernel Launch OMP (temporary behavior)
- Engine::Get()->set_num_omp_threads_per_worker(1);
+ OpenMP::Get()->set_enabled(false);
auto blk = new ThreadWorkerBlock<kCopyQueue>();
blk->pool.reset(new ThreadPool(
nthread,
@@ -134,7 +134,7 @@ class ThreadedEnginePerDevice : public ThreadedEngine {
} else {
auto ptr = gpu_normal_workers_.Get(ctx.dev_id, [this, ctx, is_copy, nthread]() {
// Signify to kernel that GPU is being used, no Kernel Launch OMP (temporary behavior)
- Engine::Get()->set_num_omp_threads_per_worker(1);
+ OpenMP::Get()->set_enabled(false);
auto blk = new ThreadWorkerBlock<kWorkerQueue>();
blk->pool.reset(new ThreadPool(
nthread,
@@ -196,7 +196,7 @@ class ThreadedEnginePerDevice : public ThreadedEngine {
#if MXNET_USE_CUDA
mshadow::Stream<gpu> *stream;
do {
- ThreadPool::SimpleEvent::SetReadyOnDestroy setReady(ready_event);
+ ThreadPool::SimpleEvent::SetReadyOnDestroy setReady(&ready_event);
// allocate stream
mshadow::SetDevice<gpu>(ctx.dev_id);
if (is_copy_worker) {
diff --git a/src/nnvm/legacy_op_util.cc b/src/nnvm/legacy_op_util.cc
index 6e60178..e5d1d1c 100644
--- a/src/nnvm/legacy_op_util.cc
+++ b/src/nnvm/legacy_op_util.cc
@@ -30,8 +30,10 @@ class ParsedOpProp {
std::vector<std::string> outputs;
// initializer
void Init(const NodeAttrs& attrs) {
- std::vector<std::pair<std::string, std::string> > kwargs(
- attrs.dict.begin(), attrs.dict.end());
+ // For performance, do a reserve first and then copy attrs.dict
+ std::vector<std::pair<std::string, std::string> > kwargs;
+ kwargs.reserve(attrs.dict.size());
+ kwargs.insert(kwargs.end(), attrs.dict.begin(), attrs.dict.end());
try {
ptr->Init(kwargs);
} catch (const dmlc::ParamError& e) {
--
To stop receiving notification emails like this one, please contact
['"commits@mxnet.apache.org" <co...@mxnet.apache.org>'].