You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by cj...@apache.org on 2017/12/06 16:49:06 UTC
[incubator-mxnet] branch master updated: Small first iteration for
profiler: use nonblocking queue (#8803)
This is an automated email from the ASF dual-hosted git repository.
cjolivier01 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new 43d534d Small first iteration for profiler: use nonblocking queue (#8803)
43d534d is described below
commit 43d534d2ca87315531eb7f2715b8a5c2f033f25c
Author: Chris Olivier <cj...@gmail.com>
AuthorDate: Wed Dec 6 08:48:53 2017 -0800
Small first iteration for profiler: use nonblocking queue (#8803)
* Adjust profiling unit test
* Switch profiler to use nonblocking queue
* Profiler on by default (there's no overhead when not turned on)
* lint
* Set commit for dmlc-core
* Fix amalgamation
* Commit
* delete opr_stat
* delete opr_stat
* Set new dmlc-core commit
* Remove dual delete
* Change commit on dmlc-core
* Set newer dmlc-core commit
* Try to set dmlc-core commit again
* Exclude windows.h for non-Windows build
---
CMakeLists.txt | 2 +-
amalgamation/amalgamation.py | 10 ++++++++-
dmlc-core | 2 +-
src/engine/profiler.cc | 41 +++++++++++++---------------------
src/engine/profiler.h | 22 ++++++++++++++----
tests/python/unittest/test_profiler.py | 11 +++++----
6 files changed, 49 insertions(+), 39 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 50f6008..4febbe4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -38,7 +38,7 @@ mxnet_option(USE_MKL_EXPERIMENTAL "Use experimental MKL (if MKL enabled and foun
mxnet_option(USE_OPERATOR_TUNING "Enable auto-tuning of operators" ON AND NOT MSVC)
mxnet_option(USE_GPERFTOOLS "Build with GPerfTools support (if found)" ON)
mxnet_option(USE_JEMALLOC "Build with Jemalloc support" ON)
-mxnet_option(USE_PROFILER "Build with Profiler support" OFF)
+mxnet_option(USE_PROFILER "Build with Profiler support" ON)
mxnet_option(USE_DIST_KVSTORE "Build with DIST_KVSTORE support" OFF)
mxnet_option(USE_PLUGINS_WARPCTC "Use WARPCTC Plugins" OFF)
mxnet_option(USE_PLUGIN_CAFFE "Use Caffe Plugin" OFF)
diff --git a/amalgamation/amalgamation.py b/amalgamation/amalgamation.py
index 2aba8f4..b378817 100644
--- a/amalgamation/amalgamation.py
+++ b/amalgamation/amalgamation.py
@@ -17,6 +17,7 @@
import sys
import os.path, re, StringIO
+import platform
blacklist = [
'Windows.h', 'cublas_v2.h', 'cuda/tensor_gpu-inl.cuh',
@@ -26,7 +27,8 @@ blacklist = [
'malloc.h', 'mkl.h', 'mkl_cblas.h', 'mkl_vsl.h', 'mkl_vsl_functions.h',
'nvml.h', 'opencv2/opencv.hpp', 'sys/stat.h', 'sys/types.h', 'cuda.h', 'cuda_fp16.h',
'omp.h', 'execinfo.h', 'packet/sse-inl.h', 'emmintrin.h', 'thrust/device_vector.h',
- 'cusolverDn.h'
+ 'cusolverDn.h', 'internal/concurrentqueue_internal_debug.h', 'relacy/relacy_std.hpp',
+ 'relacy_shims.h'
]
minimum = int(sys.argv[6]) if len(sys.argv) > 5 else 0
@@ -36,6 +38,12 @@ android = int(sys.argv[7]) if len(sys.argv) > 6 else 0
if minimum != 0:
blacklist.append('linalg.h')
+if platform.system() != 'Darwin':
+ blacklist.append('TargetConditionals.h')
+
+if platform.system() != 'Windows':
+ blacklist.append('windows.h')
+
def pprint(lst):
for item in lst:
print item
diff --git a/dmlc-core b/dmlc-core
index 87b7ffa..ebbda66 160000
--- a/dmlc-core
+++ b/dmlc-core
@@ -1 +1 @@
-Subproject commit 87b7ffa59eb78f753073ac56f5f60e46d930b93c
+Subproject commit ebbda66217efd5f7e16ace68cae0128a7117d081
diff --git a/src/engine/profiler.cc b/src/engine/profiler.cc
index 44ad138..13f8cca 100644
--- a/src/engine/profiler.cc
+++ b/src/engine/profiler.cc
@@ -24,12 +24,8 @@
*/
#include <dmlc/base.h>
#include <dmlc/logging.h>
+#include <dmlc/omp.h>
#include <mxnet/base.h>
-#include <set>
-#include <map>
-#include <mutex>
-#include <chrono>
-#include <iostream>
#include <fstream>
#include <thread>
#include "./profiler.h"
@@ -44,7 +40,6 @@
namespace mxnet {
namespace engine {
-const int INITIAL_SIZE = 1024;
Profiler::Profiler()
: state_(kNotRunning), enable_output_(false), filename_("profile.json") {
@@ -59,14 +54,13 @@ Profiler::Profiler()
#endif
this->profile_stat = new DevStat[cpu_num_ + gpu_num_ + 1];
- this->profile_stat->opr_exec_stats.reserve(INITIAL_SIZE);
for (unsigned int i = 0; i < cpu_num_; ++i) {
- profile_stat[i].dev_name = "cpu/" + std::to_string(i);
+ profile_stat[i].dev_name_ = "cpu/" + std::to_string(i);
}
for (unsigned int i = 0; i < gpu_num_; ++i) {
- profile_stat[cpu_num_ + i].dev_name = "gpu/" + std::to_string(i);
+ profile_stat[cpu_num_ + i].dev_name_ = "gpu/" + std::to_string(i);
}
- profile_stat[cpu_num_ + gpu_num_].dev_name = "cpu pinned/";
+ profile_stat[cpu_num_ + gpu_num_].dev_name_ = "cpu pinned/";
mode_ = (ProfilerMode)dmlc::GetEnv("MXNET_PROFILER_MODE", static_cast<int>(kOnlySymbolic));
if (dmlc::GetEnv("MXNET_PROFILER_AUTOSTART", 0)) {
@@ -99,7 +93,7 @@ void Profiler::SetConfig(ProfilerMode mode, std::string output_filename) {
}
OprExecStat *Profiler::AddOprStat(int dev_type, uint32_t dev_id) {
- OprExecStat* opr_stat = new OprExecStat;
+ std::unique_ptr<OprExecStat> opr_stat(new OprExecStat);
opr_stat->dev_type = dev_type;
opr_stat->dev_id = dev_id;
opr_stat->opr_name[sizeof(opr_stat->opr_name)-1] = '\0';
@@ -116,16 +110,13 @@ OprExecStat *Profiler::AddOprStat(int dev_type, uint32_t dev_id) {
idx = cpu_num_ + gpu_num_;
break;
default:
- LOG(FATAL) << "Unkown dev_type";
+ LOG(FATAL) << "Unknown dev_type: " << dev_type;
return NULL;
}
DevStat& dev_stat = profile_stat[idx];
- {
- std::lock_guard<std::mutex> lock{dev_stat.m_};
- dev_stat.opr_exec_stats.push_back(opr_stat);
- }
- return opr_stat;
+ dev_stat.opr_exec_stats_->enqueue(opr_stat.get());
+ return opr_stat.release();
}
void Profiler::EmitPid(std::ostream *os, const std::string& name, uint32_t pid) {
@@ -167,19 +158,17 @@ void Profiler::DumpProfile() {
for (uint32_t i = 0; i < dev_num; ++i) {
const DevStat &d = profile_stat[i];
- this->EmitPid(&file, d.dev_name, i);
+ this->EmitPid(&file, d.dev_name_, i);
file << ",\n";
}
bool first_flag = true;
for (uint32_t i = 0; i < dev_num; ++i) {
DevStat &d = profile_stat[i];
- std::lock_guard<std::mutex> lock(d.m_);
- uint32_t opr_num = d.opr_exec_stats.size();
-
- for (uint32_t j = 0; j < opr_num; ++j) {
- const OprExecStat* opr_stat = d.opr_exec_stats[j];
-
+ OprExecStat *_opr_stat;
+ while (d.opr_exec_stats_->try_dequeue(_opr_stat)) {
+ CHECK_NOTNULL(_opr_stat);
+ std::unique_ptr<OprExecStat> opr_stat(_opr_stat); // manage lifecycle
uint32_t pid = i;
uint32_t tid = opr_stat->thread_id;
@@ -190,10 +179,10 @@ void Profiler::DumpProfile() {
}
file << std::endl;
this->EmitEvent(&file, opr_stat->opr_name, "category", "B",
- opr_stat->opr_start_rel_micros, pid, tid);
+ opr_stat->opr_start_rel_micros, pid, tid);
file << ",\n";
this->EmitEvent(&file, opr_stat->opr_name, "category", "E",
- opr_stat->opr_end_rel_micros, pid, tid);
+ opr_stat->opr_end_rel_micros, pid, tid);
}
}
diff --git a/src/engine/profiler.h b/src/engine/profiler.h
index dbbc773..ebd9420 100644
--- a/src/engine/profiler.h
+++ b/src/engine/profiler.h
@@ -25,6 +25,7 @@
#ifndef MXNET_ENGINE_PROFILER_H_
#define MXNET_ENGINE_PROFILER_H_
+#include <dmlc/concurrentqueue.h>
#include <vector>
#include <string>
#include <mutex>
@@ -65,11 +66,24 @@ struct OprExecStat {
*/
struct DevStat {
/*! \brief device name */
- std::string dev_name;
+ std::string dev_name_;
/*! \brief operation execution statistics on this device */
- std::vector<OprExecStat*> opr_exec_stats;
- /*! \brief internal mutex of the execution state */
- std::mutex m_;
+ std::shared_ptr<dmlc::moodycamel::ConcurrentQueue<OprExecStat *>> opr_exec_stats_ =
+ std::make_shared<dmlc::moodycamel::ConcurrentQueue<OprExecStat *>>();
+
+ /*!
+ * \brief Destructor, clean up allocated objects
+ * TODO(cjolivier01) Investigate queueing unique_ptr<>'s if it won't hurt performance
+ */
+ ~DevStat() {
+ std::shared_ptr<dmlc::moodycamel::ConcurrentQueue<OprExecStat *>> es = opr_exec_stats_;
+ if (es) {
+ OprExecStat *opr_stat = nullptr;
+ while (es->try_dequeue(opr_stat)) {
+ delete opr_stat;
+ }
+ }
+ }
};
diff --git a/tests/python/unittest/test_profiler.py b/tests/python/unittest/test_profiler.py
index 724ed3a..78baf4a 100644
--- a/tests/python/unittest/test_profiler.py
+++ b/tests/python/unittest/test_profiler.py
@@ -19,14 +19,12 @@ from __future__ import print_function
import mxnet as mx
from mxnet import profiler
import time
-import numpy as np
def test_profiler():
profile_filename = "test_profile.json"
- iter_num = 100
- begin_profiling_iter = 50
- end_profiling_iter = 50
-
+ iter_num = 5
+ begin_profiling_iter = 2
+ end_profiling_iter = 4
profiler.profiler_set_config(mode='symbolic', filename=profile_filename)
print('profile file save to {0}'.format(profile_filename))
@@ -43,9 +41,9 @@ def test_profiler():
a.copyto(executor.arg_dict['A'])
b.copyto(executor.arg_dict['B'])
- flag = False
print("execution begin")
for i in range(iter_num):
+ print("Iteration {}/{}".format(i + 1, iter_num))
if i == begin_profiling_iter:
t0 = time.clock()
profiler.profiler_set_state('run')
@@ -59,6 +57,7 @@ def test_profiler():
duration = t1 - t0
print('duration: {0}s'.format(duration))
print(' {0}ms/operator'.format(duration*1000/iter_num))
+ profiler.dump_profile()
if __name__ == '__main__':
test_profiler()
--
To stop receiving notification emails like this one, please contact
['"commits@mxnet.apache.org" <co...@mxnet.apache.org>'].