You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by cj...@apache.org on 2017/12/06 16:49:06 UTC
[incubator-mxnet] branch master updated: Small first iteration for profiler: use nonblocking queue (#8803)

This is an automated email from the ASF dual-hosted git repository.

cjolivier01 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
     new 43d534d  Small first iteration for profiler: use nonblocking queue (#8803)
43d534d is described below

commit 43d534d2ca87315531eb7f2715b8a5c2f033f25c
Author: Chris Olivier <cj...@gmail.com>
AuthorDate: Wed Dec 6 08:48:53 2017 -0800

    Small first iteration for profiler: use nonblocking queue (#8803)
    
    * Adjust profiling unit test
    
    * Switch profiler to use nonblocking queue
    
    * Profiler on by default (there's no overhead when not turned on)
    
    * lint
    
    * Set commit for dmlc-core
    
    * Fix amalgamation
    
    * Commit
    
    * delete opr_stat
    
    * delete opr_stat
    
    * Set new dmlc-core commit
    
    * Remove dual delete
    
    * Change commit on dmlc-core
    
    * Set newer dmlc-core commit
    
    * Try to set dmlc-core commit again
    
    * Excluse windows.h for non-Windows build
---
 CMakeLists.txt                         |  2 +-
 amalgamation/amalgamation.py           | 10 ++++++++-
 dmlc-core                              |  2 +-
 src/engine/profiler.cc                 | 41 +++++++++++++---------------------
 src/engine/profiler.h                  | 22 ++++++++++++++----
 tests/python/unittest/test_profiler.py | 11 +++++----
 6 files changed, 49 insertions(+), 39 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 50f6008..4febbe4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -38,7 +38,7 @@ mxnet_option(USE_MKL_EXPERIMENTAL "Use experimental MKL (if MKL enabled and foun
 mxnet_option(USE_OPERATOR_TUNING  "Enable auto-tuning of operators" ON AND NOT MSVC)
 mxnet_option(USE_GPERFTOOLS       "Build with GPerfTools support (if found)" ON)
 mxnet_option(USE_JEMALLOC         "Build with Jemalloc support"   ON)
-mxnet_option(USE_PROFILER         "Build with Profiler support"   OFF)
+mxnet_option(USE_PROFILER         "Build with Profiler support"   ON)
 mxnet_option(USE_DIST_KVSTORE     "Build with DIST_KVSTORE support" OFF)
 mxnet_option(USE_PLUGINS_WARPCTC  "Use WARPCTC Plugins" OFF)
 mxnet_option(USE_PLUGIN_CAFFE     "Use Caffe Plugin" OFF)
diff --git a/amalgamation/amalgamation.py b/amalgamation/amalgamation.py
index 2aba8f4..b378817 100644
--- a/amalgamation/amalgamation.py
+++ b/amalgamation/amalgamation.py
@@ -17,6 +17,7 @@
 
 import sys
 import os.path, re, StringIO
+import platform
 
 blacklist = [
     'Windows.h', 'cublas_v2.h', 'cuda/tensor_gpu-inl.cuh',
@@ -26,7 +27,8 @@ blacklist = [
     'malloc.h', 'mkl.h', 'mkl_cblas.h', 'mkl_vsl.h', 'mkl_vsl_functions.h',
     'nvml.h', 'opencv2/opencv.hpp', 'sys/stat.h', 'sys/types.h', 'cuda.h', 'cuda_fp16.h',
     'omp.h', 'execinfo.h', 'packet/sse-inl.h', 'emmintrin.h', 'thrust/device_vector.h',
-    'cusolverDn.h'
+    'cusolverDn.h', 'internal/concurrentqueue_internal_debug.h', 'relacy/relacy_std.hpp',
+    'relacy_shims.h'
     ]
 
 minimum = int(sys.argv[6]) if len(sys.argv) > 5 else 0
@@ -36,6 +38,12 @@ android = int(sys.argv[7]) if len(sys.argv) > 6 else 0
 if minimum != 0:
     blacklist.append('linalg.h')
 
+if platform.system() != 'Darwin':
+  blacklist.append('TargetConditionals.h')
+
+if platform.system() != 'Windows':
+  blacklist.append('windows.h')
+
 def pprint(lst):
     for item in lst:
         print item
diff --git a/dmlc-core b/dmlc-core
index 87b7ffa..ebbda66 160000
--- a/dmlc-core
+++ b/dmlc-core
@@ -1 +1 @@
-Subproject commit 87b7ffa59eb78f753073ac56f5f60e46d930b93c
+Subproject commit ebbda66217efd5f7e16ace68cae0128a7117d081
diff --git a/src/engine/profiler.cc b/src/engine/profiler.cc
index 44ad138..13f8cca 100644
--- a/src/engine/profiler.cc
+++ b/src/engine/profiler.cc
@@ -24,12 +24,8 @@
  */
 #include <dmlc/base.h>
 #include <dmlc/logging.h>
+#include <dmlc/omp.h>
 #include <mxnet/base.h>
-#include <set>
-#include <map>
-#include <mutex>
-#include <chrono>
-#include <iostream>
 #include <fstream>
 #include <thread>
 #include "./profiler.h"
@@ -44,7 +40,6 @@
 
 namespace mxnet {
 namespace engine {
-const int INITIAL_SIZE = 1024;
 
 Profiler::Profiler()
   : state_(kNotRunning), enable_output_(false), filename_("profile.json") {
@@ -59,14 +54,13 @@ Profiler::Profiler()
 #endif
 
   this->profile_stat = new DevStat[cpu_num_ + gpu_num_ + 1];
-  this->profile_stat->opr_exec_stats.reserve(INITIAL_SIZE);
   for (unsigned int i = 0; i < cpu_num_; ++i) {
-    profile_stat[i].dev_name = "cpu/" + std::to_string(i);
+    profile_stat[i].dev_name_ = "cpu/" + std::to_string(i);
   }
   for (unsigned int i = 0; i < gpu_num_; ++i) {
-    profile_stat[cpu_num_ + i].dev_name = "gpu/" + std::to_string(i);
+    profile_stat[cpu_num_ + i].dev_name_ = "gpu/" + std::to_string(i);
   }
-  profile_stat[cpu_num_ + gpu_num_].dev_name = "cpu pinned/";
+  profile_stat[cpu_num_ + gpu_num_].dev_name_ = "cpu pinned/";
 
   mode_ = (ProfilerMode)dmlc::GetEnv("MXNET_PROFILER_MODE", static_cast<int>(kOnlySymbolic));
   if (dmlc::GetEnv("MXNET_PROFILER_AUTOSTART", 0)) {
@@ -99,7 +93,7 @@ void Profiler::SetConfig(ProfilerMode mode, std::string output_filename) {
 }
 
 OprExecStat *Profiler::AddOprStat(int dev_type, uint32_t dev_id) {
-  OprExecStat* opr_stat = new OprExecStat;
+  std::unique_ptr<OprExecStat> opr_stat(new OprExecStat);
   opr_stat->dev_type = dev_type;
   opr_stat->dev_id   = dev_id;
   opr_stat->opr_name[sizeof(opr_stat->opr_name)-1] = '\0';
@@ -116,16 +110,13 @@ OprExecStat *Profiler::AddOprStat(int dev_type, uint32_t dev_id) {
       idx = cpu_num_ + gpu_num_;
       break;
     default:
-      LOG(FATAL) << "Unkown dev_type";
+      LOG(FATAL) << "Unknown dev_type: " << dev_type;
       return NULL;
   }
 
   DevStat& dev_stat = profile_stat[idx];
-  {
-    std::lock_guard<std::mutex> lock{dev_stat.m_};
-    dev_stat.opr_exec_stats.push_back(opr_stat);
-  }
-  return opr_stat;
+  dev_stat.opr_exec_stats_->enqueue(opr_stat.get());
+  return opr_stat.release();
 }
 
 void Profiler::EmitPid(std::ostream *os, const std::string& name, uint32_t pid) {
@@ -167,19 +158,17 @@ void Profiler::DumpProfile() {
 
   for (uint32_t i = 0; i < dev_num; ++i) {
     const DevStat &d = profile_stat[i];
-    this->EmitPid(&file, d.dev_name, i);
+    this->EmitPid(&file, d.dev_name_, i);
     file << ",\n";
   }
 
   bool first_flag = true;
   for (uint32_t i = 0; i < dev_num; ++i) {
     DevStat &d = profile_stat[i];
-    std::lock_guard<std::mutex> lock(d.m_);
-    uint32_t opr_num = d.opr_exec_stats.size();
-
-    for (uint32_t j = 0; j < opr_num; ++j) {
-      const OprExecStat* opr_stat = d.opr_exec_stats[j];
-
+    OprExecStat *_opr_stat;
+    while (d.opr_exec_stats_->try_dequeue(_opr_stat)) {
+      CHECK_NOTNULL(_opr_stat);
+      std::unique_ptr<OprExecStat> opr_stat(_opr_stat);  // manage lifecycle
       uint32_t pid = i;
       uint32_t tid = opr_stat->thread_id;
 
@@ -190,10 +179,10 @@ void Profiler::DumpProfile() {
       }
       file << std::endl;
       this->EmitEvent(&file, opr_stat->opr_name, "category", "B",
-            opr_stat->opr_start_rel_micros, pid, tid);
+                      opr_stat->opr_start_rel_micros, pid, tid);
       file << ",\n";
       this->EmitEvent(&file, opr_stat->opr_name, "category", "E",
-            opr_stat->opr_end_rel_micros, pid, tid);
+                      opr_stat->opr_end_rel_micros, pid, tid);
     }
   }
 
diff --git a/src/engine/profiler.h b/src/engine/profiler.h
index dbbc773..ebd9420 100644
--- a/src/engine/profiler.h
+++ b/src/engine/profiler.h
@@ -25,6 +25,7 @@
 #ifndef MXNET_ENGINE_PROFILER_H_
 #define MXNET_ENGINE_PROFILER_H_
 
+#include <dmlc/concurrentqueue.h>
 #include <vector>
 #include <string>
 #include <mutex>
@@ -65,11 +66,24 @@ struct OprExecStat {
  */
 struct DevStat {
   /*! \brief device name */
-  std::string dev_name;
+  std::string dev_name_;
   /*! \brief operation execution statistics on this device */
-  std::vector<OprExecStat*> opr_exec_stats;
-  /*! \brief internal mutex of the execution state */
-  std::mutex m_;
+  std::shared_ptr<dmlc::moodycamel::ConcurrentQueue<OprExecStat *>> opr_exec_stats_ =
+    std::make_shared<dmlc::moodycamel::ConcurrentQueue<OprExecStat *>>();
+
+  /*!
+   * \brief Destructor, clean up allocated objects
+   *        TODO(cjolivier01) Investigate queueing unique_ptr<>'s if it won't hurt performance
+   */
+  ~DevStat() {
+    std::shared_ptr<dmlc::moodycamel::ConcurrentQueue<OprExecStat *>> es = opr_exec_stats_;
+    if (es) {
+      OprExecStat *opr_stat = nullptr;
+      while (es->try_dequeue(opr_stat)) {
+        delete opr_stat;
+      }
+    }
+  }
 };
 
 
diff --git a/tests/python/unittest/test_profiler.py b/tests/python/unittest/test_profiler.py
index 724ed3a..78baf4a 100644
--- a/tests/python/unittest/test_profiler.py
+++ b/tests/python/unittest/test_profiler.py
@@ -19,14 +19,12 @@ from __future__ import print_function
 import mxnet as mx
 from mxnet import profiler
 import time
-import numpy as np
 
 def test_profiler():
     profile_filename = "test_profile.json"
-    iter_num = 100
-    begin_profiling_iter = 50
-    end_profiling_iter = 50
-
+    iter_num = 5
+    begin_profiling_iter = 2
+    end_profiling_iter = 4
 
     profiler.profiler_set_config(mode='symbolic', filename=profile_filename)
     print('profile file save to {0}'.format(profile_filename))
@@ -43,9 +41,9 @@ def test_profiler():
     a.copyto(executor.arg_dict['A'])
     b.copyto(executor.arg_dict['B'])
 
-    flag = False
     print("execution begin")
     for i in range(iter_num):
+        print("Iteration {}/{}".format(i + 1, iter_num))
         if i == begin_profiling_iter:
             t0 = time.clock()
             profiler.profiler_set_state('run')
@@ -59,6 +57,7 @@ def test_profiler():
     duration = t1 - t0
     print('duration: {0}s'.format(duration))
     print('          {0}ms/operator'.format(duration*1000/iter_num))
+    profiler.dump_profile()
 
 if __name__ == '__main__':
     test_profiler()

-- 
To stop receiving notification emails like this one, please contact
['"commits@mxnet.apache.org" <co...@mxnet.apache.org>'].