You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by ma...@apache.org on 2022/05/10 20:14:22 UTC
[tvm] branch main updated: [OpenCL] Change of OpenCL profiling logic (#11180)
This is an automated email from the ASF dual-hosted git repository.
masahi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new 0f6abea1ca [OpenCL] Change of OpenCL profiling logic (#11180)
0f6abea1ca is described below
commit 0f6abea1cafc664af0cb97f348906ae5715a5f51
Author: Kirill Snezhko <44...@users.noreply.github.com>
AuthorDate: Tue May 10 23:14:16 2022 +0300
[OpenCL] Change of OpenCL profiling logic (#11180)
* Enable profiling only when it is used explicitly
* Change logic of clCommandQueue create/destroy
* Update comments
* Linter fix
* Refactor queue create
* Move queue recreation logic to function
* Replace profiling flag by the queue info request
* Enhance readability
* Fix linter errors
---
src/runtime/opencl/opencl_common.h | 57 ++++++++++++++++++++++++++++-----
src/runtime/opencl/opencl_device_api.cc | 5 ---
src/runtime/opencl/opencl_module.cc | 19 +++++------
3 files changed, 59 insertions(+), 22 deletions(-)
diff --git a/src/runtime/opencl/opencl_common.h b/src/runtime/opencl/opencl_common.h
index 18061a7aee..c2905b4327 100644
--- a/src/runtime/opencl/opencl_common.h
+++ b/src/runtime/opencl/opencl_common.h
@@ -274,6 +274,16 @@ class OpenCLWorkspace : public DeviceAPI {
<< "Invalid OpenCL device_id=" << dev.device_id;
return events[dev.device_id];
}
+ // is current clCommandQueue in profiling mode
+ bool IsProfiling(Device dev) {
+ cl_command_queue queue = GetQueue(dev);
+ cl_command_queue_properties prop;
+
+ OPENCL_CALL(clGetCommandQueueInfo(queue, CL_QUEUE_PROPERTIES,
+ sizeof(cl_command_queue_properties), &prop, nullptr));
+
+ return prop & CL_QUEUE_PROFILING_ENABLE;
+ }
// override device API
void SetDevice(Device dev) final;
@@ -422,23 +432,32 @@ class OpenCLTimerNode : public TimerNode {
virtual void Start() {
cl::OpenCLWorkspace::Global()->GetEventQueue(dev_).clear();
this->duration = 0;
+ // Very first call of Start() leads to the recreation of
+ // OpenCL command queue in profiling mode. This allows to run profile after inference.
+ recreateCommandQueue();
}
// Timer stop
virtual void Stop() {
std::vector<cl_event> evt_queue = cl::OpenCLWorkspace::Global()->GetEventQueue(dev_);
cl_ulong start, end;
- OPENCL_CALL(clWaitForEvents(1, &(cl::OpenCLWorkspace::Global()->GetEventQueue(dev_).back())));
- for (auto& kevt : evt_queue) {
- OPENCL_CALL(clGetEventProfilingInfo(kevt, CL_PROFILING_COMMAND_START, sizeof(cl_ulong),
- &start, nullptr));
- OPENCL_CALL(
- clGetEventProfilingInfo(kevt, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, nullptr));
- this->duration += (end - start);
+ if (cl::OpenCLWorkspace::Global()->GetEventQueue(dev_).size() > 0) {
+ OPENCL_CALL(clWaitForEvents(1, &(cl::OpenCLWorkspace::Global()->GetEventQueue(dev_).back())));
+ for (auto& kevt : evt_queue) {
+ OPENCL_CALL(clGetEventProfilingInfo(kevt, CL_PROFILING_COMMAND_START, sizeof(cl_ulong),
+ &start, nullptr));
+ OPENCL_CALL(clGetEventProfilingInfo(kevt, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end,
+ nullptr));
+ this->duration += (end - start);
+ }
}
}
virtual int64_t SyncAndGetElapsedNanos() { return this->duration; }
// destructor
- virtual ~OpenCLTimerNode() {}
+ virtual ~OpenCLTimerNode() {
+ // Profiling session ends, recreate clCommandQueue in non-profiling mode
+ // This will disable collection of cl_events in case of executing inference after profile
+ recreateCommandQueue();
+ }
// constructor
OpenCLTimerNode() {}
explicit OpenCLTimerNode(Device dev) : dev_(dev) {}
@@ -449,6 +468,28 @@ class OpenCLTimerNode : public TimerNode {
private:
int64_t duration;
Device dev_;
+
+ void recreateCommandQueue() {
+ cl_command_queue_properties prop;
+ if (!cl::OpenCLWorkspace::Global()->IsProfiling(dev_)) {
+ prop = CL_QUEUE_PROFILING_ENABLE;
+ } else {
+ prop = 0;
+ }
+
+ auto queue = cl::OpenCLWorkspace::Global()->GetQueue(dev_);
+
+ OPENCL_CALL(clFlush(queue));
+ OPENCL_CALL(clFinish(queue));
+ OPENCL_CALL(clReleaseCommandQueue(queue));
+
+ cl_int err_code;
+ cl_device_id did = cl::OpenCLWorkspace::Global()->devices[dev_.device_id];
+ auto profiling_queue =
+ clCreateCommandQueue(cl::OpenCLWorkspace::Global()->context, did, prop, &err_code);
+ OPENCL_CHECK_ERROR(err_code);
+ cl::OpenCLWorkspace::Global()->queues[dev_.device_id] = profiling_queue;
+ }
};
} // namespace runtime
} // namespace tvm
diff --git a/src/runtime/opencl/opencl_device_api.cc b/src/runtime/opencl/opencl_device_api.cc
index c352716042..80b95a6ebf 100644
--- a/src/runtime/opencl/opencl_device_api.cc
+++ b/src/runtime/opencl/opencl_device_api.cc
@@ -426,12 +426,7 @@ void OpenCLWorkspace::Init(const std::string& type_key, const std::string& devic
ICHECK_EQ(this->queues.size(), 0U);
for (size_t i = 0; i < this->devices.size(); ++i) {
cl_device_id did = this->devices[i];
-#ifdef USE_PROFILER
- this->queues.push_back(
- clCreateCommandQueue(this->context, did, CL_QUEUE_PROFILING_ENABLE, &err_code));
-#else
this->queues.push_back(clCreateCommandQueue(this->context, did, 0, &err_code));
-#endif
OPENCL_CHECK_ERROR(err_code);
}
this->events.resize(this->devices.size());
diff --git a/src/runtime/opencl/opencl_module.cc b/src/runtime/opencl/opencl_module.cc
index e08c6070bc..9ae80d59d5 100644
--- a/src/runtime/opencl/opencl_module.cc
+++ b/src/runtime/opencl/opencl_module.cc
@@ -79,15 +79,16 @@ class OpenCLWrappedFunc {
wl.work_size[i] *= wl.work_size[i + 3];
}
// launch kernel
-#ifdef USE_PROFILER
- w_->GetEventQueue(t->device).resize(w_->GetEventQueue(t->device).size() + 1);
- OPENCL_CALL(clEnqueueNDRangeKernel(queue, kernel, work_dim, nullptr, wl.work_size,
- wl.work_size + 3, 0, nullptr,
- &(w_->GetEventQueue(t->device).back())));
-#else
- OPENCL_CALL(clEnqueueNDRangeKernel(queue, kernel, work_dim, nullptr, wl.work_size,
- wl.work_size + 3, 0, nullptr, nullptr));
-#endif
+
+ if (w_->IsProfiling(t->device)) {
+ w_->GetEventQueue(t->device).resize(w_->GetEventQueue(t->device).size() + 1);
+ OPENCL_CALL(clEnqueueNDRangeKernel(queue, kernel, work_dim, nullptr, wl.work_size,
+ wl.work_size + 3, 0, nullptr,
+ &(w_->GetEventQueue(t->device).back())));
+ } else {
+ OPENCL_CALL(clEnqueueNDRangeKernel(queue, kernel, work_dim, nullptr, wl.work_size,
+ wl.work_size + 3, 0, nullptr, nullptr));
+ }
}
private: