You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by ha...@apache.org on 2018/09/19 00:07:14 UTC
[incubator-mxnet] branch master updated: [MXNET-910] Multithreading
inference. (#12456)
This is an automated email from the ASF dual-hosted git repository.
haibin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new d8984e8 [MXNET-910] Multithreading inference. (#12456)
d8984e8 is described below
commit d8984e836e1f04142912e366ca73e51a6b1c6353
Author: Da Zheng <zh...@gmail.com>
AuthorDate: Tue Sep 18 17:07:01 2018 -0700
[MXNET-910] Multithreading inference. (#12456)
* add multi-threading inference.
* demo multi-threading inference.
* add new capi.
* make naive engine thread local.
* create an executor inside each thread.
* fix format.
* fix format.
* fix format.
* Revert "make naive engine thread local."
This reverts commit b9d844e46d33f11ad409feb099194e183da9bbda.
* Update CAPI.
* add doc.
* fix lint.
* update example.
* update.
* fix.
* add check.
* fix.
* fix example.
* update name.
* update README.
---
example/image-classification/predict-cpp/Makefile | 2 +-
example/image-classification/predict-cpp/README.md | 6 +-
.../predict-cpp/image-classification-predict.cc | 131 +++++++++-----
include/mxnet/c_predict_api.h | 33 ++++
src/c_api/c_predict_api.cc | 197 +++++++++++++++------
5 files changed, 262 insertions(+), 107 deletions(-)
diff --git a/example/image-classification/predict-cpp/Makefile b/example/image-classification/predict-cpp/Makefile
index e0c0bc6..5c08411 100644
--- a/example/image-classification/predict-cpp/Makefile
+++ b/example/image-classification/predict-cpp/Makefile
@@ -15,7 +15,7 @@ LDFLAGS+=`pkg-config --libs opencv`
export MXNET_ROOT=`pwd`/../../..
CFLAGS+=-Wall -I$(MXNET_ROOT)/include
-LDFLAGS+=$(MXNET_ROOT)/lib/libmxnet.so
+LDFLAGS+=$(MXNET_ROOT)/lib/libmxnet.so -lpthread
image-classification-predict: image-classification-predict.o
g++ -O3 -o image-classification-predict image-classification-predict.o $(LDFLAGS)
diff --git a/example/image-classification/predict-cpp/README.md b/example/image-classification/predict-cpp/README.md
index 69f63d7..2a5e350 100644
--- a/example/image-classification/predict-cpp/README.md
+++ b/example/image-classification/predict-cpp/README.md
@@ -1,5 +1,5 @@
# Image Classification Example Using the C Predict API
-This is a simple predictor which shows how to use the MXNet C Predict API for image classification with a pre-trained ImageNet model.
+This is a simple predictor which shows how to use the MXNet C Predict API for image classification with a pre-trained ImageNet model, using either a single thread or multiple threads.
## Prerequisites
@@ -45,10 +45,10 @@ Run the example by passing it an image that you want to classify. If you don't h
wget https://upload.wikimedia.org/wikipedia/commons/thumb/f/f4/Honeycrisp.jpg/1920px-Honeycrisp.jpg
```
-Then run the `image-classification-predict` program, passing the image as the argument.
+Then run the `image-classification-predict` program, passing the image as the first argument and the number of threads as the second argument.
```bash
- ./image-classification-predict 1920px-Honeycrisp.jpg
+ ./image-classification-predict 1920px-Honeycrisp.jpg 1
```
## Tips
diff --git a/example/image-classification/predict-cpp/image-classification-predict.cc b/example/image-classification/predict-cpp/image-classification-predict.cc
index 186107b..2a605b8 100644
--- a/example/image-classification/predict-cpp/image-classification-predict.cc
+++ b/example/image-classification/predict-cpp/image-classification-predict.cc
@@ -37,6 +37,7 @@
#include <fstream>
#include <vector>
#include <memory>
+#include <thread>
#include <iomanip>
#include <opencv2/opencv.hpp>
// Path for c_predict_api
@@ -179,14 +180,56 @@ void PrintOutputResult(const std::vector<float>& data, const std::vector<std::st
"accuracy=" << std::setprecision(8) << best_accuracy << ")" << std::endl;
}
+void predict(PredictorHandle pred_hnd, const std::vector<mx_float> &image_data,  // Runs one forward pass on pred_hnd, prints the result, and frees pred_hnd and nd_hnd.
+ NDListHandle nd_hnd, const std::string &synset_file, int i) {  // NOTE(review): parameter `i` is never read; the loop below declares its own `i`.
+ auto image_size = image_data.size();
+ // Feed image_data to the input named "data"
+ MXPredSetInput(pred_hnd, "data", image_data.data(), static_cast<mx_uint>(image_size));
+
+ // Run the forward pass
+ MXPredForward(pred_hnd);
+
+ mx_uint output_index = 0;
+
+ mx_uint* shape = nullptr;
+ mx_uint shape_len;
+
+ // Query the shape of output 0 so the result buffer can be sized
+ MXPredGetOutputShape(pred_hnd, output_index, &shape, &shape_len);
+
+ std::size_t size = 1;
+ for (mx_uint i = 0; i < shape_len; ++i) { size *= shape[i]; }  // element count = product of all dimensions
+
+ std::vector<float> data(size);
+
+ MXPredGetOutput(pred_hnd, output_index, &(data[0]), static_cast<mx_uint>(size));
+
+ // Release NDList
+ if (nd_hnd) {  // NOTE(review): main() passes the same nd_hnd to every thread, so this free runs once per thread -- confirm this is safe.
+ MXNDListFree(nd_hnd);
+ }
+
+ // Release Predictor
+ MXPredFree(pred_hnd);
+
+ // Load the synset from synset_file
+ auto synset = LoadSynset(synset_file);
+
+ // Print Output Data
+ PrintOutputResult(data, synset);
+}
+
int main(int argc, char* argv[]) {
if (argc < 2) {
std::cout << "No test image here." << std::endl
- << "Usage: ./image-classification-predict apple.jpg" << std::endl;
+ << "Usage: ./image-classification-predict apple.jpg [num_threads]" << std::endl;
return EXIT_FAILURE;
}
std::string test_file(argv[1]);
+ int num_threads = 1;
+ if (argc == 3)
+ num_threads = std::atoi(argv[2]);
// Models path for your model, you have to modify it
std::string json_file = "model/Inception/Inception-BN-symbol.json";
@@ -214,25 +257,11 @@ int main(int argc, char* argv[]) {
static_cast<mx_uint>(channels),
static_cast<mx_uint>(height),
static_cast<mx_uint>(width) };
- PredictorHandle pred_hnd = nullptr;
if (json_data.GetLength() == 0 || param_data.GetLength() == 0) {
return EXIT_FAILURE;
}
- // Create Predictor
- MXPredCreate(static_cast<const char*>(json_data.GetBuffer()),
- static_cast<const char*>(param_data.GetBuffer()),
- static_cast<int>(param_data.GetLength()),
- dev_type,
- dev_id,
- num_input_nodes,
- input_keys,
- input_shape_indptr,
- input_shape_data,
- &pred_hnd);
- assert(pred_hnd);
-
auto image_size = static_cast<std::size_t>(width * height * channels);
// Read Mean Data
@@ -259,40 +288,46 @@ int main(int argc, char* argv[]) {
GetImageFile(test_file, image_data.data(), channels, cv::Size(width, height), nd_data);
- // Set Input Image
- MXPredSetInput(pred_hnd, "data", image_data.data(), static_cast<mx_uint>(image_size));
-
- // Do Predict Forward
- MXPredForward(pred_hnd);
-
- mx_uint output_index = 0;
-
- mx_uint* shape = nullptr;
- mx_uint shape_len;
-
- // Get Output Result
- MXPredGetOutputShape(pred_hnd, output_index, &shape, &shape_len);
-
- std::size_t size = 1;
- for (mx_uint i = 0; i < shape_len; ++i) { size *= shape[i]; }
-
- std::vector<float> data(size);
-
- MXPredGetOutput(pred_hnd, output_index, &(data[0]), static_cast<mx_uint>(size));
-
- // Release NDList
- if (nd_hnd) {
- MXNDListFree(nd_hnd);
+ if (num_threads == 1) {
+ // Create Predictor
+ PredictorHandle pred_hnd;
+ MXPredCreate(static_cast<const char*>(json_data.GetBuffer()),
+ static_cast<const char*>(param_data.GetBuffer()),
+ static_cast<int>(param_data.GetLength()),
+ dev_type,
+ dev_id,
+ num_input_nodes,
+ input_keys,
+ input_shape_indptr,
+ input_shape_data,
+ &pred_hnd);
+ assert(pred_hnd);
+
+ predict(pred_hnd, image_data, nd_hnd, synset_file, 0);
+ } else {
+ // Create Predictor
+ std::vector<PredictorHandle> pred_hnds(num_threads, nullptr);
+ MXPredCreateMultiThread(static_cast<const char*>(json_data.GetBuffer()),
+ static_cast<const char*>(param_data.GetBuffer()),
+ static_cast<int>(param_data.GetLength()),
+ dev_type,
+ dev_id,
+ num_input_nodes,
+ input_keys,
+ input_shape_indptr,
+ input_shape_data,
+ pred_hnds.size(),
+ pred_hnds.data());
+ for (auto hnd : pred_hnds)
+ assert(hnd);
+
+ std::vector<std::thread> threads;
+ for (int i = 0; i < num_threads; i++)
+ threads.emplace_back(predict, pred_hnds[i], image_data, nd_hnd, synset_file, i);
+ for (int i = 0; i < num_threads; i++)
+ threads[i].join();
}
-
- // Release Predictor
- MXPredFree(pred_hnd);
-
- // Synset path for your model, you have to modify it
- auto synset = LoadSynset(synset_file);
-
- // Print Output Data
- PrintOutputResult(data, synset);
+ printf("run successfully\n");
return EXIT_SUCCESS;
}
diff --git a/include/mxnet/c_predict_api.h b/include/mxnet/c_predict_api.h
index cc1c296..16addff 100644
--- a/include/mxnet/c_predict_api.h
+++ b/include/mxnet/c_predict_api.h
@@ -119,6 +119,39 @@ MXNET_DLL int MXPredCreatePartialOut(const char* symbol_json_str,
mx_uint num_output_nodes,
const char** output_keys,
PredictorHandle* out);
+
+/*!
+ * \brief Create predictors for multi-threaded inference, one predictor per thread.
+ * \note The implementation checks that the environment variable MXNET_ENGINE_TYPE is set to NaiveEngine.
+ * \param symbol_json_str The JSON string of the symbol.
+ * \param param_bytes The in-memory raw bytes of parameter ndarray file.
+ * \param param_size The size of parameter ndarray file.
+ * \param dev_type The device type, 1: cpu, 2:gpu
+ * \param dev_id The device id of the predictor.
+ * \param num_input_nodes Number of input nodes to the net. For feedforward net, this is 1.
+ * \param input_keys The name of input argument.
+ * For feedforward net, this is {"data"}
+ * \param input_shape_indptr Index pointer of shapes of each input node.
+ * The length of this array = num_input_nodes + 1.
+ * For feedforward net that takes 4 dimensional input, this is {0, 4}.
+ * \param input_shape_data A flattened array of the shapes of each input node.
+ * For feedforward net that takes 4 dimensional input, this is the shape data.
+ * \param num_threads The number of threads that will run the predictors.
+ * \param out An array that receives the created predictor handles. The array has to be large
+ * enough to keep `num_threads` predictors.
+ * \return 0 when success, -1 when failure.
+ */
+MXNET_DLL int MXPredCreateMultiThread(const char* symbol_json_str,
+ const void* param_bytes,
+ int param_size,
+ int dev_type, int dev_id,
+ mx_uint num_input_nodes,
+ const char** input_keys,
+ const mx_uint* input_shape_indptr,
+ const mx_uint* input_shape_data,
+ int num_threads,
+ PredictorHandle* out);
+
/*!
* \brief Change the input shape of an existing predictor.
* \param num_input_nodes Number of input nodes to the net,
diff --git a/src/c_api/c_predict_api.cc b/src/c_api/c_predict_api.cc
index d84a89a..c2576cc 100644
--- a/src/c_api/c_predict_api.cc
+++ b/src/c_api/c_predict_api.cc
@@ -67,47 +67,39 @@ struct MXAPINDList {
std::vector<mx_float> data;
};
-int MXPredCreate(const char* symbol_json_str,
- const void* param_bytes,
- int param_size,
- int dev_type, int dev_id,
- mx_uint num_input_nodes,
- const char** input_keys,
- const mx_uint* input_shape_indptr,
- const mx_uint* input_shape_data,
- PredictorHandle* out) {
- return MXPredCreatePartialOut(
- symbol_json_str,
- param_bytes,
- param_size,
- dev_type,
- dev_id,
- num_input_nodes,
- input_keys,
- input_shape_indptr,
- input_shape_data,
- 0,
- NULL,
- out);
+inline void _CreateExecutor(PredictorHandle pred_hnd) {  // Binds an executor for the predictor if it does not have one yet.
+ MXAPIPredictor *pred = static_cast<MXAPIPredictor*>(pred_hnd);
+ if (pred->exec == nullptr) {  // nothing to do when an executor is already set
+ auto sym = pred->sym;  // local copies of the fields stored on the predictor
+ auto ctx = pred->ctx;
+ auto key2arg = pred->key2arg;
+ auto arg_arrays = pred->arg_arrays;
+ auto aux_arrays = pred->aux_arrays;
+ std::map<std::string, Context> ctx_map;  // left empty
+ std::vector<NDArray> grad_store(arg_arrays.size());
+ std::vector<OpReqType> grad_req(arg_arrays.size(), kNullOp);  // one kNullOp request per argument
+ pred->exec.reset(Executor::Bind(sym, ctx, ctx_map, arg_arrays,
+ grad_store, grad_req, aux_arrays));
+ pred->out_arrays = pred->exec->outputs();  // keep the executor's outputs on the predictor
+ }
+}
-namespace mxnet {
-} // namespace mxnet
-
-int MXPredCreatePartialOut(const char* symbol_json_str,
- const void* param_bytes,
- int param_size,
- int dev_type, int dev_id,
- mx_uint num_input_nodes,
- const char** input_keys,
- const mx_uint* input_shape_indptr,
- const mx_uint* input_shape_data,
- mx_uint num_output_nodes,
- const char** output_keys,
- PredictorHandle* out) {
+int _CreatePartialOut(const char* symbol_json_str,
+ const void* param_bytes,
+ int param_size,
+ int dev_type, int dev_id,
+ mx_uint num_input_nodes,
+ const char** input_keys,
+ const mx_uint* input_shape_indptr,
+ const mx_uint* input_shape_data,
+ mx_uint num_output_nodes,
+ const char** output_keys,
+ // This is used for parallel inference.
+ int num_threads,
+ bool lazy,
+ PredictorHandle* out) {
using nnvm::Symbol;
- MXAPIPredictor* ret = new MXAPIPredictor();
API_BEGIN();
Symbol sym;
// make sure symbols are registered
@@ -140,7 +132,6 @@ int MXPredCreatePartialOut(const char* symbol_json_str,
}
sym = nnvm::Symbol::CreateGroup(out_syms);
}
- ret->sym = sym;
// load the parameters
std::unordered_map<std::string, NDArray> arg_params, aux_params;
@@ -188,9 +179,10 @@ int MXPredCreatePartialOut(const char* symbol_json_str,
std::vector<TShape> out_shapes(sym.ListOutputNames().size());
std::vector<TShape> aux_shapes(aux_names.size());
std::vector<TShape> arg_shapes;
+ std::unordered_map<std::string, size_t> key2arg;
for (size_t i = 0; i < arg_names.size(); ++i) {
std::string key = arg_names[i];
- ret->key2arg[key] = i;
+ key2arg[key] = i;
}
try {
@@ -215,7 +207,6 @@ int MXPredCreatePartialOut(const char* symbol_json_str,
}
Context ctx = Context::Create(static_cast<Context::DeviceType>(dev_type), dev_id);
- ret->ctx = ctx;
std::vector<NDArray> arg_arrays, aux_arrays;
for (size_t i = 0; i < arg_shapes.size(); ++i) {
@@ -232,24 +223,117 @@ int MXPredCreatePartialOut(const char* symbol_json_str,
}
aux_arrays.push_back(nd);
}
- ret->arg_arrays = arg_arrays;
- ret->aux_arrays = aux_arrays;
// bind
- {
- std::map<std::string, Context> ctx_map;
- std::vector<NDArray> grad_store(arg_arrays.size());
- std::vector<OpReqType> grad_req(arg_arrays.size(), kNullOp);
-
-
- ret->exec.reset(Executor::Bind(sym, ctx, ctx_map,
- arg_arrays,
- grad_store, grad_req,
- aux_arrays));
+ for (int i = 0; i < num_threads; i++) {
+ std::unique_ptr<MXAPIPredictor> ret(new MXAPIPredictor());
+ ret->sym = sym;
+ ret->ctx = ctx;
+ ret->key2arg = key2arg;
+ ret->arg_arrays = arg_arrays;
+ ret->aux_arrays = aux_arrays;
ret->out_shapes = out_shapes;
- ret->out_arrays = ret->exec->outputs();
+
+ if (!lazy) {
+ std::map<std::string, Context> ctx_map;
+ std::vector<NDArray> grad_store(arg_arrays.size());
+ std::vector<OpReqType> grad_req(arg_arrays.size(), kNullOp);
+ ret->exec.reset(Executor::Bind(sym, ctx, ctx_map,
+ arg_arrays,
+ grad_store, grad_req,
+ aux_arrays));
+ ret->out_arrays = ret->exec->outputs();
+ }
+ out[i] = ret.release();
}
- *out = ret;
- API_END_HANDLE_ERROR(delete ret);
+ API_END_HANDLE_ERROR();
+}
+
+int MXPredCreatePartialOut(const char* symbol_json_str,  // Forwards to _CreatePartialOut for a single predictor.
+ const void* param_bytes,
+ int param_size,
+ int dev_type, int dev_id,
+ mx_uint num_input_nodes,
+ const char** input_keys,
+ const mx_uint* input_shape_indptr,
+ const mx_uint* input_shape_data,
+ mx_uint num_output_nodes,
+ const char** output_keys,
+ PredictorHandle* out) {
+ return _CreatePartialOut(
+ symbol_json_str,
+ param_bytes,
+ param_size,
+ dev_type, dev_id,
+ num_input_nodes,
+ input_keys,
+ input_shape_indptr,
+ input_shape_data,
+ num_output_nodes,
+ output_keys,
+ 1,  // num_threads: a single predictor is written to `out`
+ false,  // lazy: the executor is bound inside _CreatePartialOut
+ out);
+}
+
+int MXPredCreate(const char* symbol_json_str,  // Forwards to _CreatePartialOut for a single predictor with no output keys.
+ const void* param_bytes,
+ int param_size,
+ int dev_type, int dev_id,
+ mx_uint num_input_nodes,
+ const char** input_keys,
+ const mx_uint* input_shape_indptr,
+ const mx_uint* input_shape_data,
+ PredictorHandle* out) {
+ return _CreatePartialOut(
+ symbol_json_str,
+ param_bytes,
+ param_size,
+ dev_type,
+ dev_id,
+ num_input_nodes,
+ input_keys,
+ input_shape_indptr,
+ input_shape_data,
+ 0,  // num_output_nodes
+ NULL,  // output_keys
+ 1,  // num_threads: a single predictor is written to `out`
+ false,  // lazy: the executor is bound inside _CreatePartialOut
+ out);
+}
+
+int MXPredCreateMultiThread(const char* symbol_json_str,  // Creates `num_threads` predictors via _CreatePartialOut with lazy executor binding.
+ const void* param_bytes,
+ int param_size,
+ int dev_type, int dev_id,
+ mx_uint num_input_nodes,
+ const char** input_keys,
+ const mx_uint* input_shape_indptr,
+ const mx_uint* input_shape_data,
+ // This is used for parallel inference.
+ int num_threads,
+ PredictorHandle* out) {
+ const char *type = getenv("MXNET_ENGINE_TYPE");  // null when the variable is unset; stype then stays empty
+ std::string stype;
+ if (type)
+ stype = type;
+ CHECK(stype == "NaiveEngine") << "Multithread inference only works with NaiveEngine.\n"
+ << "Please set MXNET_ENGINE_TYPE to NaiveEngine"
+ << std::endl;  // NOTE(review): this check runs before _CreatePartialOut enters API_BEGIN -- confirm how a failure reaches C callers.
+ return _CreatePartialOut(
+ symbol_json_str,
+ param_bytes,
+ param_size,
+ dev_type,
+ dev_id,
+ num_input_nodes,
+ input_keys,
+ input_shape_indptr,
+ input_shape_data,
+ 0,  // num_output_nodes
+ NULL,  // output_keys
+ num_threads,  // number of predictors written to `out`
+ true,  // lazy: Executor::Bind is skipped here; _CreateExecutor binds later
+ out);
}
int MXPredReshape(mx_uint num_input_nodes,
@@ -258,6 +342,7 @@ int MXPredReshape(mx_uint num_input_nodes,
const mx_uint* input_shape_data,
PredictorHandle handle,
PredictorHandle* out) {
+ _CreateExecutor(handle);
MXAPIPredictor* p = static_cast<MXAPIPredictor*>(handle);
std::unique_ptr<MXAPIPredictor> ret(new MXAPIPredictor());
@@ -374,6 +459,7 @@ int MXPredSetInput(PredictorHandle handle,
}
int MXPredForward(PredictorHandle handle) {
+ _CreateExecutor(handle);
MXAPIPredictor* p = static_cast<MXAPIPredictor*>(handle);
API_BEGIN();
p->exec->Forward(false);
@@ -381,6 +467,7 @@ int MXPredForward(PredictorHandle handle) {
}
int MXPredPartialForward(PredictorHandle handle, int step, int* step_left) {
+ _CreateExecutor(handle);
MXAPIPredictor* p = static_cast<MXAPIPredictor*>(handle);
API_BEGIN();
p->exec->PartialForward(false, step, step_left);