You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2018/12/05 18:35:52 UTC
[GitHub] lanking520 closed pull request #13522: [MXNET-1249] Fix Object
Detector Performance with GPU
lanking520 closed pull request #13522: [MXNET-1249] Fix Object Detector Performance with GPU
URL: https://github.com/apache/incubator-mxnet/pull/13522
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):
diff --git a/scala-package/infer/src/main/scala/org/apache/mxnet/infer/Classifier.scala b/scala-package/infer/src/main/scala/org/apache/mxnet/infer/Classifier.scala
index adeb33d34a9..cf55bc10d97 100644
--- a/scala-package/infer/src/main/scala/org/apache/mxnet/infer/Classifier.scala
+++ b/scala-package/infer/src/main/scala/org/apache/mxnet/infer/Classifier.scala
@@ -24,6 +24,7 @@ import org.slf4j.LoggerFactory
import scala.io
import scala.collection.mutable.ListBuffer
+import scala.collection.parallel.mutable.ParArray
trait ClassifierBase {
@@ -110,16 +111,21 @@ class Classifier(modelPathPrefix: String,
: IndexedSeq[IndexedSeq[(String, Float)]] = {
// considering only the first output
- val predictResultND: NDArray = predictor.predictWithNDArray(input)(0)
-
- val predictResult: ListBuffer[Array[Float]] = ListBuffer[Array[Float]]()
+ // Copy NDArray to CPU to avoid frequent GPU to CPU copying
+ val predictResultND: NDArray =
+ predictor.predictWithNDArray(input)(0).asInContext(Context.cpu())
+ // Parallel Execution with ParArray for better performance
+ val predictResultPar: ParArray[Array[Float]] =
+ new ParArray[Array[Float]](predictResultND.shape(0))
// iterating over the individual items(batch size is in axis 0)
- for (i <- 0 until predictResultND.shape(0)) {
+ (0 until predictResultND.shape(0)).toVector.par.foreach( i => {
val r = predictResultND.at(i)
- predictResult += r.toArray
+ predictResultPar(i) = r.toArray
r.dispose()
- }
+ })
+
+ val predictResult = predictResultPar.toArray
var result: ListBuffer[IndexedSeq[(String, Float)]] =
ListBuffer.empty[IndexedSeq[(String, Float)]]
diff --git a/scala-package/infer/src/main/scala/org/apache/mxnet/infer/ObjectDetector.scala b/scala-package/infer/src/main/scala/org/apache/mxnet/infer/ObjectDetector.scala
index a9b21f8c1dc..78b237a4a9c 100644
--- a/scala-package/infer/src/main/scala/org/apache/mxnet/infer/ObjectDetector.scala
+++ b/scala-package/infer/src/main/scala/org/apache/mxnet/infer/ObjectDetector.scala
@@ -19,6 +19,8 @@ package org.apache.mxnet.infer
// scalastyle:off
import java.awt.image.BufferedImage
+
+import scala.collection.parallel.mutable.ParArray
// scalastyle:on
import org.apache.mxnet.NDArray
import org.apache.mxnet.DataDesc
@@ -94,39 +96,39 @@ class ObjectDetector(modelPathPrefix: String,
def objectDetectWithNDArray(input: IndexedSeq[NDArray], topK: Option[Int])
: IndexedSeq[IndexedSeq[(String, Array[Float])]] = {
- val predictResult = predictor.predictWithNDArray(input)(0)
- var batchResult = ListBuffer[IndexedSeq[(String, Array[Float])]]()
- for (i <- 0 until predictResult.shape(0)) {
+ // Copy NDArray to CPU to avoid frequent GPU to CPU copying
+ val predictResult = predictor.predictWithNDArray(input)(0).asInContext(Context.cpu())
+ // Parallel Execution with ParArray for better performance
+ var batchResult = new ParArray[IndexedSeq[(String, Array[Float])]](predictResult.shape(0))
+ (0 until predictResult.shape(0)).toArray.par.foreach( i => {
val r = predictResult.at(i)
- batchResult += sortAndReformat(r, topK)
+ batchResult(i) = sortAndReformat(r, topK)
handler.execute(r.dispose())
- }
+ })
handler.execute(predictResult.dispose())
batchResult.toIndexedSeq
}
private[infer] def sortAndReformat(predictResultND: NDArray, topK: Option[Int])
: IndexedSeq[(String, Array[Float])] = {
- val predictResult: ListBuffer[Array[Float]] = ListBuffer[Array[Float]]()
- val accuracy: ListBuffer[Float] = ListBuffer[Float]()
-
// iterating over the all the predictions
val length = predictResultND.shape(0)
- for (i <- 0 until length) {
+ val predictResult = (0 until length).toArray.par.flatMap( i => {
val r = predictResultND.at(i)
val tempArr = r.toArray
- if (tempArr(0) != -1.0) {
- predictResult += tempArr
- accuracy += tempArr(1)
+ val res = if (tempArr(0) != -1.0) {
+ Array[Array[Float]](tempArr)
} else {
// Ignore the minus 1 part
+ Array[Array[Float]]()
}
handler.execute(r.dispose())
- }
+ res
+ }).toArray
var result = IndexedSeq[(String, Array[Float])]()
if (topK.isDefined) {
- var sortedIndices = accuracy.zipWithIndex.sortBy(-_._1).map(_._2)
+ var sortedIndices = predictResult.zipWithIndex.sortBy(-_._1(1)).map(_._2)
sortedIndices = sortedIndices.take(topK.get)
// takeRight(5) would provide the output as Array[Accuracy, Xmin, Ymin, Xmax, Ymax]
result = sortedIndices.map(idx
@@ -136,7 +138,6 @@ class ObjectDetector(modelPathPrefix: String,
result = predictResult.map(ele
=> (synset(ele(0).toInt), ele.takeRight(5))).toIndexedSeq
}
-
result
}
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
With regards,
Apache Git Services