Posted to commits@systemml.apache.org by ni...@apache.org on 2017/08/22 17:25:52 UTC

systemml git commit: [SYSTEMML-540] Avoid unnecessary sparse-to-dense conversion on depthwise convolution layers and probability matrix

Repository: systemml
Updated Branches:
  refs/heads/master 0325da7de -> 4d5a82ecf


[SYSTEMML-540] Avoid unnecessary sparse-to-dense conversion on depthwise convolution layers and probability matrix

```
Network: unet
Setup: 30g driver, K80 GPU (only 1 GPU used)
Performance before this commit:
Total elapsed time:             306.291 sec.
1  leftIndex [106:4-106:42]  139.392  28552  csrlix[114.674s,22649], aqrs[0.021s,23165], rls[0.069s,57104], s2d[8.691s,258], aqrd[14.199s,33939], lixcp[1.351s,5903]

Performance after this commit:
Total elapsed time:             220.712 sec.
6  leftIndex [106:4-106:42]  21.066  28552  rls[0.036s,57104], lixcp[5.375s,28552], aqrd[15.423s,57104]
```
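
For context, below is a minimal DML sketch of the pattern behind this change (toy dimensions and illustrative variable names, not the actual layer code). An all-zero matrix(0, ...) is an empty block that the engine keeps in sparse format, so each left-indexed write of a dense result can pay a sparse-to-dense conversion (the s2d misc timer above); allocating with matrix(1, ...) makes the target dense up front, so the writes stay dense.

```
# Toy illustration of the dense-initialization workaround; all dimensions are made up.
N = 2      # examples
C = 3      # channels
Hout = 4   # output height
Wout = 5   # output width

# Sparse path (what this commit avoids): an all-zero matrix is an empty block,
# so each left-indexed write below would force a sparse-to-dense conversion.
# out = matrix(0, rows=N, cols=C*Hout*Wout)

# Dense path (this commit): initialize to 1s so the block is dense from the start.
out = matrix(1, rows=N, cols=C*Hout*Wout)

for (c in 1:C) {
  # stand-in for the dense per-channel convolution result
  outc = matrix(c, rows=N, cols=Hout*Wout)
  # left-indexed write into the preallocated output volume
  out[, (c-1)*Hout*Wout+1:c*Hout*Wout] = outc
}
print(sum(out))
```

The heavy-hitter lines above reflect exactly this: the s2d and csrlix timers on leftIndex disappear once the targets are allocated dense.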

Closes #610.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/4d5a82ec
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/4d5a82ec
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/4d5a82ec

Branch: refs/heads/master
Commit: 4d5a82ecf18c57f3cf614364fdfabaf0680a51fe
Parents: 0325da7
Author: Niketan Pansare <np...@us.ibm.com>
Authored: Tue Aug 22 10:23:20 2017 -0700
Committer: Niketan Pansare <np...@us.ibm.com>
Committed: Tue Aug 22 10:24:59 2017 -0700

----------------------------------------------------------------------
 scripts/nn/layers/conv2d_depthwise.dml                 | 12 ++++++++----
 scripts/nn/layers/conv2d_transpose_depthwise.dml       | 12 ++++++++----
 src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala |  2 +-
 3 files changed, 17 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/4d5a82ec/scripts/nn/layers/conv2d_depthwise.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/layers/conv2d_depthwise.dml b/scripts/nn/layers/conv2d_depthwise.dml
index ff36ea1..4e4d3e4 100644
--- a/scripts/nn/layers/conv2d_depthwise.dml
+++ b/scripts/nn/layers/conv2d_depthwise.dml
@@ -79,7 +79,9 @@ forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
   Wout = as.integer(floor((Win + 2*padw - Wf)/stridew + 1))
 
   # create output volume
-  out = matrix(0, rows=N, cols=C*M*Hout*Wout)
+  # NOTE: We initialize to 1s vs. 0s to avoid conversions between sparse and dense formats. 
+  # This is a complete hack until the engine is improved.
+  out = matrix(1, rows=N, cols=C*M*Hout*Wout)
 
   # depthwise convolution
   # TODO: Explore usage of parfor loops more to determine if they can provide a performance
@@ -137,9 +139,11 @@ backward = function(matrix[double] dout, int Hout, int Wout,
   C = nrow(W)
 
   # create gradient volumes
-  dX = matrix(0, rows=N, cols=C*Hin*Win)
-  dW = matrix(0, rows=C, cols=M*Hf*Wf)
-  db = matrix(0, rows=C*M, cols=1)
+  # NOTE: We initialize to 1s vs. 0s to avoid conversions between sparse and dense formats. 
+  # This is a complete hack until the engine is improved.
+  dX = matrix(1, rows=N, cols=C*Hin*Win)
+  dW = matrix(1, rows=C, cols=M*Hf*Wf)
+  db = matrix(1, rows=C*M, cols=1)
 
   # partial derivatives for depthwise convolution
   for (c in 1:C) {  # all examples

http://git-wip-us.apache.org/repos/asf/systemml/blob/4d5a82ec/scripts/nn/layers/conv2d_transpose_depthwise.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/layers/conv2d_transpose_depthwise.dml b/scripts/nn/layers/conv2d_transpose_depthwise.dml
index b3e798c..0a9a235 100644
--- a/scripts/nn/layers/conv2d_transpose_depthwise.dml
+++ b/scripts/nn/layers/conv2d_transpose_depthwise.dml
@@ -85,7 +85,9 @@ forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
   Wout = stridew*(Win-1) - 2*padw + Wf + out_padw
 
   # create output volume
-  out = matrix(0, rows=N, cols=C/M*Hout*Wout)
+  # NOTE: We initialize to 1s vs. 0s to avoid conversions between sparse and dense formats. 
+  # This is a complete hack until the engine is improved.
+  out = matrix(1, rows=N, cols=C/M*Hout*Wout)
 
   # depthwise transpose convolution
   # TODO: Explore usage of parfor loops more to determine if they can provide a performance
@@ -146,9 +148,11 @@ backward = function(matrix[double] dout, int Hout, int Wout,
   F = nrow(W)
 
   # create gradient volumes
-  dX = matrix(0, rows=N, cols=C*Hin*Win)
-  dW = matrix(0, rows=C/M, cols=M*Hf*Wf)
-  db = matrix(0, rows=C/M, cols=1)
+  # NOTE: We initialize to 1s vs. 0s to avoid conversions between sparse and dense formats. 
+  # This is a complete hack until the engine is improved.
+  dX = matrix(1, rows=N, cols=C*Hin*Win)
+  dW = matrix(1, rows=C/M, cols=M*Hf*Wf)
+  db = matrix(1, rows=C/M, cols=1)
 
   # depthwise transpose convolution
   for (f in 1:F) {

http://git-wip-us.apache.org/repos/asf/systemml/blob/4d5a82ec/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala
----------------------------------------------------------------------
diff --git a/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala b/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala
index 25d19f6..000fe32 100644
--- a/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala
+++ b/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala
@@ -613,7 +613,7 @@ class Caffe2DMLModel(val numClasses:String, val sc: SparkContext, val solver:Caf
 	  
 	  val lossLayers = getLossLayers(net)
 	  val lastLayerShape = estimator.getOutputShapeOfLastLayer
-	  assign(tabDMLScript, "Prob", matrix("0", Caffe2DML.numImages, (lastLayerShape._1*lastLayerShape._2*lastLayerShape._3).toString))
+	  assign(tabDMLScript, "Prob", matrix("1", Caffe2DML.numImages, (lastLayerShape._1*lastLayerShape._2*lastLayerShape._3).toString))
 	  estimator.getTestAlgo.toLowerCase match {
       case "minibatch" => {
         ceilDivide(tabDMLScript(), "num_iters", Caffe2DML.numImages, Caffe2DML.batchSize)