Posted to commits@systemml.apache.org by du...@apache.org on 2017/06/19 20:54:48 UTC

[1/4] systemml git commit: [MINOR] Cleanup in the `nn` library.

Repository: systemml
Updated Branches:
  refs/heads/master 9389a5e1e -> c83e99af7


[MINOR] Cleanup in the `nn` library.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/cbfb21cb
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/cbfb21cb
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/cbfb21cb

Branch: refs/heads/master
Commit: cbfb21cbcdcac699f93cdbb851138f17f6fcd9b6
Parents: 9389a5e
Author: Mike Dusenberry <mw...@us.ibm.com>
Authored: Mon Jun 19 13:52:47 2017 -0700
Committer: Mike Dusenberry <mw...@us.ibm.com>
Committed: Mon Jun 19 13:52:47 2017 -0700

----------------------------------------------------------------------
 scripts/nn/layers/conv2d_transpose.dml |  10 +-
 scripts/nn/test/test.dml               | 182 ++++++++++++++--------------
 2 files changed, 97 insertions(+), 95 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/cbfb21cb/scripts/nn/layers/conv2d_transpose.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/layers/conv2d_transpose.dml b/scripts/nn/layers/conv2d_transpose.dml
index eee19a5..bdc5090 100644
--- a/scripts/nn/layers/conv2d_transpose.dml
+++ b/scripts/nn/layers/conv2d_transpose.dml
@@ -20,10 +20,11 @@
 #-------------------------------------------------------------
 
 /*
- * 2D Transpose convolutional layer.
+ * 2D Transpose Convolutional layer.
  *
  * Utilizes built-in convolution operators for higher performance.
  */
+source("nn/util.dml") as util
 
 forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
                    int C, int Hin, int Win, int Hf, int Wf,
@@ -60,8 +61,8 @@ forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
    */
   N = nrow(X)
   F = nrow(b)
-  Hout = strideh * (Hin-1) - 2*padh + Hf + out_padh
-  Wout = stridew * (Win-1) - 2*padw + Wf + out_padw
+  Hout = strideh*(Hin-1) - 2*padh + Hf + out_padh
+  Wout = stridew*(Win-1) - 2*padw + Wf + out_padw
 
   # Transpose convolution aims to go in the other direction of
   # (direct) convolution, i.e., given input X, produce output O such
@@ -146,7 +147,7 @@ backward = function(matrix[double] dout, int Hout, int Wout,
               stride=[strideh,stridew], padding=[padh,padw])
 
   # Partial derivatives for bias vector
-  db = rowSums(matrix(colSums(dout), rows=F, cols=Hout*Wout))
+  db = util::channel_sums(dout, F, Hout, Wout)
 }
 
 init = function(int F, int C, int Hf, int Wf)
@@ -235,3 +236,4 @@ init_bilinear = function(int C, int K)
 
   b = matrix(0, rows=C, cols=1)
 }
+
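
For reference, the `db` computation above now delegates to a channel-wise
sum helper.  A minimal sketch of what `util::channel_sums` computes,
assuming the call signature used in the diff (it is equivalent to the
replaced `rowSums(matrix(colSums(dout), ...))` expression; the actual
helper lives in `nn/util.dml`):

  channel_sums = function(matrix[double] X, int C, int Hin, int Win)
      return (matrix[double] out) {
    # X has shape (N, C*Hin*Win); sum over all examples and all spatial
    # positions of each channel, yielding shape (C, 1).
    out = rowSums(matrix(colSums(X), rows=C, cols=Hin*Win))
  }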

http://git-wip-us.apache.org/repos/asf/systemml/blob/cbfb21cb/scripts/nn/test/test.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/test/test.dml b/scripts/nn/test/test.dml
index cfb8c79..94965bf 100644
--- a/scripts/nn/test/test.dml
+++ b/scripts/nn/test/test.dml
@@ -69,6 +69,97 @@ batch_norm1d = function() {
   }
 }
 
+batch_norm2d = function() {
+  /*
+   * Test for the 2D (spatial) batch normalization function.
+   */
+  print("Testing the 2D (spatial) batch normalization function.")
+
+  # Generate data
+  N = 2  # Number of examples
+  C = 3  # num channels
+  Hin = 4  # input height
+  Win = 5  # input width
+  mode = 'train'  # execution mode
+  mu = 0.9  # momentum of moving averages
+  eps = 1e-5  # smoothing term
+  X = matrix("70  29 23 55 72
+              42  98 68 48 39
+              34  73 44  6 40
+              74  18 18 53 53
+
+              63  85 72 61 72
+              32  36 23 29 63
+               9  43 43 49 43
+              31  43 89 94 50
+
+              62  12 32 41 87
+              25  48 99 52 61
+              12  83 60 55 34
+              30  42 68 88 51
+
+
+              67  59 62 67 84
+               8  76 24 19 57
+              10  89 63 72  2
+              59  56 16 15 70
+
+              32  69 55 39 93
+              84  36  4 30 40
+              70 100 36 76 59
+              69  15 40 24 34
+
+              51  67 11 13 32
+              66  85 55 85 38
+              32  35 17 83 34
+              55  58 52  0 99", rows=N, cols=C*Hin*Win)
+
+  # Create layer
+  [gamma, beta, ema_mean, ema_var] = batch_norm2d::init(C)
+
+  # Forward
+  [out, ema_mean_upd, ema_var_upd, cache_mean, cache_var, cache_norm] =
+      batch_norm2d::forward(X, gamma, beta, C, Hin, Win, mode, ema_mean, ema_var, mu, eps)
+
+  # Equivalency check
+  target = matrix("0.86215019 -0.76679718 -1.00517964  0.26619387  0.94161105
+                  -0.25030172  1.97460198  0.78268933 -0.01191914 -0.36949289
+                  -0.56814504  0.98134136 -0.17084086 -1.68059683 -0.32976246
+                   1.02107191 -1.20383179 -1.20383179  0.18673301  0.18673301
+
+                   0.50426388  1.41921711  0.87856293  0.42108631  0.87856293
+                  -0.78498828 -0.61863315 -1.15928721 -0.90975463  0.50426388
+                  -1.74153018 -0.32751167 -0.32751167 -0.07797909 -0.32751167
+                  -0.82657707 -0.32751167  1.58557224  1.79351616 -0.0363903
+
+                   0.4607178  -1.49978399 -0.71558321 -0.36269283  1.44096887
+                  -0.99005347 -0.08822262  1.91148913  0.06861746  0.42150795
+                  -1.49978399  1.28412855  0.38229787  0.18624771 -0.63716316
+                  -0.79400325 -0.32348287  0.69597805  1.48017895  0.0294075
+
+
+                   0.74295878  0.42511559  0.54430676  0.74295878  1.41837597
+                  -1.60113597  1.10053277 -0.96544927 -1.16410136  0.34565473
+                  -1.52167511  1.61702824  0.5840373   0.94161105 -1.83951855
+                   0.42511559  0.30592418 -1.28329265 -1.32302308  0.86215019
+
+                  -0.78498828  0.75379658  0.17155361 -0.4938668   1.75192738
+                   1.37762833 -0.61863315 -1.9494741  -0.86816585 -0.45227802
+                   0.79538536  2.04304862 -0.61863315  1.04491806  0.33790874
+                   0.75379658 -1.49199748 -0.45227802 -1.11769855 -0.70181072
+
+                   0.0294075   0.65676796 -1.53899395 -1.46057391 -0.71558321
+                   0.61755812  1.36254871  0.18624771  1.36254871 -0.48032296
+                  -0.71558321 -0.59795308 -1.30373383  1.28412855 -0.63716316
+                   0.18624771  0.30387771  0.06861746 -1.97030437  1.91148913",
+                  rows=1, cols=N*C*Hin*Win)
+  out = matrix(out, rows=1, cols=N*C*Hin*Win)
+  for (i in 1:length(out)) {
+    rel_error = test_util::check_rel_error(as.scalar(out[1,i]),
+                                           as.scalar(target[1,i]), 1e-3, 1e-4)
+  }
+}
+
 conv2d = function() {
   /*
    * Test for the 2D convolution functions.
@@ -491,97 +582,6 @@ max_pool2d = function() {
   tmp = test_util::check_all_equal(out_builtin, target)
 }
 
-batch_norm2d = function() {
-  /*
-   * Test for the 2D (spatial) batch normalization function.
-   */
-  print("Testing the 2D (spatial) batch normalization function.")
-
-  # Generate data
-  N = 2  # Number of examples
-  C = 3  # num channels
-  Hin = 4  # input height
-  Win = 5  # input width
-  mode = 'train'  # execution mode
-  mu = 0.9  # momentum of moving averages
-  eps = 1e-5  # smoothing term
-  X = matrix("70  29 23 55 72
-              42  98 68 48 39
-              34  73 44  6 40
-              74  18 18 53 53
-
-              63  85 72 61 72
-              32  36 23 29 63
-               9  43 43 49 43
-              31  43 89 94 50
-
-              62  12 32 41 87
-              25  48 99 52 61
-              12  83 60 55 34
-              30  42 68 88 51
-
-
-              67  59 62 67 84
-               8  76 24 19 57
-              10  89 63 72  2
-              59  56 16 15 70
-
-              32  69 55 39 93
-              84  36  4 30 40
-              70 100 36 76 59
-              69  15 40 24 34
-
-              51  67 11 13 32
-              66  85 55 85 38
-              32  35 17 83 34
-              55  58 52  0 99", rows=N, cols=C*Hin*Win)
-
-  # Create layer
-  [gamma, beta, ema_mean, ema_var] = batch_norm2d::init(C)
-
-  # Forward
-  [out, ema_mean_upd, ema_var_upd, cache_mean, cache_var, cache_norm] =
-      batch_norm2d::forward(X, gamma, beta, C, Hin, Win, mode, ema_mean, ema_var, mu, eps)
-
-  # Equivalency check
-  target = matrix("0.86215019 -0.76679718 -1.00517964  0.26619387  0.94161105
-                  -0.25030172  1.97460198  0.78268933 -0.01191914 -0.36949289
-                  -0.56814504  0.98134136 -0.17084086 -1.68059683 -0.32976246
-                   1.02107191 -1.20383179 -1.20383179  0.18673301  0.18673301
-
-                   0.50426388  1.41921711  0.87856293  0.42108631  0.87856293
-                  -0.78498828 -0.61863315 -1.15928721 -0.90975463  0.50426388
-                  -1.74153018 -0.32751167 -0.32751167 -0.07797909 -0.32751167
-                  -0.82657707 -0.32751167  1.58557224  1.79351616 -0.0363903
-
-                   0.4607178  -1.49978399 -0.71558321 -0.36269283  1.44096887
-                  -0.99005347 -0.08822262  1.91148913  0.06861746  0.42150795
-                  -1.49978399  1.28412855  0.38229787  0.18624771 -0.63716316
-                  -0.79400325 -0.32348287  0.69597805  1.48017895  0.0294075
-
-
-                   0.74295878  0.42511559  0.54430676  0.74295878  1.41837597
-                  -1.60113597  1.10053277 -0.96544927 -1.16410136  0.34565473
-                  -1.52167511  1.61702824  0.5840373   0.94161105 -1.83951855
-                   0.42511559  0.30592418 -1.28329265 -1.32302308  0.86215019
-
-                  -0.78498828  0.75379658  0.17155361 -0.4938668   1.75192738
-                   1.37762833 -0.61863315 -1.9494741  -0.86816585 -0.45227802
-                   0.79538536  2.04304862 -0.61863315  1.04491806  0.33790874
-                   0.75379658 -1.49199748 -0.45227802 -1.11769855 -0.70181072
-
-                   0.0294075   0.65676796 -1.53899395 -1.46057391 -0.71558321
-                   0.61755812  1.36254871  0.18624771  1.36254871 -0.48032296
-                  -0.71558321 -0.59795308 -1.30373383  1.28412855 -0.63716316
-                   0.18624771  0.30387771  0.06861746 -1.97030437  1.91148913",
-                  rows=1, cols=N*C*Hin*Win)
-  out = matrix(out, rows=1, cols=N*C*Hin*Win)
-  for (i in 1:length(out)) {
-    rel_error = test_util::check_rel_error(as.scalar(out[1,i]),
-                                           as.scalar(target[1,i]), 1e-3, 1e-4)
-  }
-}
-
 tanh = function() {
   /*
    * Test for the `tanh` forward function.


[2/4] systemml git commit: [MINOR] Update docs for softmax

Posted by du...@apache.org.
[MINOR] Update docs for softmax


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/585b85fe
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/585b85fe
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/585b85fe

Branch: refs/heads/master
Commit: 585b85fe0ff7753b69e8027bf6f8df0a8b594d30
Parents: cbfb21c
Author: Mike Dusenberry <mw...@us.ibm.com>
Authored: Mon Jun 19 13:52:54 2017 -0700
Committer: Mike Dusenberry <mw...@us.ibm.com>
Committed: Mon Jun 19 13:52:54 2017 -0700

----------------------------------------------------------------------
 scripts/nn/layers/softmax.dml | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/585b85fe/scripts/nn/layers/softmax.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/layers/softmax.dml b/scripts/nn/layers/softmax.dml
index 68a7bc7..84627a9 100644
--- a/scripts/nn/layers/softmax.dml
+++ b/scripts/nn/layers/softmax.dml
@@ -26,10 +26,11 @@
 forward = function(matrix[double] scores)
     return (matrix[double] probs) {
   /*
-   * Computes the forward pass for a softmax classifier.  The inputs
-   * are interpreted as unnormalized, log-probabilities for each of
-   * N examples, and the softmax function transforms them to normalized
-   * probabilities.
+   * Computes the forward pass for a softmax classifier.  The input
+   * has N examples, each with D values that are interpreted as
+   * unnormalized log-probabilities for each of D classes.  The softmax
+   * function transforms these values to normalized probabilities across
+   * the D classes, for every example.
    *
    * This can be interpreted as a generalization of the sigmoid
    * function to multiple classes.

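A minimal sketch of the forward computation described above, assuming the
documented (N, D) input layout (the actual implementation lives in
`scripts/nn/layers/softmax.dml`); subtracting the row-wise max before
exponentiating is the standard numerical-stability trick:

  forward = function(matrix[double] scores)
      return (matrix[double] probs) {
    # Shift each row so its max is 0 to avoid overflow in exp().
    scores = scores - rowMaxs(scores)
    unnorm_probs = exp(scores)  # unnormalized probabilities
    # Normalize across the D classes of each example.
    probs = unnorm_probs / rowSums(unnorm_probs)
  }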

[3/4] systemml git commit: [SYSTEMML-1674] Add a new 2D depthwise convolution layer

Posted by du...@apache.org.
[SYSTEMML-1674] Add a new 2D depthwise convolution layer

This adds a new 2D depthwise convolution layer.  A depthwise convolution
(1) applies a different set of M filters to each input channel
separately, thus expanding each input channel to M output channels, and
(2) concatenates the results into a single volume with C*M output
channels.  This is in contrast to a regular 2D convolution, in which all
of the filters would be applied to all of the input channels at once.
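
As a minimal usage sketch based on the layer's `forward`/`init` signatures
(values chosen for illustration): with C input channels and depth
multiplier M, an input of shape (N, C*Hin*Win) is expanded to an output of
shape (N, C*M*Hout*Wout).

  source("nn/layers/conv2d_depthwise.dml") as conv2d_depthwise

  N = 2    # num examples
  C = 2    # num channels
  Hin = 3  # input height
  Win = 3  # input width
  M = 2    # depth multiplier
  Hf = 3   # filter height
  Wf = 3   # filter width
  X = rand(rows=N, cols=C*Hin*Win)
  [W, b] = conv2d_depthwise::init(C, M, Hf, Wf)
  # stride 1 with padding 1 preserves the 3x3 spatial dims here
  [out, Hout, Wout] = conv2d_depthwise::forward(X, W, b, Hin, Win, M, Hf, Wf,
                                                1, 1, 1, 1)
  # ncol(out) == C*M*Hout*Wout == 2*2*3*3 == 36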

In addition to the new layer, this also adds the associated unit and
gradient tests.

Closes #542.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/f2d975f5
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/f2d975f5
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/f2d975f5

Branch: refs/heads/master
Commit: f2d975f5015724434590098cb1f0371b082c65a6
Parents: 585b85f
Author: Mike Dusenberry <mw...@us.ibm.com>
Authored: Mon Jun 19 13:52:56 2017 -0700
Committer: Mike Dusenberry <mw...@us.ibm.com>
Committed: Mon Jun 19 13:52:56 2017 -0700

----------------------------------------------------------------------
 scripts/nn/layers/conv2d_depthwise.dml | 190 ++++++++++++++++++++++++++++
 scripts/nn/test/grad_check.dml         | 108 +++++++++++++++-
 scripts/nn/test/run_tests.dml          |   2 +
 scripts/nn/test/test.dml               |  69 ++++++++++
 4 files changed, 363 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/f2d975f5/scripts/nn/layers/conv2d_depthwise.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/layers/conv2d_depthwise.dml b/scripts/nn/layers/conv2d_depthwise.dml
new file mode 100644
index 0000000..0e9abb5
--- /dev/null
+++ b/scripts/nn/layers/conv2d_depthwise.dml
@@ -0,0 +1,190 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+/*
+ * 2D Depthwise Convolutional layer.
+ *
+ * Utilizes built-in convolution operators for higher performance.
+ */
+source("nn/util.dml") as util
+
+forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
+                   int Hin, int Win, int M, int Hf, int Wf,
+                   int strideh, int stridew, int padh, int padw)
+    return (matrix[double] out, int Hout, int Wout) {
+  /*
+   * Computes the forward pass for a 2D depthwise spatial convolutional
+   * layer with C*M filters of depth 1.  The input data has N examples,
+   * each represented as a 3D volume with C channels unrolled into a
+   * single vector.  For each group of M filters, a 2D convolution is
+   * applied to 1 unique input channel, yielding M output channels per
+   * input channel.  The resulting C groups of M output channels are
+   * then concatenated together channel-wise into a single volume of C*M
+   * output channels.  This can also be interpreted as C filters of
+   * depth 1 that expand each input channel to M output channels, where
+   * M is a "depth multiplier".
+   *
+   * Inputs:
+   *  - X: Inputs, of shape (N, C*Hin*Win).
+   *  - W: Weights, of shape (C, M*Hf*Wf).
+   *  - b: Biases, of shape (C*M, 1).
+   *  - Hin: Input height.
+   *  - Win: Input width.
+   *  - M: Number of filters per input channel (i.e. depth multiplier).
+   *  - Hf: Filter height.
+   *  - Wf: Filter width.
+   *  - strideh: Stride over height.
+   *  - stridew: Stride over width.
+   *  - padh: Padding for top and bottom sides.
+   *      For same output height as input, set `padh = (Hf - 1) / 2`,
+   *      assuming `strideh = 1`.
+   *      More generally, `padh = (Hin*(strideh-1) + Hf - strideh) / 2`
+   *      preserves the spatial dimensions of the input.
+   *  - padw: Padding for left and right sides.
+   *      For same output width as input, set `padw = (Wf - 1) / 2`,
+   *      assuming `stridew = 1`.
+   *      More generally, `padw = (Win*(stridew-1) + Wf - stridew) / 2`
+   *      preserves the spatial dimensions of the input.
+   *
+   * Outputs:
+   *  - out: Outputs, of shape (N, C*M*Hout*Wout).
+   *  - Hout: Output height.
+   *  - Wout: Output width.
+   */
+  N = nrow(X)
+  C = nrow(W)
+  Hout = as.integer(floor((Hin + 2*padh - Hf)/strideh + 1))
+  Wout = as.integer(floor((Win + 2*padw - Wf)/stridew + 1))
+
+  # create output volume
+  out = matrix(0, rows=N, cols=C*M*Hout*Wout)
+
+  # depthwise convolution
+  # TODO: Explore usage of parfor loops more to determine if they can provide a performance
+  # benefit.  Initial tests show that they are slower than the regular for loop, likely because
+  # they cause a reduction from a multithreaded conv2d op to a singlethreaded version.  For a
+  # number of channels >> the number of examples, it's possible that the parfor loop could be
+  # faster.
+  #parfor (c in 1:C, check=0) {  # each channel
+  for (c in 1:C) {  # each channel
+    # run conv2d on each input channel separately, each with a different filter
+    Xc = X[,((c-1)*Hin*Win + 1):c*Hin*Win]  # shape (N, 1*Hin*Win)
+    Wc = matrix(W[c,], rows=M, cols=Hf*Wf)  # shape (M, Hf*Wf)
+    outc = conv2d(Xc, Wc, input_shape=[N,1,Hin,Win], filter_shape=[M,1,Hf,Wf],
+                  stride=[strideh,stridew], padding=[padh,padw])  # shape (N, M*Hout*Wout)
+    out[,((c-1)*M*Hout*Wout + 1):c*M*Hout*Wout] = outc
+  }
+
+  # add bias term to each output filter
+  out = bias_add(out, b)
+}
+
+backward = function(matrix[double] dout, int Hout, int Wout,
+                    matrix[double] X, matrix[double] W, matrix[double] b,
+                    int Hin, int Win, int M, int Hf, int Wf,
+                    int strideh, int stridew, int padh, int padw)
+    return (matrix[double] dX, matrix[double] dW, matrix[double] db) {
+  /*
+   * Computes the backward pass for a 2D depthwise spatial convolutional
+   * layer with C*M filters of depth 1.
+   *
+   * Inputs:
+   *  - dout: Gradient wrt `out` from upstream, of
+   *      shape (N, C*M*Hout*Wout).
+   *  - Hout: Output height.
+   *  - Wout: Output width.
+   *  - X: Inputs, of shape (N, C*Hin*Win).
+   *  - W: Weights, of shape (C, M*Hf*Wf).
+   *  - b: Biases, of shape (C*M, 1).
+   *  - Hin: Input height.
+   *  - Win: Input width.
+   *  - M: Num filters per input channel (i.e. depth multiplier).
+   *  - Hf: Filter height.
+   *  - Wf: Filter width.
+   *  - strideh: Stride over height.
+   *  - stridew: Stride over width.
+   *  - padh: Padding for top and bottom sides.
+   *  - padw: Padding for left and right sides.
+   *
+   * Outputs:
+   *  - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
+   *  - dW: Gradient wrt `W`, of shape (C, M*Hf*Wf).
+   *  - db: Gradient wrt `b`, of shape (C*M, 1).
+   */
+  N = nrow(X)
+  C = nrow(W)
+
+  # create gradient volumes
+  dX = matrix(0, rows=N, cols=C*Hin*Win)
+  dW = matrix(0, rows=C, cols=M*Hf*Wf)
+  db = matrix(0, rows=C*M, cols=1)
+
+  # partial derivatives for depthwise convolution
+  for (c in 1:C) {  # each channel
+    # extract channel c
+    doutc = dout[,((c-1)*M*Hout*Wout + 1):c*M*Hout*Wout]  # (N,M*Hout*Wout)
+    Xc = X[,((c-1)*Hin*Win + 1):c*Hin*Win]  # shape (N, 1*Hin*Win)
+    Wc = matrix(W[c,], rows=M, cols=Hf*Wf)  # shape (M, 1*Hf*Wf)
+
+    # compute gradients for channel c
+    dWc = conv2d_backward_filter(Xc, doutc, stride=[strideh,stridew], padding=[padh,padw],
+                                 input_shape=[N,1,Hin,Win], filter_shape=[M,1,Hf,Wf])
+    dXc = conv2d_backward_data(Wc, doutc, stride=[strideh,stridew], padding=[padh,padw],
+                               input_shape=[N,1,Hin,Win], filter_shape=[M,1,Hf,Wf])
+
+    # store
+    dX[,((c-1)*Hin*Win + 1):c*Hin*Win] = dXc
+    dW[c,] = matrix(dWc, rows=1, cols=M*Hf*Wf)
+  }
+
+  # partial derivatives for bias vector
+  db = util::channel_sums(dout, C*M, Hout, Wout)
+}
+
+init = function(int C, int M, int Hf, int Wf)
+    return (matrix[double] W, matrix[double] b) {
+  /*
+   * Initialize the parameters of this layer.
+   *
+   * Note: This is just a convenience function, and parameters
+   * may be initialized manually if needed.
+   *
+   * We use the heuristic by He et al., which limits the magnification
+   * of inputs/gradients during forward/backward passes by scaling
+   * unit-Gaussian weights by a factor of sqrt(2/n), under the
+   * assumption of relu neurons.
+   *  - http://arxiv.org/abs/1502.01852
+   *
+   * Inputs:
+   *  - C: Number of input channels (dimensionality of depth).
+   *  - M: Number of filters per input channel (i.e. depth multiplier).
+   *  - Hf: Filter height.
+   *  - Wf: Filter width.
+   *
+   * Outputs:
+   *  - W: Weights, of shape (C, M*Hf*Wf).
+   *  - b: Biases, of shape (C*M, 1).
+   */
+  # Note: Each filter is applied to a volume of depth 1, so we only use Hf*Wf in the scaling factor.
+  W = rand(rows=C, cols=M*Hf*Wf, pdf="normal") * sqrt(2.0/(Hf*Wf))
+  b = matrix(0, rows=C*M, cols=1)
+}
+

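To make the "same" padding guidance in the docstring above concrete, a
small worked check (values chosen for illustration):

  # With strideh = 1, Hf = 3, Hin = 5:
  #   padh = (Hin*(strideh-1) + Hf - strideh) / 2 = (0 + 3 - 1) / 2 = 1
  #   Hout = floor((Hin + 2*padh - Hf)/strideh + 1) = floor((5+2-3)/1 + 1) = 5
  # so the output height matches the input height, as claimed.
  Hin = 5
  Hf = 3
  strideh = 1
  padh = (Hin*(strideh-1) + Hf - strideh) / 2
  Hout = as.integer(floor((Hin + 2*padh - Hf)/strideh + 1))
  print("padh = " + padh + ", Hout = " + Hout)
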
http://git-wip-us.apache.org/repos/asf/systemml/blob/f2d975f5/scripts/nn/test/grad_check.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/test/grad_check.dml b/scripts/nn/test/grad_check.dml
index 48e470c..67aeac1 100644
--- a/scripts/nn/test/grad_check.dml
+++ b/scripts/nn/test/grad_check.dml
@@ -27,6 +27,7 @@ source("nn/layers/batch_norm1d.dml") as batch_norm1d
 source("nn/layers/batch_norm2d.dml") as batch_norm2d
 source("nn/layers/conv2d.dml") as conv2d
 source("nn/layers/conv2d_builtin.dml") as conv2d_builtin
+source("nn/layers/conv2d_depthwise.dml") as conv2d_depthwise
 source("nn/layers/conv2d_transpose.dml") as conv2d_transpose
 source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss
 source("nn/layers/dropout.dml") as dropout
@@ -347,10 +348,10 @@ conv2d = function() {
 
   # Generate data
   N = 2  # num examples
-  C = 2  # num channels
+  C = 3  # num channels
   Hin = 5  # input height
   Win = 5  # input width
-  F = 2  # num filters
+  F = 4  # num filters
   Hf = 3  # filter height
   Wf = 3  # filter width
   stride = 1
@@ -436,10 +437,10 @@ conv2d_builtin = function() {
 
   # Generate data
   N = 2  # num examples
-  C = 2  # num channels
+  C = 3  # num channels
   Hin = 5  # input height
   Win = 5  # input width
-  F = 2  # num filters
+  F = 4  # num filters
   Hf = 3  # filter height
   Wf = 3  # filter width
   stride = 1
@@ -531,10 +532,10 @@ conv2d_simple = function() {
 
   # Generate data
   N = 2  # num examples
-  C = 2  # num channels
+  C = 3  # num channels
   Hin = 5  # input height
   Win = 5  # input width
-  F = 2  # num filters
+  F = 4  # num filters
   Hf = 3  # filter height
   Wf = 3  # filter width
   stride = 1
@@ -617,6 +618,101 @@ conv2d_simple = function() {
   }
 }
 
+conv2d_depthwise = function() {
+  /*
+   * Gradient check for the 2D depthwise convolutional layer.
+   */
+  print("Grad checking the 2D depthwise convolutional layer with L2 loss.")
+
+  # Generate data
+  N = 2  # num examples
+  C = 3  # num channels
+  Hin = 5  # input height
+  Win = 5  # input width
+  M = 4  # depth multiplier
+  Hf = 3  # filter height
+  Wf = 3  # filter width
+  stride = 1
+  pad = 1
+  X = rand(rows=N, cols=C*Hin*Win)
+  y = rand(rows=N, cols=C*M*Hin*Win)
+
+  # Create layers
+  [W, b] = conv2d_depthwise::init(C, M, Hf, Wf)
+
+  # Compute analytical gradients of loss wrt parameters
+  [out, Hout, Wout] = conv2d_depthwise::forward(X, W, b, Hin, Win, M, Hf, Wf, stride, stride,
+                                                pad, pad)
+  dout = l2_loss::backward(out, y)
+  [dX, dW, db] = conv2d_depthwise::backward(dout, Hout, Wout, X, W, b, Hin, Win, M, Hf, Wf,
+                                            stride, stride, pad, pad)
+
+  # Grad check
+  h = 1e-5
+  print(" - Grad checking X.")
+  for (i in 1:nrow(X)) {
+    for (j in 1:ncol(X)) {
+      # Compute numerical derivative
+      old = as.scalar(X[i,j])
+      X[i,j] = old - h
+      [outmh, Hout, Wout] = conv2d_depthwise::forward(X, W, b, Hin, Win, M, Hf, Wf, stride, stride,
+                                                      pad, pad)
+      lossmh = l2_loss::forward(outmh, y)
+      X[i,j] = old + h
+      [outph, Hout, Wout] = conv2d_depthwise::forward(X, W, b, Hin, Win, M, Hf, Wf, stride, stride,
+                                                      pad, pad)
+      lossph = l2_loss::forward(outph, y)
+      X[i,j] = old  # reset
+      dX_num = (lossph-lossmh) / (2*h)  # numerical derivative
+
+      # Check error
+      rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
+    }
+  }
+
+  print(" - Grad checking W.")
+  for (i in 1:nrow(W)) {
+    for (j in 1:ncol(W)) {
+      # Compute numerical derivative
+      old = as.scalar(W[i,j])
+      W[i,j] = old - h
+      [outmh, Hout, Wout] = conv2d_depthwise::forward(X, W, b, Hin, Win, M, Hf, Wf, stride, stride,
+                                                      pad, pad)
+      lossmh = l2_loss::forward(outmh, y)
+      W[i,j] = old + h
+      [outph, Hout, Wout] = conv2d_depthwise::forward(X, W, b, Hin, Win, M, Hf, Wf, stride, stride,
+                                                      pad, pad)
+      lossph = l2_loss::forward(outph, y)
+      W[i,j] = old  # reset
+      dW_num = (lossph-lossmh) / (2*h)  # numerical derivative
+
+      # Check error
+      rel_error = test_util::check_rel_grad_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
+    }
+  }
+
+  print(" - Grad checking b.")
+  for (i in 1:nrow(b)) {
+    for (j in 1:ncol(b)) {
+      # Compute numerical derivative
+      old = as.scalar(b[i,j])
+      b[i,j] = old - h
+      [outmh, Hout, Wout] = conv2d_depthwise::forward(X, W, b, Hin, Win, M, Hf, Wf, stride, stride,
+                                                      pad, pad)
+      lossmh = l2_loss::forward(outmh, y)
+      b[i,j] = old + h
+      [outph, Hout, Wout] = conv2d_depthwise::forward(X, W, b, Hin, Win, M, Hf, Wf, stride, stride,
+                                                      pad, pad)
+      lossph = l2_loss::forward(outph, y)
+      b[i,j] = old  # reset
+      db_num = (lossph-lossmh) / (2*h)  # numerical derivative
+
+      # Check error
+      rel_error = test_util::check_rel_grad_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
+    }
+  }
+}
+
 conv2d_transpose = function() {
   /*
    * Gradient check for the 2D transpose convolutional layer.

http://git-wip-us.apache.org/repos/asf/systemml/blob/f2d975f5/scripts/nn/test/run_tests.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/test/run_tests.dml b/scripts/nn/test/run_tests.dml
index c9b1b3e..ec6fcff 100644
--- a/scripts/nn/test/run_tests.dml
+++ b/scripts/nn/test/run_tests.dml
@@ -45,6 +45,7 @@ grad_check::batch_norm2d()
 grad_check::conv2d()
 grad_check::conv2d_builtin()
 grad_check::conv2d_simple()
+grad_check::conv2d_depthwise()
 grad_check::conv2d_transpose()
 grad_check::dropout()
 grad_check::lstm()
@@ -86,6 +87,7 @@ print("---")
 test::batch_norm1d()
 test::batch_norm2d()
 test::conv2d()
+test::conv2d_depthwise()
 test::conv2d_transpose()
 test::cross_entropy_loss()
 test::im2col()

http://git-wip-us.apache.org/repos/asf/systemml/blob/f2d975f5/scripts/nn/test/test.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/test/test.dml b/scripts/nn/test/test.dml
index 94965bf..c60aab9 100644
--- a/scripts/nn/test/test.dml
+++ b/scripts/nn/test/test.dml
@@ -26,6 +26,7 @@ source("nn/layers/batch_norm1d.dml") as batch_norm1d
 source("nn/layers/batch_norm2d.dml") as batch_norm2d
 source("nn/layers/conv2d.dml") as conv2d
 source("nn/layers/conv2d_builtin.dml") as conv2d_builtin
+source("nn/layers/conv2d_depthwise.dml") as conv2d_depthwise
 source("nn/layers/conv2d_transpose.dml") as conv2d_transpose
 source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss
 source("nn/layers/max_pool2d.dml") as max_pool2d
@@ -200,6 +201,74 @@ conv2d = function() {
   }
 }
 
+conv2d_depthwise = function() {
+  /*
+   * Test for the 2D depthwise convolution function.
+   */
+  print("Testing the 2D depthwise convolution function.")
+
+  # Generate data
+  N = 2  # num examples
+  C = 2  # num channels
+  Hin = 3  # input height
+  Win = 3  # input width
+  M = 2  # num filters per input channel (i.e. depth multiplier)
+  Hf = 3  # filter height
+  Wf = 3  # filter width
+  stride = 1
+  pad = 1
+  X = matrix(seq(1,N*C*Hin*Win), rows=N, cols=C*Hin*Win) / (N*C*Hin*Win) * 2 - 1  # normalized
+
+  # Create layer
+  W = matrix(seq(1,C*M*Hf*Wf), rows=C, cols=M*Hf*Wf) / (C*M*Hf*Wf) * 2 - 1  # normalized
+  b = matrix(seq(1,C*M), rows=C*M, cols=1) / (C*M)^2  # non-zero & non-one
+
+  # Forward
+  [out, Hout, Wout] = conv2d_depthwise::forward(X, W, b, Hin, Win, M, Hf, Wf, stride, stride,
+                                                pad, pad)
+
+  # Equivalency check
+  target = matrix("2.13040113  3.20447516  2.16743827
+                   3.30324078  4.94212961  3.30324078
+                   2.16743827  3.20447516  2.13040113
+
+                   0.52623457  0.85030866  0.67438275
+                   1.11574078  1.75462961  1.2824074
+                   0.89660496  1.35030866  0.97067899
+
+                  -0.30015433 -0.42052469 -0.15200615
+                  -0.15509261 -0.1828704   0.01157404
+                   0.07021603  0.07947529  0.1442901
+
+                  -0.90432101 -1.27469134 -0.64506173
+                  -0.8425926  -1.12037039 -0.50925928
+                  -0.20061731 -0.2746914  -0.01543214
+
+
+                  -0.31404325 -0.62885809 -0.49922845
+                  -0.86342597 -1.55787039 -1.19675934
+                  -0.94367278 -1.62885797 -1.20293212
+
+                   0.0817901   0.01697529  0.00771603
+                  -0.05092596 -0.2453704  -0.21759261
+                  -0.21450615 -0.48302469 -0.36265433
+
+                   1.25540125  1.74614203  1.1813271
+                   1.67824078  2.31712961  1.51157403
+                   0.95910496  1.24614203  0.81095684
+
+                   2.65123463  3.8919754   2.68827152
+                   3.99074078  5.87962961  3.99074078
+                   2.68827152  3.8919754   2.65123463", rows=N, cols=C*M*Hout*Wout)
+
+  for (i in 1:nrow(out)) {
+    for (j in 1:ncol(out)) {
+      rel_error = test_util::check_rel_error(as.scalar(out[i,j]),
+                                             as.scalar(target[i,j]), 1e-3, 1e-4)
+    }
+  }
+}
+
 conv2d_transpose = function() {
   /*
    * Test for the 2D transpose convolution function.


[4/4] systemml git commit: [SYSTEMML-1675] Add a new 2D depthwise transpose convolution layer

Posted by du...@apache.org.
[SYSTEMML-1675] Add a new 2D depthwise transpose convolution layer

This adds a new 2D depthwise transpose convolution layer.  A depthwise
transpose convolution (1) applies a different filter to each unique
group of M input channels separately, thus condensing each group of M
input channels to 1 output channel, and (2) concatenates the results
into a single volume with C/M output channels.  This is in contrast to
a regular 2D transpose convolution, in which all of the filters would be
applied to all of the input channels at once.
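
As a minimal usage sketch based on the layer's `forward`/`init` signatures
(values chosen for illustration): with C input channels and filter depth M
(C must be divisible by M), an input of shape (N, C*Hin*Win) is condensed
to an output of shape (N, C/M*Hout*Wout).

  source("nn/layers/conv2d_transpose_depthwise.dml") as conv2d_transpose_depthwise

  N = 2    # num examples
  C = 4    # num channels
  Hin = 2  # input height
  Win = 2  # input width
  M = 2    # depth of each filter, so C/M = 2 output channels
  Hf = 3   # filter height
  Wf = 3   # filter width
  X = rand(rows=N, cols=C*Hin*Win)
  [W, b] = conv2d_transpose_depthwise::init(C, M, Hf, Wf)
  [out, Hout, Wout] = conv2d_transpose_depthwise::forward(X, W, b, C, Hin, Win,
                                                          M, Hf, Wf, 1, 1, 0, 0,
                                                          0, 0)
  # Hout = 1*(2-1) - 2*0 + 3 + 0 = 4, so ncol(out) == C/M*Hout*Wout == 2*4*4 == 32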

In addition to the new layer, this also adds the associated unit and
gradient tests.

Closes #542.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/c83e99af
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/c83e99af
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/c83e99af

Branch: refs/heads/master
Commit: c83e99af755100e37f3e4fbfed20b5c455a635d5
Parents: f2d975f
Author: Mike Dusenberry <mw...@us.ibm.com>
Authored: Mon Jun 19 13:52:59 2017 -0700
Committer: Mike Dusenberry <mw...@us.ibm.com>
Committed: Mon Jun 19 13:52:59 2017 -0700

----------------------------------------------------------------------
 .../nn/layers/conv2d_transpose_depthwise.dml    | 198 +++++++++++++++++++
 scripts/nn/test/grad_check.dml                  | 104 ++++++++++
 scripts/nn/test/run_tests.dml                   |   2 +
 scripts/nn/test/test.dml                        |  59 ++++++
 4 files changed, 363 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/c83e99af/scripts/nn/layers/conv2d_transpose_depthwise.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/layers/conv2d_transpose_depthwise.dml b/scripts/nn/layers/conv2d_transpose_depthwise.dml
new file mode 100644
index 0000000..fdd7c10
--- /dev/null
+++ b/scripts/nn/layers/conv2d_transpose_depthwise.dml
@@ -0,0 +1,198 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+/*
+ * 2D Depthwise Transpose Convolutional layer.
+ *
+ * Utilizes built-in convolution operators for higher performance.
+ */
+source("nn/util.dml") as util
+
+forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
+                   int C, int Hin, int Win, int M, int Hf, int Wf,
+                   int strideh, int stridew, int padh, int padw,
+                   int out_padh, int out_padw)
+    return (matrix[double] out, int Hout, int Wout) {
+  /*
+   * Computes the forward pass for a 2D depthwise spatial transpose
+   * convolutional layer with C/M filters of depth M.  The input data
+   * has N examples, each represented as a 3D volume with C channels
+   * unrolled into a single vector.  For each group of M input channels,
+   * a 2D transpose convolution is applied with 1 unique filter,
+   * yielding 1 output channel per group of M input channels.
+   * The resulting C/M separate output channels are then concatenated
+   * together channel-wise into a single volume of C/M output channels.
+   *
+   * Inputs:
+   *  - X: Inputs, of shape (N, C*Hin*Win).
+   *  - W: Weights, of shape (C/M, M*Hf*Wf).
+   *  - b: Biases, of shape (C/M, 1).
+   *  - C: Number of input channels (dimensionality of depth).
+   *  - Hin: Input height.
+   *  - Win: Input width.
+   *  - M: Depth of each filter (C must be divisible by M).
+   *  - Hf: Filter height.
+   *  - Wf: Filter width.
+   *  - strideh: Stride over height.
+   *  - stridew: Stride over width.
+   *  - padh: Padding for top and bottom sides.
+   *  - padw: Padding for left and right sides.
+   *  - out_padh: Extra padding for top side. This should
+   *      lie in [0, strideh-1].
+   *  - out_padw: Extra padding for right side. This should
+   *      lie in [0, stridew-1].
+   *
+   * Outputs:
+   *  - out: Outputs, of shape (N, C/M*Hout*Wout).
+   *  - Hout: Output height.
+   *  - Wout: Output width.
+   */
+  N = nrow(X)
+  F = nrow(W)
+  Hout = strideh*(Hin-1) - 2*padh + Hf + out_padh
+  Wout = stridew*(Win-1) - 2*padw + Wf + out_padw
+
+  # create output volume
+  out = matrix(0, rows=N, cols=C/M*Hout*Wout)
+
+  # depthwise transpose convolution
+  # TODO: Explore usage of parfor loops more to determine if they can provide a performance
+  # benefit.  Initial tests show that they are slower than the regular for loop, likely because
+  # they cause a reduction from a multithreaded conv2d op to a singlethreaded version.  For a
+  # number of filters C/M >> the number of examples, it's possible that the parfor loop could be
+  # faster.
+  #parfor (f in 1:F, check=0) {  # each filter
+  for (f in 1:F) {
+    # compute gradient wrt data of conv2d using 1 filter and M input channels
+    w = matrix(W[f,], rows=M, cols=Hf*Wf)  # 1 filter, of shape (M, 1*Hf*Wf)
+    Xm = X[,((f-1)*M*Hin*Win + 1):f*M*Hin*Win]  # M input channels, of shape (N, M*Hin*Win)
+    outm = conv2d_backward_data(w, Xm, stride=[strideh,stridew], padding=[padh,padw],
+                                input_shape=[N,1,Hout,Wout], filter_shape=[M,1,Hf,Wf])
+
+    # store
+    out[,((f-1)*Hout*Wout + 1):f*Hout*Wout] = outm  # outm has shape (N, 1*Hout*Wout)
+  }
+
+  # add bias term to each output filter
+  out = bias_add(out, b)
+}
+
+backward = function(matrix[double] dout, int Hout, int Wout,
+                    matrix[double] X, matrix[double] W, matrix[double] b,
+                    int C, int Hin, int Win, int M, int Hf, int Wf,
+                    int strideh, int stridew, int padh, int padw)
+    return (matrix[double] dX, matrix[double] dW, matrix[double] db) {
+  /*
+   * Computes the backward pass for a 2D depthwise spatial transpose
+   * convolutional layer with C/M filters of depth M.
+   *
+   * Inputs:
+   *  - dout: Gradient wrt `out` from upstream, of
+   *      shape (N, C/M*Hout*Wout).
+   *  - Hout: Output height.
+   *  - Wout: Output width.
+   *  - X: Inputs, of shape (N, C*Hin*Win).
+   *  - W: Weights, of shape (C/M, M*Hf*Wf).
+   *  - b: Biases, of shape (C/M, 1).
+   *  - C: Number of input channels (dimensionality of depth).
+   *  - Hin: Input height.
+   *  - Win: Input width.
+   *  - M: Depth of each filter (C must be divisible by M).
+   *  - Hf: Filter height.
+   *  - Wf: Filter width.
+   *  - strideh: Stride over height.
+   *  - stridew: Stride over width.
+   *  - padh: Padding for top and bottom sides.
+   *  - padw: Padding for left and right sides.
+   *
+   * Outputs:
+   *  - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
+   *  - dW: Gradient wrt `W`, of shape (C/M, M*Hf*Wf).
+   *  - db: Gradient wrt `b`, of shape (C/M, 1).
+   */
+  N = nrow(X)
+  F = nrow(W)
+
+  # create gradient volumes
+  dX = matrix(0, rows=N, cols=C*Hin*Win)
+  dW = matrix(0, rows=C/M, cols=M*Hf*Wf)
+  db = matrix(0, rows=C/M, cols=1)
+
+  # depthwise transpose convolution
+  for (f in 1:F) {
+    # extract 1 gradient channel, 1 depth-1 filter, and M input channels, since the forward pass
+    # maps M input channels to 1 output channel for each filter
+    doutf = dout[,((f-1)*Hout*Wout + 1):f*Hout*Wout]  # shape (N, 1*Hout*Wout)
+    w = matrix(W[f,], rows=M, cols=Hf*Wf)  # 1 filter, of shape (M, 1*Hf*Wf)
+    Xm = X[,((f-1)*M*Hin*Win + 1):f*M*Hin*Win]  # M input channels, of shape (N, M*Hin*Win)
+
+    # compute gradients:
+    # conv2d_backward_filter takes the input and gradient wrt the output
+    # as first and second args, respectively. Given that we need to
+    # compute the grad wrt the filter for transpose convolution, where
+    # the roles of the input and output are reversed, we reverse the
+    # order of the args (along with setting input_shape to the dout
+    # shape).
+    dw = conv2d_backward_filter(doutf, Xm, stride=[strideh,stridew], padding=[padh,padw],
+                                input_shape=[N,1,Hout,Wout], filter_shape=[M,1,Hf,Wf])
+    # Since the forward for transpose convolution makes a call to
+    # conv2d_backward_data, to compute its derivative wrt the data
+    # we can run conv2d by applying the filter on the grad wrt the
+    # output (this makes sense because convolution transpose is the
+    # 'reverse' of convolution). It's easy to see that this will produce
+    # an output of the required size.
+    dXm = conv2d(doutf, w, input_shape=[N,1,Hout,Wout], filter_shape=[M,1,Hf,Wf],
+                 stride=[strideh,stridew], padding=[padh,padw])
+
+    # store
+    dX[,((f-1)*M*Hin*Win + 1):f*M*Hin*Win] = dXm
+    dW[f,] = matrix(dw, rows=1, cols=M*Hf*Wf)
+  }
+
+  # partial derivatives for bias vector
+  db = util::channel_sums(dout, C/M, Hout, Wout)
+}
+
+init = function(int C, int M, int Hf, int Wf)
+    return (matrix[double] W, matrix[double] b) {
+  /*
+   * Utility function to initialize the parameters of this layer.
+   *
+   * We use the heuristic by He et al., which limits the magnification
+   * of inputs/gradients during forward/backward passes by scaling
+   * unit-Gaussian weights by a factor of sqrt(2/n), under the
+   * assumption of relu neurons.
+   *  - http://arxiv.org/abs/1502.01852
+   *
+   * Inputs:
+   *  - C: Number of input channels (dimensionality of depth).
+   *  - M: Depth of each filter (C must be divisible by M).
+   *  - Hf: Filter height.
+   *  - Wf: Filter width.
+   *
+   * Outputs:
+   *  - W: Weights, of shape (C/M, M*Hf*Wf).
+   *  - b: Biases, of shape (C/M, 1).
+   */
+  W = rand(rows=C/M, cols=M*Hf*Wf, pdf="normal") * sqrt(2/(M*Hf*Wf))
+  b = matrix(0, rows=C/M, cols=1)
+}
+
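
Given the `Hout`/`Wout` formulas in the forward pass above, a quick sanity
check with the stride-2 configuration used in the gradient check below:

  # Hout = strideh*(Hin-1) - 2*padh + Hf + out_padh
  # With strideh = 2, Hin = 3, padh = 1, Hf = 3, out_padh = 1:
  #   Hout = 2*(3-1) - 2*1 + 3 + 1 = 6
  Hin = 3
  strideh = 2
  padh = 1
  Hf = 3
  out_padh = 1
  Hout = strideh*(Hin-1) - 2*padh + Hf + out_padh
  print("Hout = " + Hout)  # 6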

http://git-wip-us.apache.org/repos/asf/systemml/blob/c83e99af/scripts/nn/test/grad_check.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/test/grad_check.dml b/scripts/nn/test/grad_check.dml
index 67aeac1..fcb45cd 100644
--- a/scripts/nn/test/grad_check.dml
+++ b/scripts/nn/test/grad_check.dml
@@ -29,6 +29,7 @@ source("nn/layers/conv2d.dml") as conv2d
 source("nn/layers/conv2d_builtin.dml") as conv2d_builtin
 source("nn/layers/conv2d_depthwise.dml") as conv2d_depthwise
 source("nn/layers/conv2d_transpose.dml") as conv2d_transpose
+source("nn/layers/conv2d_transpose_depthwise.dml") as conv2d_transpose_depthwise
 source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss
 source("nn/layers/dropout.dml") as dropout
 source("nn/layers/l1_loss.dml") as l1_loss
@@ -809,6 +810,109 @@ conv2d_transpose = function() {
   }
 }
 
+conv2d_transpose_depthwise = function() {
+  /*
+   * Gradient check for the 2D depthwise transpose convolutional layer.
+   */
+  print("Grad checking the 2D depthwise transpose convolutional layer with L2 loss.")
+
+  # Generate data
+  N = 2  # num examples
+  C = 8  # num channels
+  Hin = 3  # input height
+  Win = 3  # input width
+  M = 4  # depth of filters
+  Hf = 3  # filter height
+  Wf = 3  # filter width
+  stride = 2
+  pad = 1
+  out_pad = 1
+  X = rand(rows=N, cols=C*Hin*Win)
+
+  # Create layers
+  [W, b] = conv2d_transpose_depthwise::init(C, M, Hf, Wf)
+
+  # Compute analytical gradients of loss wrt parameters
+  [out, Hout, Wout] = conv2d_transpose_depthwise::forward(X, W, b, C, Hin, Win, M, Hf, Wf,
+                                                          stride, stride, pad, pad,
+                                                          out_pad, out_pad)
+  y = rand(rows=N, cols=C/M*Hout*Wout)
+  dout = l2_loss::backward(out, y)
+  [dX, dW, db] = conv2d_transpose_depthwise::backward(dout, Hout, Wout, X, W, b, C, Hin, Win, M,
+                                                      Hf, Wf, stride, stride, pad, pad)
+
+  # Grad check
+  h = 1e-5
+  print(" - Grad checking X.")
+  for (i in 1:nrow(X)) {
+    for (j in 1:ncol(X)) {
+      # Compute numerical derivative
+      old = as.scalar(X[i,j])
+      X[i,j] = old - h
+      [outmh, Hout, Wout] = conv2d_transpose_depthwise::forward(X, W, b, C, Hin, Win, M, Hf, Wf,
+                                                                stride, stride, pad, pad,
+                                                                out_pad, out_pad)
+      lossmh = l2_loss::forward(outmh, y)
+      X[i,j] = old + h
+      [outph, Hout, Wout] = conv2d_transpose_depthwise::forward(X, W, b, C, Hin, Win, M, Hf, Wf,
+                                                                stride, stride, pad, pad,
+                                                                out_pad, out_pad)
+      lossph = l2_loss::forward(outph, y)
+      X[i,j] = old  # reset
+      dX_num = (lossph-lossmh) / (2*h)  # numerical derivative
+
+      # Check error
+      rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
+    }
+  }
+
+  print(" - Grad checking W.")
+  for (i in 1:nrow(W)) {
+    for (j in 1:ncol(W)) {
+      # Compute numerical derivative
+      old = as.scalar(W[i,j])
+      W[i,j] = old - h
+      [outmh, Hout, Wout] = conv2d_transpose_depthwise::forward(X, W, b, C, Hin, Win, M, Hf, Wf,
+                                                                stride, stride, pad, pad,
+                                                                out_pad, out_pad)
+      lossmh = l2_loss::forward(outmh, y)
+      W[i,j] = old + h
+      [outph, Hout, Wout] = conv2d_transpose_depthwise::forward(X, W, b, C, Hin, Win, M, Hf, Wf,
+                                                                stride, stride, pad, pad,
+                                                                out_pad, out_pad)
+      lossph = l2_loss::forward(outph, y)
+      W[i,j] = old  # reset
+      dW_num = (lossph-lossmh) / (2*h)  # numerical derivative
+
+      # Check error
+      rel_error = test_util::check_rel_grad_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
+    }
+  }
+
+  print(" - Grad checking b.")
+  for (i in 1:nrow(b)) {
+    for (j in 1:ncol(b)) {
+      # Compute numerical derivative
+      old = as.scalar(b[i,j])
+      b[i,j] = old - h
+      [outmh, Hout, Wout] = conv2d_transpose_depthwise::forward(X, W, b, C, Hin, Win, M, Hf, Wf,
+                                                                stride, stride, pad, pad,
+                                                                out_pad, out_pad)
+      lossmh = l2_loss::forward(outmh, y)
+      b[i,j] = old + h
+      [outph, Hout, Wout] = conv2d_transpose_depthwise::forward(X, W, b, C, Hin, Win, M, Hf, Wf,
+                                                                stride, stride, pad, pad,
+                                                                out_pad, out_pad)
+      lossph = l2_loss::forward(outph, y)
+      b[i,j] = old  # reset
+      db_num = (lossph-lossmh) / (2*h)  # numerical derivative
+
+      # Check error
+      rel_error = test_util::check_rel_grad_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
+    }
+  }
+}
+
 cross_entropy_loss = function() {
   /*
    * Gradient check for the cross-entropy loss function.

http://git-wip-us.apache.org/repos/asf/systemml/blob/c83e99af/scripts/nn/test/run_tests.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/test/run_tests.dml b/scripts/nn/test/run_tests.dml
index ec6fcff..5f3ca6e 100644
--- a/scripts/nn/test/run_tests.dml
+++ b/scripts/nn/test/run_tests.dml
@@ -47,6 +47,7 @@ grad_check::conv2d_builtin()
 grad_check::conv2d_simple()
 grad_check::conv2d_depthwise()
 grad_check::conv2d_transpose()
+grad_check::conv2d_transpose_depthwise()
 grad_check::dropout()
 grad_check::lstm()
 grad_check::max_pool2d()
@@ -89,6 +90,7 @@ test::batch_norm2d()
 test::conv2d()
 test::conv2d_depthwise()
 test::conv2d_transpose()
+test::conv2d_transpose_depthwise()
 test::cross_entropy_loss()
 test::im2col()
 test::max_pool2d()

http://git-wip-us.apache.org/repos/asf/systemml/blob/c83e99af/scripts/nn/test/test.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/test/test.dml b/scripts/nn/test/test.dml
index c60aab9..37f9f73 100644
--- a/scripts/nn/test/test.dml
+++ b/scripts/nn/test/test.dml
@@ -28,6 +28,7 @@ source("nn/layers/conv2d.dml") as conv2d
 source("nn/layers/conv2d_builtin.dml") as conv2d_builtin
 source("nn/layers/conv2d_depthwise.dml") as conv2d_depthwise
 source("nn/layers/conv2d_transpose.dml") as conv2d_transpose
+source("nn/layers/conv2d_transpose_depthwise.dml") as conv2d_transpose_depthwise
 source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss
 source("nn/layers/max_pool2d.dml") as max_pool2d
 source("nn/layers/max_pool2d_builtin.dml") as max_pool2d_builtin
@@ -326,6 +327,64 @@ conv2d_transpose = function() {
   }
 }
 
+conv2d_transpose_depthwise = function() {
+  /*
+   * Test for the 2D depthwise transpose convolution function.
+   */
+  print("Testing the 2D depthwise transpose convolution function.")
+
+  # Generate data
+  N = 2  # num examples
+  C = 4  # num channels
+  Hin = 2  # input height
+  Win = 2  # input width
+  M = 2  # depth of each filter
+  Hf = 3  # filter height
+  Wf = 3  # filter width
+  stride = 1
+  pad = 0
+  out_pad = 0  # padding added to output
+  X = matrix(seq(1,N*C*Hin*Win), rows=N, cols=C*Hin*Win) / (N*C*Hin*Win) * 2 - 1  # normalized
+
+  # Create layer
+  W = matrix(seq(1,C/M*M*Hf*Wf), rows=C/M, cols=M*Hf*Wf) / (C/M*M*Hf*Wf) * 2 - 1  # normalized
+  b = matrix(seq(1,C/M), rows=C/M, cols=1) / (C/M)^2  # non-zero & non-one
+
+  # Forward
+  [out, Hout, Wout] = conv2d_transpose_depthwise::forward(X, W, b, C, Hin, Win, M, Hf, Wf,
+                                                          stride, stride, pad, pad,
+                                                          out_pad, out_pad)
+
+  # Equivalency check
+  target = matrix("1.44097221  2.45486116  2.28125     1.1875
+                   2.1875      3.80555558  3.48611116  1.72916663
+                   1.6875      2.84722233  2.52777767  1.27083325
+                   0.80902779  1.24652779  1.10069442  0.625
+
+                   0.37152776  0.24652773  0.18402778  0.35416669
+                   0.21527778 -0.02777781 -0.12500003  0.22916666
+                   0.04861115 -0.31944442 -0.41666669  0.10416666
+                   0.32291669  0.20486113  0.1701389   0.375
+
+
+                   0.05208334 -0.21180555 -0.16319445  0.02083334
+                  -0.25694442 -0.8611111  -0.7361111  -0.27083331
+                  -0.09027778 -0.4861111  -0.3611111  -0.0625
+                   0.08680556 -0.08680557 -0.01041669  0.125
+
+                   0.98263896  1.57986116  1.73958337  1.1875
+                   1.77083337  3.30555558  3.65277791  2.22916675
+                   2.27083325  4.34722233  4.69444466  2.77083349
+                   1.60069442  2.87152767  3.05902767  1.875     ", rows=N, cols=C/M*Hout*Wout)
+
+  for (i in 1:nrow(out)) {
+    for (j in 1:ncol(out)) {
+      rel_error = test_util::check_rel_error(as.scalar(out[i,j]),
+                                             as.scalar(target[i,j]), 1e-3, 1e-4)
+    }
+  }
+}
+
 cross_entropy_loss = function() {
   /*
    * Test for the cross-entropy loss function.