Posted to commits@systemml.apache.org by du...@apache.org on 2017/06/15 22:22:40 UTC

[2/3] systemml git commit: [MINOR] Update `nn` library formatting

[MINOR] Update `nn` library formatting


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/d49ab981
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/d49ab981
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/d49ab981

Branch: refs/heads/master
Commit: d49ab98116054c98aa65a1f0d173a9c181b1f32f
Parents: 324bea5
Author: Mike Dusenberry <mw...@us.ibm.com>
Authored: Thu Jun 15 15:20:47 2017 -0700
Committer: Mike Dusenberry <mw...@us.ibm.com>
Committed: Thu Jun 15 15:20:47 2017 -0700

----------------------------------------------------------------------
 scripts/nn/examples/mnist_lenet.dml    |   4 +-
 scripts/nn/layers/conv2d_transpose.dml | 149 +++++++++++++---------------
 2 files changed, 71 insertions(+), 82 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/d49ab981/scripts/nn/examples/mnist_lenet.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/examples/mnist_lenet.dml b/scripts/nn/examples/mnist_lenet.dml
index 986d797..57b8ba6 100644
--- a/scripts/nn/examples/mnist_lenet.dml
+++ b/scripts/nn/examples/mnist_lenet.dml
@@ -114,8 +114,8 @@ train = function(matrix[double] X, matrix[double] Y,
 
       # Compute forward pass
       ## layer 1: conv1 -> relu1 -> pool1
-      [outc1, Houtc1, Woutc1] = conv2d::forward(X_batch, W1, b1, C, Hin, Win, Hf, Wf, stride, stride,
-                                                pad, pad)
+      [outc1, Houtc1, Woutc1] = conv2d::forward(X_batch, W1, b1, C, Hin, Win, Hf, Wf,
+                                                stride, stride, pad, pad)
       outr1 = relu::forward(outc1)
       [outp1, Houtp1, Woutp1] = max_pool2d::forward(outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2,
                                                     strideh=2, stridew=2, padh=0, padw=0)

http://git-wip-us.apache.org/repos/asf/systemml/blob/d49ab981/scripts/nn/layers/conv2d_transpose.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/layers/conv2d_transpose.dml b/scripts/nn/layers/conv2d_transpose.dml
index 0838563..eee19a5 100644
--- a/scripts/nn/layers/conv2d_transpose.dml
+++ b/scripts/nn/layers/conv2d_transpose.dml
@@ -63,25 +63,24 @@ forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
   Hout = strideh * (Hin-1) - 2*padh + Hf + out_padh
   Wout = stridew * (Win-1) - 2*padw + Wf + out_padw
 
-  /*
-   * Transpose convolution aims to go in the other direction of
-   * (direct) convolution, i.e., given input X, produce output O such
-   * that running convolution on O recovers X. This is achieved by
-   * conv2d_backward_data (since the derivative wrt data must produce
-   * output of same size as the input to conv2d). By reusing a built-in
-   * operator we achieve efficiency and restrict the number of built-in
-   * operators to manageable levels. Plus, most other deep-learning
-   * packages make use of the same strategy which means this
-   * implementation of transpose convolution is 'in-sync' with them.
-   *
-   * One potential downside of reusing conv2d_backward_data is the fact
-   * that it rotates the filter by 180 degrees before applying it. This
-   * needs to be kept in mind when interpreting the output of transpose
-   * convolution.
-   */
+  # Transpose convolution aims to go in the other direction of
+  # (direct) convolution, i.e., given input X, produce output O such
+  # that running convolution on O recovers X. This is achieved by
+  # conv2d_backward_data (since the derivative wrt data must produce
+  # output of same size as the input to conv2d). By reusing a built-in
+  # operator we achieve efficiency and restrict the number of built-in
+  # operators to manageable levels. Plus, most other deep-learning
+  # packages make use of the same strategy, which means this
+  # implementation of transpose convolution is in sync with them.
+  #
+  # One potential downside of reusing conv2d_backward_data is the fact
+  # that it rotates the filter by 180 degrees before applying it. This
+  # needs to be kept in mind when interpreting the output of transpose
+  # convolution.
   out = conv2d_backward_data(W, X, stride=[strideh,stridew], padding=[padh,padw],
                              input_shape=[N,F,Hout,Wout], filter_shape=[C,F,Hf,Wf])
 
+  # Add bias term to each output filter
   out = bias_add(out, b)
 }
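
For a concrete feel for the sizes in the forward pass above: with Hin = Win = 4, 4x4 kernels, stride 2, padding 1, and no output padding, Hout = 2*(4-1) - 2*1 + 4 = 8, i.e., 2x upsampling. Below is a minimal, self-contained DML sketch of the round trip the comment describes; the toy sizes and variable names are illustrative assumptions, and only the builtins used by the layer itself appear.

  # Toy configuration: N examples, C input channels, F output channels,
  # 4x4 kernels, stride 2, padding 1, no output padding.
  N = 2
  C = 3
  F = 3
  Hin = 4
  Win = 4
  Hf = 4
  Wf = 4
  strideh = 2
  stridew = 2
  padh = 1
  padw = 1
  Hout = strideh * (Hin-1) - 2*padh + Hf   # = 2*3 - 2 + 4 = 8
  Wout = stridew * (Win-1) - 2*padw + Wf   # = 8

  X = matrix(seq(1, N*C*Hin*Win), rows=N, cols=C*Hin*Win)  # N x C*Hin*Win = 2 x 48
  W = matrix(1, rows=C, cols=F*Hf*Wf) / (Hf*Wf)            # C x F*Hf*Wf = 3 x 48

  # The forward pass of transpose convolution is conv2d_backward_data:
  out = conv2d_backward_data(W, X, stride=[strideh,stridew], padding=[padh,padw],
                             input_shape=[N,F,Hout,Wout], filter_shape=[C,F,Hf,Wf])
  print("out: " + nrow(out) + " x " + ncol(out))    # N x F*Hout*Wout = 2 x 192

  # Running a direct convolution on the output recovers X's spatial size:
  back = conv2d(out, W, input_shape=[N,F,Hout,Wout], filter_shape=[C,F,Hf,Wf],
                stride=[strideh,stridew], padding=[padh,padw])
  print("back: " + nrow(back) + " x " + ncol(back)) # N x C*Hin*Win = 2 x 48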
 
@@ -120,36 +119,33 @@ backward = function(matrix[double] dout, int Hout, int Wout,
   N = nrow(X)
   F = nrow(b)
 
-  /*
-   * conv2d_backward_filter takes the input and delta map as first and
-   * second args, respectively. Given that we need to compute the
-   * grad (wrt to filter) for transpose convolution where the roles of
-   * the input and output are reversed, we reverse the order of the
-   * args (along with setting input_shape to the delta map shape).
-   * Effectively, we are running a direct convolution with X as the
-   * filter and the dout as the input. To convince oneself that the
-   * interconnections between the cells of the filter, input and delta
-   * map are preserved please keep in mind that the forward of
-   * convolution transpose rotates the filter by 180 degrees before
-   * applying it.
-   */
+  # conv2d_backward_filter takes the input and delta map as first and
+  # second args, respectively. Given that we need to compute the
+  # grad (wrt the filter) for transpose convolution where the roles of
+  # the input and output are reversed, we reverse the order of the
+  # args (along with setting input_shape to the delta map shape).
+  # Effectively, we are running a direct convolution with X as the
+  # filter and the dout as the input. To convince oneself that the
+  # interconnections between the cells of the filter, input and delta
+  # map are preserved, please keep in mind that the forward of
+  # convolution transpose rotates the filter by 180 degrees before
+  # applying it.
   dW = conv2d_backward_filter(dout, X, stride=[strideh,stridew], padding=[padh,padw],
                               input_shape=[N,F,Hout,Wout], filter_shape=[C,F,Hf,Wf])
 
-  /*
-   * Since the forward for transpose convolution makes a call to
-   * conv2d_backward_data, to compute its derivative wrt to data
-   * we can run conv2d by applying the filter on the delta
-   * map (this makes sense because convolution transpose is the
-   * 'reverse' of convolution). Its easy to see that this will produce
-   * output of the required size. To convince oneself that conv2d will
-   * respect the interconnections between the cells in the delta map
-   * and the filter, keep in mind that the forward function rotates the
-   * filter by 180 degrees before applying it.
-   */
+  # Since the forward for transpose convolution makes a call to
+  # conv2d_backward_data, to compute its derivative wrt the data
+  # we can run conv2d by applying the filter on the delta
+  # map (this makes sense because convolution transpose is the
+  # 'reverse' of convolution). It's easy to see that this will produce
+  # output of the required size. To convince oneself that conv2d will
+  # respect the interconnections between the cells in the delta map
+  # and the filter, keep in mind that the forward function rotates the
+  # filter by 180 degrees before applying it.
   dX = conv2d(dout, W, input_shape=[N,F,Hout,Wout], filter_shape=[C,F,Hf,Wf],
               stride=[strideh,stridew], padding=[padh,padw])
 
+  # Partial derivatives for bias vector
   db = rowSums(matrix(colSums(dout), rows=F, cols=Hout*Wout))
 }
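
As a shape check on the three gradients above, here is a small standalone DML sketch in the same spirit; the toy configuration (N = 2, C = F = 3, 4x4 kernels, stride 2, padding 1, so Hout = Wout = 8) and all variable names are illustrative assumptions.

  # Toy shapes matching the forward sketch: X is N x C*Hin*Win, W is C x F*Hf*Wf,
  # and dout (the upstream gradient) has the same shape as the forward output.
  N = 2
  C = 3
  F = 3
  Hin = 4
  Win = 4
  Hf = 4
  Wf = 4
  strideh = 2
  stridew = 2
  padh = 1
  padw = 1
  Hout = strideh * (Hin-1) - 2*padh + Hf   # = 8
  Wout = stridew * (Win-1) - 2*padw + Wf   # = 8
  X = matrix(1, rows=N, cols=C*Hin*Win)
  W = matrix(1, rows=C, cols=F*Hf*Wf)
  dout = matrix(1, rows=N, cols=F*Hout*Wout)

  # Gradient wrt the filter: note the reversed argument order described above.
  dW = conv2d_backward_filter(dout, X, stride=[strideh,stridew], padding=[padh,padw],
                              input_shape=[N,F,Hout,Wout], filter_shape=[C,F,Hf,Wf])
  print("dW: " + nrow(dW) + " x " + ncol(dW))  # C x F*Hf*Wf = 3 x 48, same shape as W

  # Gradient wrt the data: a direct convolution of the filter over the delta map.
  dX = conv2d(dout, W, input_shape=[N,F,Hout,Wout], filter_shape=[C,F,Hf,Wf],
              stride=[strideh,stridew], padding=[padh,padw])
  print("dX: " + nrow(dX) + " x " + ncol(dX))  # N x C*Hin*Win = 2 x 48, same shape as X

  # Gradient wrt the bias: sum dout over examples and spatial positions per filter.
  db = rowSums(matrix(colSums(dout), rows=F, cols=Hout*Wout))
  print("db: " + nrow(db) + " x " + ncol(db))  # F x 1 = 3 x 1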
 
@@ -202,47 +198,40 @@ init_bilinear = function(int C, int K)
   vect = 1 - abs(seq(0, K-1) / factor_up - center)
   weights = matrix(vect %*% t(vect), rows=1, cols=K*K)
 
-  /*
-   * To create a multi-channel channel-independent upsampling filter,
-   * we need to intersperse the filter weights with 0s. For instance,
-   * consider the case of 2X upsampling. In this case, K=4 and we have
-   * K^2=16 weights to include into the 3D tensor representing the
-   * filter which should look like the following (assuming 3 channels):
-   *
-   *   <-16 weights-> <---------32 0s--------->
-   *   X X ...... X X 0 0 0 ............. 0 0 0
-   *   0 .......... 0 X X .... X X 0 ...... 0 0
-   *   0 0 0 ............... 0 0 0 X X .... X X
-   *
-   * To be clear, the second row should have 16 0s followed by 16
-   * weights followed by 16 0s.
-   *
-   * To create the above filter, we take advantage of the fact that
-   * between two sets of non-zero weights, there is always a sequence
-   * of C*K*K 0s. In the above example, C*K^2 = 48 (e.g., 32 trailing
-   * 0s in the first row and 16 leading 0s in the second row).
-   *
-   * Note that, in the special case of C=1 we do not need to
-   * intersperse with 0s (no question of being channel-wise independent
-   * since we have only 1 channel).
-   */
-  #if(C > 1){
-    /*
-     * Append C*K*K trailing 0s to the K*K kernel and replicate the
-     * resulting row C times
-     */
-    repl_weights = matrix(1, rows=C, cols=1) %*% cbind(weights, matrix(0, rows=1, cols=C*K*K))
+  # To create a multi-channel channel-independent upsampling filter,
+  # we need to intersperse the filter weights with 0s. For instance,
+  # consider the case of 2X upsampling. In this case, K=4 and we have
+  # K^2=16 weights to include into the 3D tensor representing the
+  # filter which should look like the following (assuming 3 channels):
+  #
+  #   <-16 weights-> <---------32 0s--------->
+  #   X X ...... X X 0 0 0 ............. 0 0 0
+  #   0 .......... 0 X X .... X X 0 ...... 0 0
+  #   0 0 0 ............... 0 0 0 X X .... X X
+  #
+  # To be clear, the second row should have 16 0s followed by 16
+  # weights followed by 16 0s.
+  #
+  # To create the above filter, we take advantage of the fact that
+  # between two sets of non-zero weights, there is always a sequence
+  # of C*K*K 0s. In the above example, C*K^2 = 48 (e.g., 32 trailing
+  # 0s in the first row and 16 leading 0s in the second row).
+  #
+  # Note that, in the special case of C=1 we do not need to
+  # intersperse with 0s (no question of being channel-wise independent
+  # since we have only 1 channel).
+
+  # Append C*K*K trailing 0s to the K*K kernel and replicate the
+  # resulting row C times
+  repl_weights = matrix(1, rows=C, cols=1) %*% cbind(weights, matrix(0, rows=1, cols=C*K*K))
 
-    /*
-     * The above operation added extra C*K*K trailing 0s in the last row
-     * that we do not need. Thus, we need to:
-     *   1) reshape the resulting matrix into a row
-     *   2) 'Clip off' the last few 0s using indexing and reshape the
-     *      result into the expected filter shape ([C, C, K, K])
-     */
-    repl_weights_row = matrix(repl_weights, rows=1, cols=C*(C+1)*K^2)
-    W = matrix(repl_weights_row[1,1:(C*K)^2], rows=C, cols=C*K^2)
-  #}else W = weights
+  # The above operation added extra C*K*K trailing 0s in the last row
+  # that we do not need. Thus, we need to:
+  #   1) reshape the resulting matrix into a row
+  #   2) 'Clip off' the last few 0s using indexing and reshape the
+  #      result into the expected filter shape ([C, C, K, K])
+  repl_weights_row = matrix(repl_weights, rows=1, cols=C*(C+1)*K^2)
+  W = matrix(repl_weights_row[1,1:(C*K)^2], rows=C, cols=C*K^2)
 
   b = matrix(0, rows=C, cols=1)
 }
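
To make the interspersing trick in init_bilinear concrete, here is a tiny standalone DML sketch with C = 2 and K = 2, using stand-in weights 1..K^2 in place of the actual bilinear weights so the block structure is visible when printed; the values and names are illustrative assumptions.

  C = 2
  K = 2
  # Stand-in for the 1 x K^2 row of bilinear weights computed above.
  weights = matrix(seq(1, K*K), rows=1, cols=K*K)

  # Append C*K*K trailing 0s and replicate the row C times.
  repl_weights = matrix(1, rows=C, cols=1) %*% cbind(weights, matrix(0, rows=1, cols=C*K*K))

  # Flatten row-wise, clip the surplus trailing 0s, and reshape to C x C*K^2.
  repl_weights_row = matrix(repl_weights, rows=1, cols=C*(C+1)*K^2)
  W = matrix(repl_weights_row[1, 1:(C*K)^2], rows=C, cols=C*K^2)
  print(toString(W))
  # Row 1 holds 1 2 3 4 followed by four 0s; row 2 holds four 0s followed by
  # 1 2 3 4, i.e., each channel's weights land in its own K^2-sized block.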