Posted to commits@systemml.apache.org by du...@apache.org on 2017/04/01 01:42:38 UTC
[5/7] incubator-systemml git commit: [SYSTEMML-1453] Update Conv & Max Pooling layer names to include "2D"
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/scripts/staging/SystemML-NN/nn/test/conv_simple.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/conv_simple.dml b/scripts/staging/SystemML-NN/nn/test/conv_simple.dml
deleted file mode 100644
index efd99c3..0000000
--- a/scripts/staging/SystemML-NN/nn/test/conv_simple.dml
+++ /dev/null
@@ -1,215 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-/*
- * 2D Convolutional layer.
- *
- * This implementation is intended to be a simple, reference version.
- */
-
-forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
- int C, int Hin, int Win, int Hf, int Wf,
- int strideh, int stridew, int padh, int padw)
- return (matrix[double] out, int Hout, int Wout) {
- /*
- * Computes the forward pass for a 2D spatial convolutional layer with
- * F filters. The input data has N examples, each represented as a 3D
- * volume unrolled into a single vector.
- *
- * This implementation is intended to be a simple, reference version.
- *
- * Inputs:
- * - X: Inputs, of shape (N, C*Hin*Win).
- * - W: Weights, of shape (F, C*Hf*Wf).
- * - b: Biases, of shape (F, 1).
- * - C: Number of input channels (dimensionality of input depth).
- * - Hin: Input height.
- * - Win: Input width.
- * - Hf: Filter height.
- * - Wf: Filter width.
- * - strideh: Stride over height.
- * - stridew: Stride over width.
- * - padh: Padding for top and bottom sides.
- * - padw: Padding for left and right sides.
- *
- * Outputs:
- * - out: Outputs, of shape (N, F*Hout*Wout).
- * - Hout: Output height.
- * - Wout: Output width.
- */
- N = nrow(X)
- F = nrow(W)
- Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1)
- Wout = as.integer((Win + 2*padw - Wf)/stridew + 1)
-
- # Create output volume
- out = matrix(0, rows=N, cols=F*Hout*Wout)
-
- # Convolution - Simple reference implementation
- parfor (n in 1:N) { # all examples
- Xn = matrix(X[n,], rows=C, cols=Hin*Win)
- # Pad image
- Xn_padded = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw)) # zeros
- parfor (c in 1:C) {
- Xn_slice = matrix(Xn[c,], rows=Hin, cols=Win) # depth slice C reshaped
- Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw)
- Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice
- Xn_padded[c,] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape
- }
- # Convolve image with filters
- parfor (f in 1:F, check=0) { # all filters
- parfor (hout in 1:Hout, check=0) { # all output rows
- h0 = (hout-1)*strideh + 1
- parfor (wout in 1:Wout, check=0) { # all output columns
- w0 = (wout-1)*stridew + 1
- # Create a patch of the input example corresponding spatially to the filter sizes
- Xn_padded_patch = matrix(0, rows=C, cols=Hf*Wf) # zeros
- parfor (c in 1:C, check=0) {
- Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw) # reshape
- Xn_padded_patch[c,] = matrix(Xn_padded_slice[h0:h0-1+Hf, w0:w0-1+Wf], rows=1,
- cols=Hf*Wf) # reshape
- }
- out[n, (f-1)*Hout*Wout + (hout-1)*Wout + wout] =
- W[f,] %*% matrix(Xn_padded_patch, rows=C*Hf*Wf, cols=1) + b[f,]
- }
- }
- }
- }
-}
-
-backward = function(matrix[double] dout, int Hout, int Wout,
- matrix[double] X, matrix[double] W, matrix[double] b,
- int C, int Hin, int Win, int Hf, int Wf,
- int strideh, int stridew, int padh, int padw)
- return (matrix[double] dX, matrix[double] dW, matrix[double] db) {
- /*
- * Computes the backward pass for a 2D spatial convolutional layer
- * with F filters.
- *
- * This implementation is intended to be a simple, reference version.
- *
- * Inputs:
- * - dout: Gradient wrt `out` from upstream, of
- * shape (N, F*Hout*Wout).
- * - Hout: Output height.
- * - Wout: Output width.
- * - X: Inputs, of shape (N, C*Hin*Win).
- * - W: Weights, of shape (F, C*Hf*Wf).
- * - b: Biases, of shape (F, 1).
- * - C: Number of input channels (dimensionality of input depth).
- * - Hin: Input height.
- * - Win: Input width.
- * - Hf: Filter height.
- * - Wf: Filter width.
- * - strideh: Stride over height.
- * - stridew: Stride over width.
- * - padh: Padding for top and bottom sides.
- * - padw: Padding for left and right sides.
- *
- * Outputs:
- * - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
- * - dW: Gradient wrt `W`, of shape (F, C*Hf*Wf).
- * - db: Gradient wrt `b`, of shape (F, 1).
- */
- N = nrow(X)
- F = nrow(W)
- Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1)
- Wout = as.integer((Win + 2*padw - Wf)/stridew + 1)
-
- # Create gradient volumes
- dX = matrix(0, rows=N, cols=C*Hin*Win)
- dW = matrix(0, rows=F, cols=C*Hf*Wf)
- db = matrix(0, rows=F, cols=1)
-
- # Partial derivatives for convolution - Simple reference implementation
- for (n in 1:N) { # all examples
- Xn = matrix(X[n,], rows=C, cols=Hin*Win)
- # Pad image
- Xn_padded = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw)) # zeros
- parfor (c in 1:C) {
- Xn_slice = matrix(Xn[c,], rows=Hin, cols=Win) # depth slice C reshaped
- Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw)
- Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice
- Xn_padded[c,] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape
- }
- dXn_padded = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw))
- for (f in 1:F) { # all filters
- for (hout in 1:Hout) { # all output rows
- h0 = (hout-1) * strideh + 1
- for (wout in 1:Wout) { # all output columns
- w0 = (wout-1) * stridew + 1
- # Create a patch of the input example corresponding spatially to the filter sizes
- Xn_padded_patch = matrix(0, rows=C, cols=Hf*Wf) # zeros
- dXn_padded_patch = matrix(W[f,] * dout[n, (f-1)*Hout*Wout + (hout-1)*Wout + wout],
- rows=C, cols=Hf*Wf) # reshape
- for (c in 1:C) {
- Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw) # reshape
- Xn_padded_patch[c,] = matrix(Xn_padded_slice[h0:h0-1+Hf, w0:w0-1+Wf],
- rows=1, cols=Hf*Wf) # reshape
- dXn_padded_slice = matrix(0, rows=Hin+2*padh, cols=Win+2*padw)
- dXn_padded_slice[h0:h0-1+Hf, w0:w0-1+Wf] = matrix(dXn_padded_patch[c,],
- rows=Hf, cols=Wf) # reshape
- dXn_padded[c,] = dXn_padded[c,] + matrix(dXn_padded_slice,
- rows=1, cols=(Hin+2*padh)*(Win+2*padw))
- }
- dW[f,] = dW[f,]
- + matrix(Xn_padded_patch, rows=1, cols=C*Hf*Wf)
- * dout[n, (f-1)*Hout*Wout + (hout-1)*Wout + wout]
- db[f,] = db[f,] + dout[n, (f-1)*Hout*Wout + (hout-1)*Wout + wout]
- }
- }
- }
- # Unpad derivs on input
- dXn = matrix(0, rows=C, cols=Hin*Win)
- parfor (c in 1:C, check=0) {
- dXn_padded_slice = matrix(dXn_padded[c,], rows=(Hin+2*padh), cols=(Win+2*padw))
- dXn_slice = dXn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win]
- dXn[c,] = matrix(dXn_slice, rows=1, cols=Hin*Win)
- }
- dX[n,] = matrix(dXn, rows=1, cols=C*Hin*Win)
- }
-}
-
-init = function(int F, int C, int Hf, int Wf)
- return (matrix[double] W, matrix[double] b) {
- /*
- * Initialize the parameters of this layer.
- *
- * We use the heuristic by He et al., which limits the magnification
- * of inputs/gradients during forward/backward passes by scaling
- * unit-Gaussian weights by a factor of sqrt(2/n), under the
- * assumption of relu neurons.
- * - http://arxiv.org/abs/1502.01852
- *
- * Inputs:
- * - F: Number of filters.
- * - C: Number of input channels (dimensionality of depth).
- * - Hf: Filter height.
- * - Wf: Filter width.
- *
- * Outputs:
- * - W: Weights, of shape (F, C*Hf*Wf).
- * - b: Biases, of shape (F, 1).
- */
- W = rand(rows=F, cols=C*Hf*Wf, pdf="normal") * sqrt(2.0/(C*Hf*Wf))
- b = matrix(0, rows=F, cols=1)
-}
-
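
The deleted file above is recreated under the new name nn/test/conv2d_simple.dml elsewhere in this [5/7] series. A minimal driver for the renamed module, assuming it keeps the init/forward signatures shown above (the sizes here are illustrative):

source("nn/test/conv2d_simple.dml") as conv2d_simple

N = 2       # num examples
C = 3       # input channels
Hin = 5     # input height
Win = 5     # input width
F = 4       # num filters
Hf = 3      # filter height
Wf = 3      # filter width
stride = 1
pad = 1

X = rand(rows=N, cols=C*Hin*Win, pdf="normal")
[W, b] = conv2d_simple::init(F, C, Hf, Wf)  # He init: N(0,1) * sqrt(2/(C*Hf*Wf))
[out, Hout, Wout] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf,
                                           stride, stride, pad, pad)
# Hout = (Hin + 2*pad - Hf)/stride + 1 = (5 + 2 - 3)/1 + 1 = 5, so this
# "same" padding preserves the 5x5 spatial shape; out is (2, 4*5*5).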
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/scripts/staging/SystemML-NN/nn/test/grad_check.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/grad_check.dml b/scripts/staging/SystemML-NN/nn/test/grad_check.dml
index ba9a317..27f4420 100644
--- a/scripts/staging/SystemML-NN/nn/test/grad_check.dml
+++ b/scripts/staging/SystemML-NN/nn/test/grad_check.dml
@@ -24,8 +24,8 @@
*/
source("nn/layers/affine.dml") as affine
source("nn/layers/batch_norm.dml") as batch_norm
-source("nn/layers/conv.dml") as conv
-source("nn/layers/conv_builtin.dml") as conv_builtin
+source("nn/layers/conv2d.dml") as conv2d
+source("nn/layers/conv2d_builtin.dml") as conv2d_builtin
source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss
source("nn/layers/dropout.dml") as dropout
source("nn/layers/l1_loss.dml") as l1_loss
@@ -34,16 +34,16 @@ source("nn/layers/l2_loss.dml") as l2_loss
source("nn/layers/l2_reg.dml") as l2_reg
source("nn/layers/log_loss.dml") as log_loss
source("nn/layers/lstm.dml") as lstm
-source("nn/layers/max_pool.dml") as max_pool
-source("nn/layers/max_pool_builtin.dml") as max_pool_builtin
+source("nn/layers/max_pool2d.dml") as max_pool2d
+source("nn/layers/max_pool2d_builtin.dml") as max_pool2d_builtin
source("nn/layers/relu.dml") as relu
source("nn/layers/rnn.dml") as rnn
source("nn/layers/sigmoid.dml") as sigmoid
source("nn/layers/softmax.dml") as softmax
source("nn/layers/spatial_batch_norm.dml") as spatial_batch_norm
source("nn/layers/tanh.dml") as tanh
-source("nn/test/conv_simple.dml") as conv_simple
-source("nn/test/max_pool_simple.dml") as max_pool_simple
+source("nn/test/conv2d_simple.dml") as conv2d_simple
+source("nn/test/max_pool2d_simple.dml") as max_pool2d_simple
source("nn/test/util.dml") as test_util
affine = function() {
@@ -229,11 +229,11 @@ batch_norm = function() {
}
}
-conv = function() {
+conv2d = function() {
/*
- * Gradient check for the convolutional layer using `im2col`.
+ * Gradient check for the 2D convolutional layer using `im2col`.
*/
- print("Grad checking the `im2col` convolutional layer with L2 loss.")
+ print("Grad checking the `im2col` 2D convolutional layer with L2 loss.")
# Generate data
N = 2 # num examples
@@ -249,13 +249,13 @@ conv = function() {
y = rand(rows=N, cols=F*Hin*Win)
# Create layers
- [W, b] = conv::init(F, C, Hf, Wf)
+ [W, b] = conv2d::init(F, C, Hf, Wf)
# Compute analytical gradients of loss wrt parameters
- [out, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [out, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
dout = l2_loss::backward(out, y)
- [dX, dW, db] = conv::backward(dout, Hout, Wout, X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ [dX, dW, db] = conv2d::backward(dout, Hout, Wout, X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
# Grad check
h = 1e-5
@@ -265,10 +265,10 @@ conv = function() {
# Compute numerical derivative
old = as.scalar(X[i,j])
X[i,j] = old - h
- [outmh, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [outmh, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
lossmh = l2_loss::forward(outmh, y)
X[i,j] = old + h
- [outph, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [outph, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
lossph = l2_loss::forward(outph, y)
X[i,j] = old # reset
dX_num = (lossph-lossmh) / (2*h) # numerical derivative
@@ -284,10 +284,10 @@ conv = function() {
# Compute numerical derivative
old = as.scalar(W[i,j])
W[i,j] = old - h
- [outmh, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [outmh, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
lossmh = l2_loss::forward(outmh, y)
W[i,j] = old + h
- [outph, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [outph, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
lossph = l2_loss::forward(outph, y)
W[i,j] = old # reset
dW_num = (lossph-lossmh) / (2*h) # numerical derivative
@@ -303,10 +303,10 @@ conv = function() {
# Compute numerical derivative
old = as.scalar(b[i,j])
b[i,j] = old - h
- [outmh, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [outmh, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
lossmh = l2_loss::forward(outmh, y)
b[i,j] = old + h
- [outph, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [outph, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
lossph = l2_loss::forward(outph, y)
b[i,j] = old # reset
db_num = (lossph-lossmh) / (2*h) # numerical derivative
@@ -317,12 +317,12 @@ conv = function() {
}
}
-conv_builtin = function() {
+conv2d_builtin = function() {
/*
- * Gradient check for the convolutional layer using built-in
+ * Gradient check for the 2D convolutional layer using built-in
* functions.
*/
- print("Grad checking the built-in convolutional layer with L2 loss.")
+ print("Grad checking the built-in 2D convolutional layer with L2 loss.")
# Generate data
N = 2 # num examples
@@ -338,13 +338,14 @@ conv_builtin = function() {
y = rand(rows=N, cols=F*Hin*Win)
# Create layers
- [W, b] = conv_builtin::init(F, C, Hf, Wf)
+ [W, b] = conv2d_builtin::init(F, C, Hf, Wf)
# Compute analytical gradients of loss wrt parameters
- [out, Hout, Wout] = conv_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [out, Hout, Wout] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
dout = l2_loss::backward(out, y)
- [dX, dW, db] = conv_builtin::backward(dout, Hout, Wout, X, W, b, C, Hin, Win, Hf, Wf,
- stride, stride, pad, pad)
+ [dX, dW, db] = conv2d_builtin::backward(dout, Hout, Wout, X, W, b, C, Hin, Win, Hf, Wf,
+ stride, stride, pad, pad)
# Grad check
h = 1e-5
@@ -354,12 +355,12 @@ conv_builtin = function() {
# Compute numerical derivative
old = as.scalar(X[i,j])
X[i,j] = old - h
- [outmh, Hout, Wout] = conv_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ [outmh, Hout, Wout] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
lossmh = l2_loss::forward(outmh, y)
X[i,j] = old + h
- [outph, Hout, Wout] = conv_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ [outph, Hout, Wout] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
lossph = l2_loss::forward(outph, y)
X[i,j] = old # reset
dX_num = (lossph-lossmh) / (2*h) # numerical derivative
@@ -375,12 +376,12 @@ conv_builtin = function() {
# Compute numerical derivative
old = as.scalar(W[i,j])
W[i,j] = old - h
- [outmh, Hout, Wout] = conv_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ [outmh, Hout, Wout] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
lossmh = l2_loss::forward(outmh, y)
W[i,j] = old + h
- [outph, Hout, Wout] = conv_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ [outph, Hout, Wout] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
lossph = l2_loss::forward(outph, y)
W[i,j] = old # reset
dW_num = (lossph-lossmh) / (2*h) # numerical derivative
@@ -396,12 +397,12 @@ conv_builtin = function() {
# Compute numerical derivative
old = as.scalar(b[i,j])
b[i,j] = old - h
- [outmh, Hout, Wout] = conv_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ [outmh, Hout, Wout] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
lossmh = l2_loss::forward(outmh, y)
b[i,j] = old + h
- [outph, Hout, Wout] = conv_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ [outph, Hout, Wout] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
lossph = l2_loss::forward(outph, y)
b[i,j] = old # reset
db_num = (lossph-lossmh) / (2*h) # numerical derivative
@@ -412,11 +413,11 @@ conv_builtin = function() {
}
}
-conv_simple = function() {
+conv2d_simple = function() {
/*
- * Gradient check for the simple reference convolutional layer.
+ * Gradient check for the simple reference 2D convolutional layer.
*/
- print("Grad checking the simple reference convolutional layer with L2 loss.")
+ print("Grad checking the simple reference 2D convolutional layer with L2 loss.")
# Generate data
N = 2 # num examples
@@ -432,13 +433,13 @@ conv_simple = function() {
y = rand(rows=N, cols=F*Hin*Win)
# Create layers
- [W, b] = conv_simple::init(F, C, Hf, Wf)
+ [W, b] = conv2d_simple::init(F, C, Hf, Wf)
# Compute analytical gradients of loss wrt parameters
- [out, Hout, Wout] = conv_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [out, Hout, Wout] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
dout = l2_loss::backward(out, y)
- [dX, dW, db] = conv_simple::backward(dout, Hout, Wout, X, W, b, C, Hin, Win, Hf, Wf,
- stride, stride, pad, pad)
+ [dX, dW, db] = conv2d_simple::backward(dout, Hout, Wout, X, W, b, C, Hin, Win, Hf, Wf,
+ stride, stride, pad, pad)
# Grad check
h = 1e-5
@@ -448,12 +449,12 @@ conv_simple = function() {
# Compute numerical derivative
old = as.scalar(X[i,j])
X[i,j] = old - h
- [outmh, Hout, Wout] = conv_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ [outmh, Hout, Wout] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
lossmh = l2_loss::forward(outmh, y)
X[i,j] = old + h
- [outph, Hout, Wout] = conv_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ [outph, Hout, Wout] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
lossph = l2_loss::forward(outph, y)
X[i,j] = old # reset
dX_num = (lossph-lossmh) / (2*h) # numerical derivative
@@ -469,12 +470,12 @@ conv_simple = function() {
# Compute numerical derivative
old = as.scalar(W[i,j])
W[i,j] = old - h
- [outmh, Hout, Wout] = conv_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ [outmh, Hout, Wout] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
lossmh = l2_loss::forward(outmh, y)
W[i,j] = old + h
- [outph, Hout, Wout] = conv_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ [outph, Hout, Wout] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
lossph = l2_loss::forward(outph, y)
W[i,j] = old # reset
dW_num = (lossph-lossmh) / (2*h) # numerical derivative
@@ -490,12 +491,12 @@ conv_simple = function() {
# Compute numerical derivative
old = as.scalar(b[i,j])
b[i,j] = old - h
- [outmh, Hout, Wout] = conv_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ [outmh, Hout, Wout] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
lossmh = l2_loss::forward(outmh, y)
b[i,j] = old + h
- [outph, Hout, Wout] = conv_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ [outph, Hout, Wout] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
lossph = l2_loss::forward(outph, y)
b[i,j] = old # reset
db_num = (lossph-lossmh) / (2*h) # numerical derivative
@@ -898,11 +899,11 @@ lstm = function() {
}
}
-max_pool = function() {
+max_pool2d = function() {
/*
- * Gradient check for the max pooling layer.
+ * Gradient check for the 2D max pooling layer.
*/
- print("Grad checking the max pooling layer with L2 loss.")
+ print("Grad checking the 2D max pooling layer with L2 loss.")
# Generate data
N = 2 # num examples
@@ -921,9 +922,9 @@ max_pool = function() {
y = rand(rows=N, cols=C*Hout*Wout)
# Compute analytical gradients of loss wrt parameters
- [out, Hout, Wout] = max_pool::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [out, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
dout = l2_loss::backward(out, y)
- dX = max_pool::backward(dout, Hout, Wout, X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ dX = max_pool2d::backward(dout, Hout, Wout, X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
# Grad check
h = 1e-5
@@ -932,10 +933,10 @@ max_pool = function() {
# Compute numerical derivative
old = as.scalar(X[i,j])
X[i,j] = old - h
- [outmh, Hout, Wout] = max_pool::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [outmh, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
lossmh = l2_loss::forward(outmh, y)
X[i,j] = old + h
- [outph, Hout, Wout] = max_pool::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [outph, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
lossph = l2_loss::forward(outph, y)
X[i,j] = old # reset
dX_num = (lossph-lossmh) / (2*h) # numerical derivative
@@ -947,11 +948,11 @@ max_pool = function() {
}
}
-max_pool_builtin = function() {
+max_pool2d_builtin = function() {
/*
- * Gradient check for the max pooling layer.
+ * Gradient check for the 2D max pooling layer.
*/
- print("Grad checking the built-in max pooling layer with L2 loss.")
+ print("Grad checking the built-in 2D max pooling layer with L2 loss.")
# Generate data
N = 2 # num examples
@@ -970,10 +971,11 @@ max_pool_builtin = function() {
y = rand(rows=N, cols=C*Hout*Wout)
# Compute analytical gradients of loss wrt parameters
- [out, Hout, Wout] = max_pool_builtin::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [out, Hout, Wout] = max_pool2d_builtin::forward(X, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
dout = l2_loss::backward(out, y)
- dX = max_pool_builtin::backward(dout, Hout, Wout, X, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ dX = max_pool2d_builtin::backward(dout, Hout, Wout, X, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
# Grad check
h = 1e-5
@@ -982,12 +984,12 @@ max_pool_builtin = function() {
# Compute numerical derivative
old = as.scalar(X[i,j])
X[i,j] = old - h
- [outmh, Hout, Wout] = max_pool_builtin::forward(X, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ [outmh, Hout, Wout] = max_pool2d_builtin::forward(X, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
lossmh = l2_loss::forward(outmh, y)
X[i,j] = old + h
- [outph, Hout, Wout] = max_pool_builtin::forward(X, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ [outph, Hout, Wout] = max_pool2d_builtin::forward(X, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
lossph = l2_loss::forward(outph, y)
X[i,j] = old # reset
dX_num = (lossph-lossmh) / (2*h) # numerical derivative
@@ -999,11 +1001,11 @@ max_pool_builtin = function() {
}
}
-max_pool_simple = function() {
+max_pool2d_simple = function() {
/*
- * Gradient check for the simple reference max pooling layer.
+ * Gradient check for the simple reference 2D max pooling layer.
*/
- print("Grad checking the simple reference max pooling layer with L2 loss.")
+ print("Grad checking the simple reference 2D max pooling layer with L2 loss.")
# Generate data
N = 2 # num examples
@@ -1022,10 +1024,10 @@ max_pool_simple = function() {
y = rand(rows=N, cols=C*Hout*Wout)
# Compute analytical gradients of loss wrt parameters
- [out, Hout, Wout] = max_pool_simple::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [out, Hout, Wout] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
dout = l2_loss::backward(out, y)
- dX = max_pool_simple::backward(dout, Hout, Wout, X, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ dX = max_pool2d_simple::backward(dout, Hout, Wout, X, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
# Grad check
h = 1e-5
@@ -1034,12 +1036,12 @@ max_pool_simple = function() {
# Compute numerical derivative
old = as.scalar(X[i,j])
X[i,j] = old - h
- [outmh, Hout, Wout] = max_pool_simple::forward(X, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ [outmh, Hout, Wout] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
lossmh = l2_loss::forward(outmh, y)
X[i,j] = old + h
- [outph, Hout, Wout] = max_pool_simple::forward(X, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ [outph, Hout, Wout] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
lossph = l2_loss::forward(outph, y)
X[i,j] = old # reset
dX_num = (lossph-lossmh) / (2*h) # numerical derivative
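
Every check above instantiates the same two-sided finite-difference scheme: perturb one entry by +/- h, rerun the forward pass and loss, and compare (lossph-lossmh) / (2*h) against the analytical gradient. A self-contained sketch of the pattern on a toy objective with a known gradient (the thresholds passed to check_rel_error are illustrative, not the suite's actual values):

source("nn/test/util.dml") as test_util

# Toy objective with a known gradient: loss = sum(X^2), so dloss/dX = 2*X.
X = rand(rows=3, cols=4, pdf="normal")
dX = 2 * X  # analytical gradient
h = 1e-5
for (i in 1:nrow(X)) {
  for (j in 1:ncol(X)) {
    old = as.scalar(X[i,j])
    X[i,j] = old - h
    lossmh = sum(X^2)
    X[i,j] = old + h
    lossph = sum(X^2)
    X[i,j] = old  # reset
    dX_num = (lossph-lossmh) / (2*h)  # central difference, O(h^2) truncation error
    rel_error = test_util::check_rel_error(as.scalar(dX[i,j]), dX_num, 1e-8, 1e-10)
  }
}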
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/scripts/staging/SystemML-NN/nn/test/max_pool2d_simple.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/max_pool2d_simple.dml b/scripts/staging/SystemML-NN/nn/test/max_pool2d_simple.dml
new file mode 100644
index 0000000..47dab3a
--- /dev/null
+++ b/scripts/staging/SystemML-NN/nn/test/max_pool2d_simple.dml
@@ -0,0 +1,172 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+/*
+ * 2D Max Pooling layer.
+ *
+ * This implementation is intended to be a simple, reference version.
+ */
+
+forward = function(matrix[double] X, int C, int Hin, int Win, int Hf, int Wf,
+ int strideh, int stridew, int padh, int padw)
+ return (matrix[double] out, int Hout, int Wout) {
+ /*
+ * Computes the forward pass for a 2D spatial max pooling layer.
+ * The input data has N examples, each represented as a 3D volume
+ * unrolled into a single vector.
+ *
+ * This implementation is intended to be a simple, reference version.
+ *
+ * Inputs:
+ * - X: Inputs, of shape (N, C*Hin*Win).
+ * - C: Number of input channels (dimensionality of input depth).
+ * - Hin: Input height.
+ * - Win: Input width.
+ * - Hf: Filter height.
+ * - Wf: Filter width.
+ * - strideh: Stride over height.
+ * - stridew: Stride over width.
+ * - padh: Padding for top and bottom sides.
+ * A typical value is 0.
+ * - padw: Padding for left and right sides.
+ * A typical value is 0.
+ *
+ * Outputs:
+ * - out: Outputs, of shape (N, C*Hout*Wout).
+ * - Hout: Output height.
+ * - Wout: Output width.
+ */
+ N = nrow(X)
+ Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1)
+ Wout = as.integer((Win + 2*padw - Wf)/stridew + 1)
+
+ # Create output volume
+ out = matrix(0, rows=N, cols=C*Hout*Wout)
+
+ # Max pooling
+ parfor (n in 1:N, check=0) { # all examples
+ Xn = matrix(X[n,], rows=C, cols=Hin*Win)
+
+ # Pad image
+ pad_value = -1/0  # -infinity
+ Xn_padded = matrix(pad_value, rows=C, cols=(Hin+2*padh)*(Win+2*padw)) # filled with -inf
+ parfor (c in 1:C) {
+ Xn_slice = matrix(Xn[c,], rows=Hin, cols=Win) # depth slice C reshaped
+ Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw)
+ Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice
+ Xn_padded[c,] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape
+ }
+ img = Xn_padded # shape (C, (Hin+2*padh)*(Win+2*padw))
+
+ parfor (c in 1:C, check=0) { # all channels
+ img_slice = matrix(img[c,], rows=Hin+2*padh, cols=Win+2*padw)
+ parfor (hout in 1:Hout, check=0) { # all output rows
+ hin = (hout-1) * strideh + 1
+ parfor (wout in 1:Wout, check=0) { # all output columns
+ win = (wout-1) * stridew + 1
+ out[n, (c-1)*Hout*Wout + (hout-1)*Wout + wout] = max(img_slice[hin:hin+Hf-1,
+ win:win+Wf-1])
+ }
+ }
+ }
+ }
+}
+
+backward = function(matrix[double] dout, int Hout, int Wout, matrix[double] X,
+ int C, int Hin, int Win, int Hf, int Wf,
+ int strideh, int stridew, int padh, int padw)
+ return (matrix[double] dX) {
+ /*
+ * Computes the backward pass for a 2D spatial max pooling layer.
+ * The input data has N examples, each represented as a 3D volume
+ * unrolled into a single vector.
+ *
+ * Inputs:
+ * - dout: Gradient wrt `out` from upstream, of
+ * shape (N, C*Hout*Wout).
+ * - Hout: Output height.
+ * - Wout: Output width.
+ * - X: Inputs, of shape (N, C*Hin*Win).
+ * - C: Number of input channels (dimensionality of input depth).
+ * - Hin: Input height.
+ * - Win: Input width.
+ * - Hf: Filter height.
+ * - Wf: Filter width.
+ * - strideh: Stride over height.
+ * - stridew: Stride over width.
+ * - padh: Padding for top and bottom sides.
+ * A typical value is 0.
+ * - padw: Padding for left and right sides.
+ * A typical value is 0.
+ *
+ * Outputs:
+ * - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
+ */
+ N = nrow(X)
+
+ # Create gradient volume
+ dX = matrix(0, rows=N, cols=C*Hin*Win)
+
+ # Gradient of max pooling
+ for (n in 1:N) { # all examples
+ Xn = matrix(X[n,], rows=C, cols=Hin*Win)
+
+ # Pad image
+ pad_value = -1/0  # -infinity
+ Xn_padded = matrix(pad_value, rows=C, cols=(Hin+2*padh)*(Win+2*padw)) # filled with -inf
+ parfor (c in 1:C) {
+ Xn_slice = matrix(Xn[c,], rows=Hin, cols=Win) # depth slice C reshaped
+ Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw)
+ Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice
+ Xn_padded[c,] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape
+ }
+ img = Xn_padded
+
+ dimg = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw))
+ for (c in 1:C) { # all channels
+ img_slice = matrix(img[c,], rows=Hin+2*padh, cols=Win+2*padw)
+ dimg_slice = matrix(0, rows=Hin+2*padh, cols=Win+2*padw)
+ for (hout in 1:Hout) { # all output rows
+ hin = (hout-1) * strideh + 1
+ for (wout in 1:Wout) { # all output columns
+ win = (wout-1) * stridew + 1
+ img_slice_patch = img_slice[hin:hin+Hf-1, win:win+Wf-1]
+ max_val_ind = img_slice_patch == max(img_slice_patch) # max value indicator matrix
+ # gradient passes through only for the max value(s) in this patch
+ dimg_slice_patch = max_val_ind * dout[n, (c-1)*Hout*Wout + (hout-1)*Wout + wout]
+ dimg_slice[hin:hin+Hf-1, win:win+Wf-1] = dimg_slice[hin:hin+Hf-1, win:win+Wf-1]
+ + dimg_slice_patch
+ }
+ }
+ dimg[c,] = matrix(dimg_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw))
+ }
+
+ # Unpad derivs on input
+ dXn = matrix(0, rows=C, cols=Hin*Win)
+ parfor (c in 1:C, check=0) {
+ dXn_padded_slice = matrix(dimg[c,], rows=(Hin+2*padh), cols=(Win+2*padw))
+ dXn_slice = dXn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win]
+ dXn[c,] = matrix(dXn_slice, rows=1, cols=Hin*Win)
+ }
+ dX[n,] = matrix(dXn, rows=1, cols=C*Hin*Win)
+ }
+}
+
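
The new file keeps the old max_pool_simple semantics under the new module name. A short usage sketch against the signatures defined above (sizes illustrative):

source("nn/test/max_pool2d_simple.dml") as max_pool2d_simple

N = 2
C = 3
Hin = 8
Win = 8
Hf = 2
Wf = 2
stride = 2
pad = 0

X = rand(rows=N, cols=C*Hin*Win, pdf="normal")
# Forward: 2x2 pooling with stride 2 halves each spatial dimension,
# i.e. Hout = (8 + 0 - 2)/2 + 1 = 4, so out has shape (2, 3*4*4).
[out, Hout, Wout] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf,
                                               stride, stride, pad, pad)
# Backward: the upstream gradient is routed only to the max location(s)
# within each pooling window.
dout = rand(rows=N, cols=C*Hout*Wout, pdf="normal")
dX = max_pool2d_simple::backward(dout, Hout, Wout, X, C, Hin, Win, Hf, Wf,
                                 stride, stride, pad, pad)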
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/scripts/staging/SystemML-NN/nn/test/max_pool_simple.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/max_pool_simple.dml b/scripts/staging/SystemML-NN/nn/test/max_pool_simple.dml
deleted file mode 100644
index 786b0a1..0000000
--- a/scripts/staging/SystemML-NN/nn/test/max_pool_simple.dml
+++ /dev/null
@@ -1,172 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-/*
- * Max pooling layer.
- *
- * This implementation is intended to be a simple, reference version.
- */
-
-forward = function(matrix[double] X, int C, int Hin, int Win, int Hf, int Wf,
- int strideh, int stridew, int padh, int padw)
- return (matrix[double] out, int Hout, int Wout) {
- /*
- * Computes the forward pass for a 2D spatial max pooling layer.
- * The input data has N examples, each represented as a 3D volume
- * unrolled into a single vector.
- *
- * This implementation is intended to be a simple, reference version.
- *
- * Inputs:
- * - X: Inputs, of shape (N, C*Hin*Win).
- * - C: Number of input channels (dimensionality of input depth).
- * - Hin: Input height.
- * - Win: Input width.
- * - Hf: Filter height.
- * - Wf: Filter width.
- * - strideh: Stride over height.
- * - stridew: Stride over width.
- * - padh: Padding for top and bottom sides.
- * A typical value is 0.
- * - padw: Padding for left and right sides.
- * A typical value is 0.
- *
- * Outputs:
- * - out: Outputs, of shape (N, C*Hout*Wout).
- * - Hout: Output height.
- * - Wout: Output width.
- */
- N = nrow(X)
- Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1)
- Wout = as.integer((Win + 2*padw - Wf)/stridew + 1)
-
- # Create output volume
- out = matrix(0, rows=N, cols=C*Hout*Wout)
-
- # Max pooling
- parfor (n in 1:N, check=0) { # all examples
- Xn = matrix(X[n,], rows=C, cols=Hin*Win)
-
- # Pad image
- pad_value = -1/0
- Xn_padded = matrix(pad_value, rows=C, cols=(Hin+2*padh)*(Win+2*padw)) # zeros
- parfor (c in 1:C) {
- Xn_slice = matrix(Xn[c,], rows=Hin, cols=Win) # depth slice C reshaped
- Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw)
- Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice
- Xn_padded[c,] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape
- }
- img = Xn_padded # shape (C, (Hin+2*padh)*(Win+2*padw))
-
- parfor (c in 1:C, check=0) { # all channels
- img_slice = matrix(img[c,], rows=Hin+2*padh, cols=Win+2*padw)
- parfor (hout in 1:Hout, check=0) { # all output rows
- hin = (hout-1) * strideh + 1
- parfor (wout in 1:Wout, check=0) { # all output columns
- win = (wout-1) * stridew + 1
- out[n, (c-1)*Hout*Wout + (hout-1)*Wout + wout] = max(img_slice[hin:hin+Hf-1,
- win:win+Wf-1])
- }
- }
- }
- }
-}
-
-backward = function(matrix[double] dout, int Hout, int Wout, matrix[double] X,
- int C, int Hin, int Win, int Hf, int Wf,
- int strideh, int stridew, int padh, int padw)
- return (matrix[double] dX) {
- /*
- * Computes the backward pass for a 2D spatial max pooling layer.
- * The input data has N examples, each represented as a 3D volume
- * unrolled into a single vector.
- *
- * Inputs:
- * - dout: Gradient wrt `out` from upstream, of
- * shape (N, C*Hout*Wout).
- * - Hout: Output height.
- * - Wout: Output width.
- * - X: Inputs, of shape (N, C*Hin*Win).
- * - C: Number of input channels (dimensionality of input depth).
- * - Hin: Input height.
- * - Win: Input width.
- * - Hf: Filter height.
- * - Wf: Filter width.
- * - strideh: Stride over height.
- * - stridew: Stride over width.
- * - padh: Padding for top and bottom sides.
- * A typical value is 0.
- * - padw: Padding for left and right sides.
- * A typical value is 0.
- *
- * Outputs:
- * - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
- */
- N = nrow(X)
-
- # Create gradient volume
- dX = matrix(0, rows=N, cols=C*Hin*Win)
-
- # Gradient of max pooling
- for (n in 1:N) { # all examples
- Xn = matrix(X[n,], rows=C, cols=Hin*Win)
-
- # Pad image
- pad_value = -1/0
- Xn_padded = matrix(pad_value, rows=C, cols=(Hin+2*padh)*(Win+2*padw)) # zeros
- parfor (c in 1:C) {
- Xn_slice = matrix(Xn[c,], rows=Hin, cols=Win) # depth slice C reshaped
- Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw)
- Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice
- Xn_padded[c,] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape
- }
- img = Xn_padded
-
- dimg = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw))
- for (c in 1:C) { # all channels
- img_slice = matrix(img[c,], rows=Hin+2*padh, cols=Win+2*padw)
- dimg_slice = matrix(0, rows=Hin+2*padh, cols=Win+2*padw)
- for (hout in 1:Hout, check=0) { # all output rows
- hin = (hout-1) * strideh + 1
- for (wout in 1:Wout) { # all output columns
- win = (wout-1) * stridew + 1
- img_slice_patch = img_slice[hin:hin+Hf-1, win:win+Wf-1]
- max_val_ind = img_slice_patch == max(img_slice_patch) # max value indicator matrix
- # gradient passes through only for the max value(s) in this patch
- dimg_slice_patch = max_val_ind * dout[n, (c-1)*Hout*Wout + (hout-1)*Wout + wout]
- dimg_slice[hin:hin+Hf-1, win:win+Wf-1] = dimg_slice[hin:hin+Hf-1, win:win+Wf-1]
- + dimg_slice_patch
- }
- }
- dimg[c,] = matrix(dimg_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw))
- }
-
- # Unpad derivs on input
- dXn = matrix(0, rows=C, cols=Hin*Win)
- parfor (c in 1:C, check=0) {
- dXn_padded_slice = matrix(dimg[c,], rows=(Hin+2*padh), cols=(Win+2*padw))
- dXn_slice = dXn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win]
- dXn[c,] = matrix(dXn_slice, rows=1, cols=Hin*Win)
- }
- dX[n,] = matrix(dXn, rows=1, cols=C*Hin*Win)
- }
-}
-
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/scripts/staging/SystemML-NN/nn/test/run_tests.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/run_tests.dml b/scripts/staging/SystemML-NN/nn/test/run_tests.dml
index 86bb77b..644662c 100644
--- a/scripts/staging/SystemML-NN/nn/test/run_tests.dml
+++ b/scripts/staging/SystemML-NN/nn/test/run_tests.dml
@@ -38,16 +38,16 @@ tmp = grad_check::log_loss()
# Other layers
tmp = grad_check::affine()
tmp = grad_check::batch_norm()
-tmp = grad_check::conv_simple()
-tmp = grad_check::conv()
-tmp = grad_check::conv_builtin()
+tmp = grad_check::conv2d_simple()
+tmp = grad_check::conv2d()
+tmp = grad_check::conv2d_builtin()
tmp = grad_check::dropout()
tmp = grad_check::l1_reg()
tmp = grad_check::l2_reg()
tmp = grad_check::lstm()
-tmp = grad_check::max_pool_simple()
-tmp = grad_check::max_pool()
-tmp = grad_check::max_pool_builtin()
+tmp = grad_check::max_pool2d_simple()
+tmp = grad_check::max_pool2d()
+tmp = grad_check::max_pool2d_builtin()
tmp = grad_check::relu()
tmp = grad_check::rnn()
tmp = grad_check::sigmoid()
@@ -72,9 +72,9 @@ print("---")
tmp = test::batch_norm()
tmp = test::im2col()
tmp = test::padding()
-tmp = test::conv()
+tmp = test::conv2d()
tmp = test::cross_entropy_loss()
-tmp = test::max_pool()
+tmp = test::max_pool2d()
tmp = test::spatial_batch_norm()
tmp = test::tanh()
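
With the renames in place, a small end-to-end stack reads naturally under the new 2D names. A sketch assuming the renamed layer files keep the signatures exercised by these tests (the 28x28 MNIST-style sizes are illustrative):

source("nn/layers/conv2d_builtin.dml") as conv2d_builtin
source("nn/layers/max_pool2d_builtin.dml") as max_pool2d_builtin

N = 2
C = 1
Hin = 28
Win = 28
F = 8
Hf = 3
Wf = 3

X = rand(rows=N, cols=C*Hin*Win, pdf="normal")
[W, b] = conv2d_builtin::init(F, C, Hf, Wf)
# conv2d with "same" padding (pad=1, stride=1), then 2x2 max pooling with stride 2
[out1, Hout1, Wout1] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf,
                                               1, 1, 1, 1)
[out2, Hout2, Wout2] = max_pool2d_builtin::forward(out1, F, Hout1, Wout1,
                                                   2, 2, 2, 2, 0, 0)
# 28x28 -> conv (pad=1) -> 28x28 -> pool -> 14x14, i.e. out2 is (2, 8*14*14).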
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/scripts/staging/SystemML-NN/nn/test/test.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/test.dml b/scripts/staging/SystemML-NN/nn/test/test.dml
index 8fb0d04..64fc519 100644
--- a/scripts/staging/SystemML-NN/nn/test/test.dml
+++ b/scripts/staging/SystemML-NN/nn/test/test.dml
@@ -23,23 +23,23 @@
* Various tests, not including gradient checks.
*/
source("nn/layers/batch_norm.dml") as batch_norm
-source("nn/layers/conv.dml") as conv
-source("nn/layers/conv_builtin.dml") as conv_builtin
+source("nn/layers/conv2d.dml") as conv2d
+source("nn/layers/conv2d_builtin.dml") as conv2d_builtin
source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss
-source("nn/layers/max_pool.dml") as max_pool
-source("nn/layers/max_pool_builtin.dml") as max_pool_builtin
+source("nn/layers/max_pool2d.dml") as max_pool2d
+source("nn/layers/max_pool2d_builtin.dml") as max_pool2d_builtin
source("nn/layers/spatial_batch_norm.dml") as spatial_batch_norm
source("nn/layers/tanh.dml") as tanh
-source("nn/test/conv_simple.dml") as conv_simple
-source("nn/test/max_pool_simple.dml") as max_pool_simple
+source("nn/test/conv2d_simple.dml") as conv2d_simple
+source("nn/test/max_pool2d_simple.dml") as max_pool2d_simple
source("nn/test/util.dml") as test_util
source("nn/util.dml") as util
batch_norm = function() {
/*
- * Test for the `batch_norm` function.
+ * Test for the batch normalization function.
*/
- print("Testing the batch_norm function.")
+ print("Testing the batch normalization function.")
# Generate data
N = 4 # Number of examples
@@ -68,11 +68,11 @@ batch_norm = function() {
}
}
-conv = function() {
+conv2d = function() {
/*
- * Test for the `conv` functions.
+ * Test for the 2D convolution functions.
*/
- print("Testing the conv functions.")
+ print("Testing the 2D convolution functions.")
# Generate data
N = 2 # num examples
@@ -87,14 +87,14 @@ conv = function() {
X = rand(rows=N, cols=C*Hin*Win, pdf="normal")
# Create layer
- [W, b] = conv::init(F, C, Hf, Wf)
+ [W, b] = conv2d::init(F, C, Hf, Wf)
# Forward
- [out, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
- [out_simple, Hout_simple, Wout_simple] = conv_simple::forward(X, W, b, C, Hin, Win, Hf, Wf,
- stride, stride, pad, pad)
- [out_builtin, Hout_builtin, Wout_builtin] = conv_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf,
- stride, stride, pad, pad)
+ [out, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [out_simple, Hout_simple, Wout_simple] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf,
+ stride, stride, pad, pad)
+ [out_builtin, Hout_builtin, Wout_builtin] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf,
+ stride, stride, pad, pad)
# Equivalency check
out = matrix(out, rows=1, cols=N*F*Hout*Wout)
@@ -110,7 +110,7 @@ conv = function() {
cross_entropy_loss = function() {
/*
- * Test for the `cross-entropy` loss function.
+ * Test for the cross-entropy loss function.
*
* Here we make sure that the cross-entropy loss function does
* not propagate `infinity` values in the case that a prediction is
@@ -206,11 +206,11 @@ padding = function() {
}
}
-max_pool = function() {
+max_pool2d = function() {
/*
- * Test for the `max_pool` functions.
+ * Test for the 2D max pooling functions.
*/
- print("Testing the max pool functions.")
+ print("Testing the 2D max pooling functions.")
# Generate data
N = 2 # num examples
@@ -227,12 +227,14 @@ max_pool = function() {
print(" - Testing w/ padh="+padh+" & padw="+padw+".")
#if (1==1) {} # force correct printing
#print(" - Testing forward")
- [out, Hout, Wout] = max_pool::forward(X, C, Hin, Win, Hf, Wf, stride, stride, padh, padw)
- [out_simple, Hout_simple, Wout_simple] = max_pool_simple::forward(X, C, Hin, Win, Hf, Wf,
- stride, stride, padh, padw)
- [out_builtin, Hout_builtin, Wout_builtin] = max_pool_builtin::forward(X, C, Hin, Win, Hf, Wf,
- stride, stride,
- padh, padw)
+ [out, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, padh, padw)
+ [out_simple, Hout_simple, Wout_simple] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf,
+ stride, stride,
+ padh, padw)
+ [out_builtin, Hout_builtin, Wout_builtin] = max_pool2d_builtin::forward(X, C, Hin, Win,
+ Hf, Wf,
+ stride, stride,
+ padh, padw)
# Equivalency check
out = matrix(out, rows=1, cols=N*C*Hout*Wout)
@@ -247,11 +249,12 @@ max_pool = function() {
#print(" - Testing backward")
dout = rand(rows=N, cols=C*Hout*Wout, pdf="normal")
- dX = max_pool::backward(dout, Hout, Wout, X, C, Hin, Win, Hf, Wf, stride, stride, padh, padw)
- dX_simple = max_pool_simple::backward(dout, Hout_simple, Wout_simple, X, C, Hin, Win, Hf, Wf,
- stride, stride, padh, padw)
- dX_builtin = max_pool_builtin::backward(dout, Hout_builtin, Wout_builtin, X, C, Hin, Win,
+ dX = max_pool2d::backward(dout, Hout, Wout, X, C, Hin, Win, Hf, Wf, stride, stride,
+ padh, padw)
+ dX_simple = max_pool2d_simple::backward(dout, Hout_simple, Wout_simple, X, C, Hin, Win,
Hf, Wf, stride, stride, padh, padw)
+ dX_builtin = max_pool2d_builtin::backward(dout, Hout_builtin, Wout_builtin, X, C, Hin, Win,
+ Hf, Wf, stride, stride, padh, padw)
# Equivalency check
dX = matrix(dX, rows=1, cols=N*C*Hin*Win)
@@ -288,11 +291,11 @@ max_pool = function() {
pad = 0
# forward
- [out, Hout, Wout] = max_pool::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
- [out_simple, Hout_simple, Wout_simple] = max_pool_simple::forward(X, C, Hin, Win, Hf, Wf,
- stride, stride, pad, pad)
- [out_builtin, Hout_builtin, Wout_builtin] = max_pool_builtin::forward(X, C, Hin, Win, Hf, Wf,
- stride, stride, pad, pad)
+ [out, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [out_simple, Hout_simple, Wout_simple] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf,
+ stride, stride, pad, pad)
+ [out_builtin, Hout_builtin, Wout_builtin] = max_pool2d_builtin::forward(X, C, Hin, Win, Hf, Wf,
+ stride, stride, pad, pad)
# equivalency check
# -- channel 1
@@ -326,11 +329,11 @@ max_pool = function() {
pad = 1
# forward
- [out, Hout, Wout] = max_pool::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
- [out_simple, Hout_simple, Wout_simple] = max_pool_simple::forward(X, C, Hin, Win, Hf, Wf,
- stride, stride, pad, pad)
- [out_builtin, Hout_builtin, Wout_builtin] = max_pool_builtin::forward(X, C, Hin, Win, Hf, Wf,
- stride, stride, pad, pad)
+ [out, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [out_simple, Hout_simple, Wout_simple] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf,
+ stride, stride, pad, pad)
+ [out_builtin, Hout_builtin, Wout_builtin] = max_pool2d_builtin::forward(X, C, Hin, Win, Hf, Wf,
+ stride, stride, pad, pad)
# equivalency check
# -- channel 1
@@ -363,11 +366,11 @@ max_pool = function() {
pad = 0
# forward
- [out, Hout, Wout] = max_pool::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
- [out_simple, Hout_simple, Wout_simple] = max_pool_simple::forward(X, C, Hin, Win, Hf, Wf,
- stride, stride, pad, pad)
- [out_builtin, Hout_builtin, Wout_builtin] = max_pool_builtin::forward(X, C, Hin, Win, Hf, Wf,
- stride, stride, pad, pad)
+ [out, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [out_simple, Hout_simple, Wout_simple] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf,
+ stride, stride, pad, pad)
+ [out_builtin, Hout_builtin, Wout_builtin] = max_pool2d_builtin::forward(X, C, Hin, Win, Hf, Wf,
+ stride, stride, pad, pad)
# equivalency check
# -- channel 1
@@ -402,11 +405,11 @@ max_pool = function() {
pad = 1
# forward
- [out, Hout, Wout] = max_pool::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
- [out_simple, Hout_simple, Wout_simple] = max_pool_simple::forward(X, C, Hin, Win, Hf, Wf,
- stride, stride, pad, pad)
- [out_builtin, Hout_builtin, Wout_builtin] = max_pool_builtin::forward(X, C, Hin, Win, Hf, Wf,
- stride, stride, pad, pad)
+ [out, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [out_simple, Hout_simple, Wout_simple] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf,
+ stride, stride, pad, pad)
+ [out_builtin, Hout_builtin, Wout_builtin] = max_pool2d_builtin::forward(X, C, Hin, Win, Hf, Wf,
+ stride, stride, pad, pad)
# equivalency check
# -- channel 1
@@ -417,7 +420,8 @@ max_pool = function() {
# 0 0 0
# 0 -6 0
# 0 0 0
- target = matrix("-1 -2 -4 -5 -6 -8 -13 -14 -16 -1 -5 -13 -2 -6 -14 -4 -8 -16", rows=1, cols=C*Hout*Wout)
+ target = matrix("-1 -2 -4 -5 -6 -8 -13 -14 -16 -1 -5 -13 -2 -6 -14 -4 -8 -16",
+ rows=1, cols=C*Hout*Wout)
target = rbind(target, target) # n=2
tmp = test_util::check_all_equal(out, target)
tmp = test_util::check_all_equal(out_simple, target)
@@ -426,9 +430,9 @@ max_pool = function() {
spatial_batch_norm = function() {
/*
- * Test for the `spatial_batch_norm` function.
+ * Test for the spatial batch normalization function.
*/
- print("Testing the spatial_batch_norm function.")
+ print("Testing the spatial batch normalization function.")
# Generate data
N = 2 # Number of examples
@@ -532,7 +536,8 @@ tanh = function() {
# Equivalency check
for (i in 1:nrow(out)) {
for (j in 1:ncol(out)) {
- rel_error = test_util::check_rel_error(as.scalar(out[i,j]), as.scalar(out_ref[i,j]), 1e-10, 1e-12)
+ rel_error = test_util::check_rel_error(as.scalar(out[i,j]), as.scalar(out_ref[i,j]),
+ 1e-10, 1e-12)
}
}
}
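
A detail worth noting about the pad=1 pooling cases above: the reference implementation pads with -inf (pad_value = -1/0) rather than zero, so a padded cell can never win the max. With all-negative inputs, zero padding would incorrectly dominate every border window, which is exactly what the negative target matrices above guard against. A quick check of that behavior:

source("nn/test/max_pool2d_simple.dml") as max_pool2d_simple

C = 1
Hin = 4
Win = 4
X = rand(rows=1, cols=C*Hin*Win, min=-2, max=-1)  # strictly negative input
# 2x2 pooling, stride 1, pad 1: border windows include padded cells.
[out, Hout, Wout] = max_pool2d_simple::forward(X, C, Hin, Win, 2, 2, 1, 1, 1, 1)
print("max(out): " + max(out))  # still negative: padding never wins the max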