Posted to commits@systemml.apache.org by du...@apache.org on 2017/04/01 01:42:34 UTC
[1/7] incubator-systemml git commit: [SYSTEMML-1452] General code cleanup of SystemML-NN
Repository: incubator-systemml
Updated Branches:
refs/heads/master 2e48d951b -> ac8ee2bef
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/scripts/staging/SystemML-NN/nn/test/conv_simple.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/conv_simple.dml b/scripts/staging/SystemML-NN/nn/test/conv_simple.dml
index fb9d02c..efd99c3 100644
--- a/scripts/staging/SystemML-NN/nn/test/conv_simple.dml
+++ b/scripts/staging/SystemML-NN/nn/test/conv_simple.dml
@@ -24,6 +24,7 @@
*
* This implementation is intended to be a simple, reference version.
*/
+
forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
int C, int Hin, int Win, int Hf, int Wf,
int strideh, int stridew, int padh, int padw)
@@ -36,9 +37,9 @@ forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
* This implementation is intended to be a simple, reference version.
*
* Inputs:
- * - X: Input data matrix, of shape (N, C*Hin*Win).
- * - W: Weights (parameters) matrix, of shape (F, C*Hf*Wf).
- * - b: Biases vector, of shape (F, 1).
+ * - X: Inputs, of shape (N, C*Hin*Win).
+ * - W: Weights, of shape (F, C*Hf*Wf).
+ * - b: Biases, of shape (F, 1).
* - C: Number of input channels (dimensionality of input depth).
* - Hin: Input height.
* - Win: Input width.
@@ -56,8 +57,8 @@ forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
*/
N = nrow(X)
F = nrow(W)
- Hout = as.integer((Hin + 2 * padh - Hf) / strideh + 1)
- Wout = as.integer((Win + 2 * padw - Wf) / stridew + 1)
+ Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1)
+ Wout = as.integer((Win + 2*padw - Wf)/stridew + 1)
# Create output volume
out = matrix(0, rows=N, cols=F*Hout*Wout)
@@ -71,14 +72,14 @@ forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
Xn_slice = matrix(Xn[c,], rows=Hin, cols=Win) # depth slice C reshaped
Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw)
Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice
- Xn_padded[c, ] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape
+ Xn_padded[c,] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape
}
# Convolve image with filters
parfor (f in 1:F, check=0) { # all filters
parfor (hout in 1:Hout, check=0) { # all output rows
- h0 = (hout-1) * strideh + 1
+ h0 = (hout-1)*strideh + 1
parfor (wout in 1:Wout, check=0) { # all output columns
- w0 = (wout-1) * stridew + 1
+ w0 = (wout-1)*stridew + 1
# Create a patch of the input example corresponding spatially to the filter sizes
Xn_padded_patch = matrix(0, rows=C, cols=Hf*Wf) # zeros
parfor (c in 1:C, check=0) {
@@ -106,12 +107,13 @@ backward = function(matrix[double] dout, int Hout, int Wout,
* This implementation is intended to be a simple, reference version.
*
* Inputs:
- * - dout: Derivatives from upstream, of shape (N, F*Hout*Wout).
+ * - dout: Gradient wrt `out` from upstream, of
+ * shape (N, F*Hout*Wout).
* - Hout: Output height.
* - Wout: Output width.
- * - X: Previous input data matrix, of shape (N, C*Hin*Win).
- * - W: Weights (parameters) matrix, of shape (F, C*Hf*Wf).
- * - b: Biases vector, of shape (F, 1).
+ * - X: Inputs, of shape (N, C*Hin*Win).
+ * - W: Weights, of shape (F, C*Hf*Wf).
+ * - b: Biases, of shape (F, 1).
* - C: Number of input channels (dimensionality of input depth).
* - Hin: Input height.
* - Win: Input width.
@@ -123,14 +125,14 @@ backward = function(matrix[double] dout, int Hout, int Wout,
* - padw: Padding for left and right sides.
*
* Outputs:
- * - dX: Gradient wrt X, of shape (N, C*Hin*Win).
- * - dW: Gradient wrt W, of shape (F, C*Hf*Wf).
- * - db: Gradient wrt b, of shape (F, 1).
+ * - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
+ * - dW: Gradient wrt `W`, of shape (F, C*Hf*Wf).
+ * - db: Gradient wrt `b`, of shape (F, 1).
*/
N = nrow(X)
F = nrow(W)
- Hout = as.integer((Hin + 2 * padh - Hf) / strideh + 1)
- Wout = as.integer((Win + 2 * padw - Wf) / stridew + 1)
+ Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1)
+ Wout = as.integer((Win + 2*padw - Wf)/stridew + 1)
# Create gradient volumes
dX = matrix(0, rows=N, cols=C*Hin*Win)
@@ -146,7 +148,7 @@ backward = function(matrix[double] dout, int Hout, int Wout,
Xn_slice = matrix(Xn[c,], rows=Hin, cols=Win) # depth slice C reshaped
Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw)
Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice
- Xn_padded[c, ] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape
+ Xn_padded[c,] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape
}
dXn_padded = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw))
for (f in 1:F) { # all filters
@@ -191,10 +193,11 @@ init = function(int F, int C, int Hf, int Wf)
/*
* Initialize the parameters of this layer.
*
- * We use the heuristic by He et al. [http://arxiv.org/abs/1502.01852],
- * which limits the magnification of inputs/gradients during
- * forward/backward passes by scaling unit-Gaussian weights by a
- * factor of sqrt(2/n), under the assumption of relu neurons.
+ * We use the heuristic by He et al., which limits the magnification
+ * of inputs/gradients during forward/backward passes by scaling
+ * unit-Gaussian weights by a factor of sqrt(2/n), under the
+ * assumption of relu neurons.
+ * - http://arxiv.org/abs/1502.01852
*
* Inputs:
* - F: Number of filters.
@@ -203,8 +206,8 @@ init = function(int F, int C, int Hf, int Wf)
* - Wf: Filter width.
*
* Outputs:
- * - W: Weights (parameters) matrix, of shape (F, C*Hf*Wf).
- * - b: Biases vector, of shape (F, 1).
+ * - W: Weights, of shape (F, C*Hf*Wf).
+ * - b: Biases, of shape (F, 1).
*/
W = rand(rows=F, cols=C*Hf*Wf, pdf="normal") * sqrt(2.0/(C*Hf*Wf))
b = matrix(0, rows=F, cols=1)
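As a quick sanity check on the two pieces above (the sqrt(2/n) He scaling and the output-size arithmetic), here is a minimal DML sketch; the concrete sizes are illustrative and not part of the commit:

source("nn/test/conv_simple.dml") as conv_simple

# Illustrative configuration: 3-channel 5x5 inputs, 4 filters of size 3x3.
F = 4
C = 3
Hin = 5
Win = 5
Hf = 3
Wf = 3
stride = 1
pad = 1

# He et al. scaling: n = C*Hf*Wf = 27, so W holds unit-Gaussian samples
# scaled by sqrt(2/27) (~0.272), and b starts at zero.
[W, b] = conv_simple::init(F, C, Hf, Wf)

# Output size: Hout = (5 + 2*1 - 3)/1 + 1 = 5, and Wout likewise, so a 3x3
# filter with stride 1 and pad 1 preserves the spatial dimensions.
Hout = as.integer((Hin + 2*pad - Hf)/stride + 1)
Wout = as.integer((Win + 2*pad - Wf)/stride + 1)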
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/scripts/staging/SystemML-NN/nn/test/grad_check.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/grad_check.dml b/scripts/staging/SystemML-NN/nn/test/grad_check.dml
index 6b90d56..adc1c9a 100644
--- a/scripts/staging/SystemML-NN/nn/test/grad_check.dml
+++ b/scripts/staging/SystemML-NN/nn/test/grad_check.dml
@@ -117,7 +117,7 @@ affine = function() {
outph = affine::forward(X, W, b)
lossph = l2_loss::forward(outph, y)
X[i,j] = old # reset
- dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ dX_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -136,7 +136,7 @@ affine = function() {
outph = affine::forward(X, W, b)
lossph = l2_loss::forward(outph, y)
W[i,j] = old # reset
- dW_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ dW_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
@@ -155,7 +155,7 @@ affine = function() {
outph = affine::forward(X, W, b)
lossph = l2_loss::forward(outph, y)
b[i,j] = old # reset
- db_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ db_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
@@ -214,7 +214,7 @@ batch_norm = function() {
batch_norm::forward(X, gamma, beta, mode, ema_mean, ema_var, mu, eps)
lossph = l2_loss::forward(outph, y)
X[i,j] = old # reset
- dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ dX_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -235,7 +235,7 @@ batch_norm = function() {
batch_norm::forward(X, gamma, beta, mode, ema_mean, ema_var, mu, eps)
lossph = l2_loss::forward(outph, y)
gamma[i,j] = old # reset
- dgamma_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ dgamma_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(dgamma[i,j]), dgamma_num, lossph, lossmh)
@@ -256,7 +256,7 @@ batch_norm = function() {
batch_norm::forward(X, gamma, beta, mode, ema_mean, ema_var, mu, eps)
lossph = l2_loss::forward(outph, y)
beta[i,j] = old # reset
- dbeta_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ dbeta_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(dbeta[i,j]), dbeta_num, lossph, lossmh)
@@ -307,7 +307,7 @@ conv = function() {
[outph, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
lossph = l2_loss::forward(outph, y)
X[i,j] = old # reset
- dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ dX_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -326,7 +326,7 @@ conv = function() {
[outph, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
lossph = l2_loss::forward(outph, y)
W[i,j] = old # reset
- dW_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ dW_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
@@ -345,7 +345,7 @@ conv = function() {
[outph, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
lossph = l2_loss::forward(outph, y)
b[i,j] = old # reset
- db_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ db_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
@@ -355,7 +355,8 @@ conv = function() {
conv_builtin = function() {
/*
- * Gradient check for the convolutional layer using built-in functions.
+ * Gradient check for the convolutional layer using built-in
+ * functions.
*/
print("Grad checking the built-in convolutional layer with L2 loss.")
@@ -397,7 +398,7 @@ conv_builtin = function() {
pad, pad)
lossph = l2_loss::forward(outph, y)
X[i,j] = old # reset
- dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ dX_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -418,7 +419,7 @@ conv_builtin = function() {
pad, pad)
lossph = l2_loss::forward(outph, y)
W[i,j] = old # reset
- dW_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ dW_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
@@ -439,7 +440,7 @@ conv_builtin = function() {
pad, pad)
lossph = l2_loss::forward(outph, y)
b[i,j] = old # reset
- db_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ db_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
@@ -491,7 +492,7 @@ conv_simple = function() {
pad, pad)
lossph = l2_loss::forward(outph, y)
X[i,j] = old # reset
- dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ dX_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -512,7 +513,7 @@ conv_simple = function() {
pad, pad)
lossph = l2_loss::forward(outph, y)
W[i,j] = old # reset
- dW_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ dW_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
@@ -533,7 +534,7 @@ conv_simple = function() {
pad, pad)
lossph = l2_loss::forward(outph, y)
b[i,j] = old # reset
- db_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ db_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
@@ -569,7 +570,7 @@ cross_entropy_loss = function() {
pred[i,j] = old + h
lossph = cross_entropy_loss::forward(pred, y)
pred[i,j] = old # reset pred[i,j]
- dpred_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ dpred_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh)
@@ -609,7 +610,7 @@ dropout = function() {
[outph, mask] = dropout::forward(X, p, seed)
lossph = l2_loss::forward(outph, y)
X[i,j] = old # reset
- dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ dX_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -643,7 +644,7 @@ l1_loss = function() {
pred[i,j] = old + h
lossph = l1_loss::forward(pred, y)
pred[i,j] = old # reset pred[i,j]
- dpred_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ dpred_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh)
@@ -677,7 +678,7 @@ l1_reg = function() {
W[i,j] = old + h
reg_lossph = l1_reg::forward(W, lambda)
W[i,j] = old # reset W[i,j]
- dW_num = (reg_lossph - reg_lossmh) / (2 * h) # numerical derivative
+ dW_num = (reg_lossph-reg_lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, reg_lossph, reg_lossmh)
@@ -711,7 +712,7 @@ l2_loss = function() {
pred[i,j] = old + h
lossph = l2_loss::forward(pred, y)
pred[i,j] = old # reset pred[i,j]
- dpred_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ dpred_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh)
@@ -745,7 +746,7 @@ l2_reg = function() {
W[i,j] = old + h
reg_lossph = l2_reg::forward(W, lambda)
W[i,j] = old # reset W[i,j]
- dW_num = (reg_lossph - reg_lossmh) / (2 * h) # numerical derivative
+ dW_num = (reg_lossph-reg_lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, reg_lossph, reg_lossmh)
@@ -779,7 +780,7 @@ log_loss = function() {
pred[i,j] = old + h
lossph = log_loss::forward(pred, y)
pred[i,j] = old # reset pred[i,j]
- dpred_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ dpred_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh)
@@ -831,7 +832,7 @@ lstm = function() {
loss_cph = l2_loss::forward(cph, yc)
lossph = loss_outph + loss_cph
X[i,j] = old # reset
- dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ dX_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -854,7 +855,7 @@ lstm = function() {
loss_cph = l2_loss::forward(cph, yc)
lossph = loss_outph + loss_cph
W[i,j] = old # reset
- dW_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ dW_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
@@ -877,7 +878,7 @@ lstm = function() {
loss_cph = l2_loss::forward(cph, yc)
lossph = loss_outph + loss_cph
b[i,j] = old # reset
- db_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ db_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
@@ -900,7 +901,7 @@ lstm = function() {
loss_cph = l2_loss::forward(cph, yc)
lossph = loss_outph + loss_cph
out0[i,j] = old # reset
- dout0_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ dout0_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(dout0[i,j]), dout0_num, lossph, lossmh)
@@ -923,7 +924,7 @@ lstm = function() {
loss_cph = l2_loss::forward(cph, yc)
lossph = loss_outph + loss_cph
c0[i,j] = old # reset
- dc0_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ dc0_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(dc0[i,j]), dc0_num, lossph, lossmh)
@@ -949,8 +950,8 @@ max_pool = function() {
for (pad in 0:1) {
print(" - Grad checking w/ pad="+pad+".")
- Hout = as.integer((Hin + 2 * pad - Hf) / stride + 1)
- Wout = as.integer((Win + 2 * pad - Wf) / stride + 1)
+ Hout = as.integer((Hin + 2*pad - Hf)/stride + 1)
+ Wout = as.integer((Win + 2*pad - Wf)/stride + 1)
y = rand(rows=N, cols=C*Hout*Wout)
# Compute analytical gradients of loss wrt parameters
@@ -971,7 +972,7 @@ max_pool = function() {
[outph, Hout, Wout] = max_pool::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
lossph = l2_loss::forward(outph, y)
X[i,j] = old # reset
- dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ dX_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -1023,7 +1024,7 @@ max_pool_builtin = function() {
pad, pad)
lossph = l2_loss::forward(outph, y)
X[i,j] = old # reset
- dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ dX_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -1050,8 +1051,8 @@ max_pool_simple = function() {
for (pad in 0:1) {
print(" - Grad checking w/ pad="+pad+".")
- Hout = as.integer((Hin + 2 * pad - Hf) / stride + 1)
- Wout = as.integer((Win + 2 * pad - Wf) / stride + 1)
+ Hout = as.integer((Hin + 2*pad - Hf)/stride + 1)
+ Wout = as.integer((Win + 2*pad - Wf)/stride + 1)
y = rand(rows=N, cols=C*Hout*Wout)
# Compute analytical gradients of loss wrt parameters
@@ -1075,7 +1076,7 @@ max_pool_simple = function() {
pad, pad)
lossph = l2_loss::forward(outph, y)
X[i,j] = old # reset
- dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ dX_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -1121,7 +1122,7 @@ relu = function() {
outph = relu::forward(X)
lossph = l2_loss::forward(outph, y)
X[i,j] = old # reset
- dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ dX_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -1165,7 +1166,7 @@ rnn = function() {
[outph, cache_out] = rnn::forward(X, W, b, T, D, return_seq, out0)
lossph = l2_loss::forward(outph, y)
X[i,j] = old # reset
- dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ dX_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -1184,7 +1185,7 @@ rnn = function() {
[outph, cache_out] = rnn::forward(X, W, b, T, D, return_seq, out0)
lossph = l2_loss::forward(outph, y)
W[i,j] = old # reset
- dW_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ dW_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
@@ -1203,7 +1204,7 @@ rnn = function() {
[outph, cache_out] = rnn::forward(X, W, b, T, D, return_seq, out0)
lossph = l2_loss::forward(outph, y)
b[i,j] = old # reset
- db_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ db_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
@@ -1222,7 +1223,7 @@ rnn = function() {
[outph, cache_out] = rnn::forward(X, W, b, T, D, return_seq, out0)
lossph = l2_loss::forward(outph, y)
out0[i,j] = old # reset
- dout0_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ dout0_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(dout0[i,j]), dout0_num, lossph, lossmh)
@@ -1260,7 +1261,7 @@ sigmoid = function() {
outph = sigmoid::forward(X)
lossph = l2_loss::forward(outph, y)
X[i,j] = old # reset
- dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ dX_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -1299,7 +1300,7 @@ softmax = function() {
outph = softmax::forward(X)
lossph = l2_loss::forward(outph, y)
X[i,j] = old # reset
- dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ dX_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -1364,7 +1365,7 @@ spatial_batch_norm = function() {
ema_mean, ema_var, mu, eps)
lossph = l2_loss::forward(outph, y)
X[i,j] = old # reset
- dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ dX_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -1387,7 +1388,7 @@ spatial_batch_norm = function() {
ema_mean, ema_var, mu, eps)
lossph = l2_loss::forward(outph, y)
gamma[i,j] = old # reset
- dgamma_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ dgamma_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(dgamma[i,j]), dgamma_num, lossph, lossmh)
@@ -1410,7 +1411,7 @@ spatial_batch_norm = function() {
ema_mean, ema_var, mu, eps)
lossph = l2_loss::forward(outph, y)
beta[i,j] = old # reset
- dbeta_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ dbeta_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(dbeta[i,j]), dbeta_num, lossph, lossmh)
@@ -1421,7 +1422,8 @@ spatial_batch_norm = function() {
tanh = function() {
/*
- * Gradient check for the hyperbolic tangent (tanh) nonlinearity layer.
+ * Gradient check for the hyperbolic tangent (tanh) nonlinearity
+ * layer.
*/
print("Grad checking the tanh nonlinearity layer with L2 loss.")
@@ -1449,7 +1451,7 @@ tanh = function() {
outph = tanh::forward(X)
lossph = l2_loss::forward(outph, y)
X[i,j] = old # reset
- dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ dX_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -1517,7 +1519,7 @@ two_layer_affine_l2_net = function() {
X[i,j] = old_x + h
[lossph, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2)
X[i,j] = old_x # reset X[i,j]
- dX_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ dX_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
@@ -1534,7 +1536,7 @@ two_layer_affine_l2_net = function() {
W1[i,j] = old_w + h
[lossph, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2)
W1[i,j] = old_w # reset W[i,j]
- dWij_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ dWij_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(dW1[i,j]), dWij_num, lossph, lossmh)
@@ -1551,7 +1553,7 @@ two_layer_affine_l2_net = function() {
W2[i,j] = old_w + h
[lossph, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2)
W2[i,j] = old_w # reset W[i,j]
- dWij_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ dWij_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(dW2[i,j]), dWij_num, lossph, lossmh)
@@ -1568,7 +1570,7 @@ two_layer_affine_l2_net = function() {
b1[i,j] = old_b + h
[lossph, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2)
b1[i,j] = old_b # reset b1[i,j]
- dbij_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ dbij_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(db1[i,j]), dbij_num, lossph, lossmh)
@@ -1585,7 +1587,7 @@ two_layer_affine_l2_net = function() {
b2[i,j] = old_b + h
[lossph, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2)
b2[i,j] = old_b # reset b2[i,j]
- dbij_num = (lossph - lossmh) / (2 * h) # numerical derivative
+ dbij_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
rel_error = check_rel_error(as.scalar(db2[i,j]), dbij_num, lossph, lossmh)
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/scripts/staging/SystemML-NN/nn/test/max_pool_simple.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/max_pool_simple.dml b/scripts/staging/SystemML-NN/nn/test/max_pool_simple.dml
index 4394ffd..786b0a1 100644
--- a/scripts/staging/SystemML-NN/nn/test/max_pool_simple.dml
+++ b/scripts/staging/SystemML-NN/nn/test/max_pool_simple.dml
@@ -24,6 +24,7 @@
*
* This implementation is intended to be a simple, reference version.
*/
+
forward = function(matrix[double] X, int C, int Hin, int Win, int Hf, int Wf,
int strideh, int stridew, int padh, int padw)
return (matrix[double] out, int Hout, int Wout) {
@@ -35,7 +36,7 @@ forward = function(matrix[double] X, int C, int Hin, int Win, int Hf, int Wf,
* This implementation is intended to be a simple, reference version.
*
* Inputs:
- * - X: Input data matrix, of shape (N, C*Hin*Win).
+ * - X: Inputs, of shape (N, C*Hin*Win).
* - C: Number of input channels (dimensionality of input depth).
* - Hin: Input height.
* - Win: Input width.
@@ -54,8 +55,8 @@ forward = function(matrix[double] X, int C, int Hin, int Win, int Hf, int Wf,
* - Wout: Output width.
*/
N = nrow(X)
- Hout = as.integer((Hin + 2 * padh - Hf) / strideh + 1)
- Wout = as.integer((Win + 2 * padw - Wf) / stridew + 1)
+ Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1)
+ Wout = as.integer((Win + 2*padw - Wf)/stridew + 1)
# Create output volume
out = matrix(0, rows=N, cols=C*Hout*Wout)
@@ -99,10 +100,11 @@ backward = function(matrix[double] dout, int Hout, int Wout, matrix[double] X,
* unrolled into a single vector.
*
* Inputs:
- * - dout: Derivatives from upstream, of shape (N, C*Hout*Wout).
+ * - dout: Gradient wrt `out` from upstream, of
+ * shape (N, C*Hout*Wout).
* - Hout: Output height.
* - Wout: Output width.
- * - X: Input data matrix, of shape (N, C*Hin*Win).
+ * - X: Inputs, of shape (N, C*Hin*Win).
* - C: Number of input channels (dimensionality of input depth).
* - Hin: Input height.
* - Win: Input width.
@@ -116,7 +118,7 @@ backward = function(matrix[double] dout, int Hout, int Wout, matrix[double] X,
* A typical value is 0.
*
* Outputs:
- * - dX: Gradient wrt X, of shape (N, C*Hin*Win).
+ * - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
*/
N = nrow(X)
@@ -134,7 +136,7 @@ backward = function(matrix[double] dout, int Hout, int Wout, matrix[double] X,
Xn_slice = matrix(Xn[c,], rows=Hin, cols=Win) # depth slice C reshaped
Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw)
Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice
- Xn_padded[c, ] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape
+ Xn_padded[c,] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape
}
img = Xn_padded
@@ -162,7 +164,7 @@ backward = function(matrix[double] dout, int Hout, int Wout, matrix[double] X,
parfor (c in 1:C, check=0) {
dXn_padded_slice = matrix(dimg[c,], rows=(Hin+2*padh), cols=(Win+2*padw))
dXn_slice = dXn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win]
- dXn[c, ] = matrix(dXn_slice, rows=1, cols=Hin*Win)
+ dXn[c,] = matrix(dXn_slice, rows=1, cols=Hin*Win)
}
dX[n,] = matrix(dXn, rows=1, cols=C*Hin*Win)
}
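To make the pooling shapes above concrete, here is a minimal sketch mirroring the known-answer test in nn/test/test.dml further down (sizes illustrative):

source("nn/test/max_pool_simple.dml") as max_pool_simple

# One 4x4 channel containing 1..16 row by row; 2x2 pooling with stride 2.
N = 1
C = 1
Hin = 4
Win = 4
Hf = 2
Wf = 2
stride = 2
X = matrix(seq(1, N*C*Hin*Win), rows=N, cols=C*Hin*Win)

# Hout = (4-2)/2 + 1 = 2 and Wout = 2; each output element is the max of
# one 2x2 window, so out is the 1x4 row [6 8 14 16].
[out, Hout, Wout] = max_pool_simple::forward(X, C, Hin, Win, Hf, Wf,
                                             stride, stride, 0, 0)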
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/scripts/staging/SystemML-NN/nn/util.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/util.dml b/scripts/staging/SystemML-NN/nn/util.dml
index dd0ac19..6b86225 100644
--- a/scripts/staging/SystemML-NN/nn/util.dml
+++ b/scripts/staging/SystemML-NN/nn/util.dml
@@ -22,14 +22,15 @@
/*
* Utility functions.
*/
+
all_equal = function(matrix[double] X1, matrix[double] X2)
return(boolean equivalent) {
/*
* Determine if two matrices are equivalent.
*
* Inputs:
- * - X1: Input matrix, of shape (any, any).
- * - X2: Input matrix, of same shape as X1.
+ * - X1: Inputs, of shape (any, any).
+ * - X2: Inputs, of same shape as X1.
*
* Outputs:
* - equivalent: Whether or not the two matrices are equivalent.
@@ -42,12 +43,12 @@ check_all_equal = function(matrix[double] X1, matrix[double] X2)
/*
* Check if two matrices are equivalent, and report any issues.
*
- * - Issues an "ERROR" statement if elements of the two matrices
- * are not equal.
+ * Issues an "ERROR" statement if elements of the two matrices are
+ * not equal.
*
* Inputs:
- * - X1: Input matrix, of shape (any, any).
- * - X2: Input matrix, of same shape as X1.
+ * - X1: Inputs, of shape (any, any).
+ * - X2: Inputs, of same shape as X1.
*
* Outputs:
* - equivalent: Whether or not the two matrices are equivalent.
@@ -61,7 +62,8 @@ check_all_equal = function(matrix[double] X1, matrix[double] X2)
}
}
-compute_rel_error = function(double x1, double x2) return (double rel_error) {
+compute_rel_error = function(double x1, double x2)
+ return (double rel_error) {
/*
* Relative error measure between two values.
*
@@ -74,7 +76,7 @@ compute_rel_error = function(double x1, double x2) return (double rel_error) {
* Outputs:
* - rel_error: Relative error measure between the two values.
*/
- rel_error = abs(x1 - x2) / max(1e-8, abs(x1) + abs(x2))
+ rel_error = abs(x1-x2) / max(1e-8, abs(x1)+abs(x2))
}
check_rel_error = function(double x1, double x2, double thresh_error, double thresh_warn)
@@ -83,10 +85,12 @@ check_rel_error = function(double x1, double x2, double thresh_error, double thr
* Check and report any issues with the relative error measure between
* two values.
*
- * - Issues an "ERROR" statement for relative errors > thresh_error,
- * indicating that the implementation is likely incorrect.
- * - Issues a "WARNING" statement for relative errors < thresh_error
- * but > thresh_warn, indicating that the implementation may be incorrect.
+ * Issues an "ERROR" statement for relative errors > thresh_error,
+ * indicating that the implementation is likely incorrect.
+ *
+ * Issues a "WARNING" statement for relative errors < thresh_error
+ * but > thresh_warn, indicating that the implementation may be
+ * incorrect.
*
* Inputs:
* - x1: First value.
@@ -117,7 +121,7 @@ channel_sums = function(matrix[double] X, int C, int Hin, int Win)
* Computes a channel-wise summation over a 4D input.
*
* Inputs:
- * - X: Input data matrix, of shape (N, C*Hin*Win).
+ * - X: Inputs, of shape (N, C*Hin*Win).
* - C: Number of input channels (dimensionality of input depth).
* - Hin: Input height.
* - Win: Input width.
@@ -152,16 +156,16 @@ im2col = function(matrix[double] img, int Hin, int Win, int Hf, int Wf, int stri
* out into columns, of shape (C*Hf*Wf, Hout*Wout).
*/
C = nrow(img)
- Hout = as.integer((Hin - Hf) / strideh + 1)
- Wout = as.integer((Win - Wf) / stridew + 1)
+ Hout = as.integer((Hin-Hf)/strideh + 1)
+ Wout = as.integer((Win-Wf)/stridew + 1)
# Note: We start with `img_cols` transposed to allow for row-major
# left-indexing inside the loop, which is more performant.
img_cols = matrix(0, rows=Hout*Wout, cols=C*Hf*Wf) # zeros
parfor (hout in 1:Hout, check=0) { # all output rows
- hin = (hout-1) * strideh + 1
+ hin = (hout-1)*strideh + 1
parfor (wout in 1:Wout, check=0) { # all output columns
- win = (wout-1) * stridew + 1
+ win = (wout-1)*stridew + 1
# Extract a local patch of the input image corresponding spatially to the filter sizes.
img_patch = matrix(0, rows=C, cols=Hf*Wf) # zeros
parfor (c in 1:C) { # all channels
@@ -207,14 +211,14 @@ col2im = function(matrix[double] img_cols, int C, int Hin, int Win, int Hf, int
* Outputs:
* - img: Reconstructed image, of shape (C, Hin*Win).
*/
- Hout = as.integer((Hin - Hf) / strideh + 1)
- Wout = as.integer((Win - Wf) / stridew + 1)
+ Hout = as.integer((Hin-Hf)/strideh + 1)
+ Wout = as.integer((Win-Wf)/stridew + 1)
img = matrix(0, rows=C, cols=Hin*Win) # zeros
for (hout in 1:Hout) { # all output rows
- hin = (hout-1) * strideh + 1
+ hin = (hout-1)*strideh + 1
for (wout in 1:Wout) { # all output columns
- win = (wout-1) * stridew + 1
+ win = (wout-1)*stridew + 1
# Extract a local patch of the input image corresponding spatially to the filter sizes.
img_patch = matrix(img_cols[,(hout-1)*Wout + wout], rows=C, cols=Hf*Wf) # one patch
parfor (c in 1:C) { # all channels
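To make the im2col/col2im shapes above concrete, here is a minimal round-trip sketch (sizes illustrative; the stride is chosen so patches do not overlap, in which case the "none" aggregation mode used by the tests recovers the original image):

source("nn/util.dml") as util

# One 4x4 channel, 2x2 patches, stride 2, no padding.
C = 1
Hin = 4
Win = 4
Hf = 2
Wf = 2
stride = 2
img = matrix(seq(1, C*Hin*Win), rows=C, cols=Hin*Win)

# Hout = (4-2)/2 + 1 = 2 and Wout = 2, so img_cols has shape
# (C*Hf*Wf, Hout*Wout) = (4, 4): one column per 2x2 patch.
img_cols = util::im2col(img, Hin, Win, Hf, Wf, stride, stride)

# Folding the patch columns back recovers the original image here, since
# the patches tile the image without overlap.
img2 = util::col2im(img_cols, C, Hin, Win, Hf, Wf, stride, stride, "none")
equivalent = util::all_equal(img, img2)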
[4/7] incubator-systemml git commit: [SYSTEMML-1413] Extract test-only utilities from `nn/util.dml`
Posted by du...@apache.org.
[SYSTEMML-1413] Extract test-only utilities from `nn/util.dml`
This commit extracts utility functions only used for testing from
`nn/util.dml` to a new `nn/test/util.dml`.
Closes #447.
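For callers, the visible change is a one-line sourcing switch plus the renamed gradient-check helper, as the diffs below show:

# Before: grad_check.dml sourced the general utilities and defined its own
# check_rel_error helper locally.
source("nn/util.dml") as util
rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)

# After: the helper lives in nn/test/util.dml under a clearer name.
source("nn/test/util.dml") as test_util
rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)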
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/5c59e03b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/5c59e03b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/5c59e03b
Branch: refs/heads/master
Commit: 5c59e03b4caca3a519ec871475d2081bff16fd3a
Parents: 7744924
Author: Mike Dusenberry <mw...@us.ibm.com>
Authored: Fri Mar 31 18:39:04 2017 -0700
Committer: Mike Dusenberry <mw...@us.ibm.com>
Committed: Fri Mar 31 18:39:04 2017 -0700
----------------------------------------------------------------------
.../staging/SystemML-NN/nn/test/grad_check.dml | 138 +++++++----------
scripts/staging/SystemML-NN/nn/test/test.dml | 47 +++---
scripts/staging/SystemML-NN/nn/test/util.dml | 155 +++++++++++++++++++
scripts/staging/SystemML-NN/nn/util.dml | 92 -----------
4 files changed, 232 insertions(+), 200 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5c59e03b/scripts/staging/SystemML-NN/nn/test/grad_check.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/grad_check.dml b/scripts/staging/SystemML-NN/nn/test/grad_check.dml
index adc1c9a..ba9a317 100644
--- a/scripts/staging/SystemML-NN/nn/test/grad_check.dml
+++ b/scripts/staging/SystemML-NN/nn/test/grad_check.dml
@@ -44,45 +44,7 @@ source("nn/layers/spatial_batch_norm.dml") as spatial_batch_norm
source("nn/layers/tanh.dml") as tanh
source("nn/test/conv_simple.dml") as conv_simple
source("nn/test/max_pool_simple.dml") as max_pool_simple
-source("nn/util.dml") as util
-
-check_rel_error = function(double dw_a, double dw_n, double lossph, double lossmh)
- return (double rel_error) {
- /*
- * Check and report any issues with the relative error measure between
- * the analytical and numerical partial derivatives.
- *
- * - Issues an "ERROR" statement for relative errors > 1e-2,
- * indicating that the gradient is likely incorrect.
- * - Issues a "WARNING" statement for relative errors < 1e-2
- * but > 1e-4, indicating that the gradient may be incorrect.
- *
- * Inputs:
- * - dw_a: Analytical partial derivative wrt w.
- * - dw_n: Numerical partial derivative wrt w.
- * - lossph: Loss evaluated with w set to w+h.
- * - lossmh: Loss evaluated with w set to w-h.
- *
- * Outputs:
- * - rel_error: Relative error measure between the two derivatives.
- */
- # Compute relative error
- rel_error = util::compute_rel_error(dw_a, dw_n)
-
- # Evaluate relative error
- thresh_error = 1e-2
- thresh_warn = 1e-4
- if (rel_error > thresh_error) {
- print("ERROR: Relative error " + rel_error + " > " + thresh_error + " with " + dw_a +
- " analytical vs " + dw_n + " numerical, with lossph " + lossph +
- " and lossmh " + lossmh)
- }
- else if (rel_error > thresh_warn & rel_error <= thresh_error) {
- print("WARNING: Relative error " + rel_error + " > " + thresh_warn + " & <= " + thresh_error +
- " with " + dw_a + " analytical vs " + dw_n + " numerical, with lossph " + lossph +
- " and lossmh " + lossmh)
- }
-}
+source("nn/test/util.dml") as test_util
affine = function() {
/*
@@ -120,7 +82,7 @@ affine = function() {
dX_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
}
}
@@ -139,7 +101,7 @@ affine = function() {
dW_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
}
}
@@ -158,7 +120,7 @@ affine = function() {
db_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
}
}
}
@@ -217,7 +179,7 @@ batch_norm = function() {
dX_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
}
}
@@ -238,7 +200,8 @@ batch_norm = function() {
dgamma_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(dgamma[i,j]), dgamma_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(dgamma[i,j]), dgamma_num,
+ lossph, lossmh)
}
}
@@ -259,7 +222,8 @@ batch_norm = function() {
dbeta_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(dbeta[i,j]), dbeta_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(dbeta[i,j]), dbeta_num,
+ lossph, lossmh)
}
}
}
@@ -310,7 +274,7 @@ conv = function() {
dX_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
}
}
@@ -329,7 +293,7 @@ conv = function() {
dW_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
}
}
@@ -348,7 +312,7 @@ conv = function() {
db_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
}
}
}
@@ -401,7 +365,7 @@ conv_builtin = function() {
dX_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
}
}
@@ -422,7 +386,7 @@ conv_builtin = function() {
dW_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
}
}
@@ -443,7 +407,7 @@ conv_builtin = function() {
db_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
}
}
}
@@ -495,7 +459,7 @@ conv_simple = function() {
dX_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
}
}
@@ -516,7 +480,7 @@ conv_simple = function() {
dW_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
}
}
@@ -537,7 +501,7 @@ conv_simple = function() {
db_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
}
}
}
@@ -573,7 +537,7 @@ cross_entropy_loss = function() {
dpred_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh)
}
}
}
@@ -613,7 +577,7 @@ dropout = function() {
dX_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
}
}
}
@@ -647,7 +611,7 @@ l1_loss = function() {
dpred_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh)
}
}
}
@@ -681,7 +645,8 @@ l1_reg = function() {
dW_num = (reg_lossph-reg_lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, reg_lossph, reg_lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(dW[i,j]), dW_num,
+ reg_lossph, reg_lossmh)
}
}
}
@@ -715,7 +680,7 @@ l2_loss = function() {
dpred_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh)
}
}
}
@@ -749,7 +714,8 @@ l2_reg = function() {
dW_num = (reg_lossph-reg_lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, reg_lossph, reg_lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(dW[i,j]), dW_num,
+ reg_lossph, reg_lossmh)
}
}
}
@@ -783,7 +749,7 @@ log_loss = function() {
dpred_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh)
}
}
}
@@ -835,7 +801,7 @@ lstm = function() {
dX_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
}
}
@@ -858,7 +824,7 @@ lstm = function() {
dW_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
}
}
@@ -881,7 +847,7 @@ lstm = function() {
db_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
}
}
@@ -904,7 +870,7 @@ lstm = function() {
dout0_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(dout0[i,j]), dout0_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(dout0[i,j]), dout0_num, lossph, lossmh)
}
}
@@ -927,7 +893,7 @@ lstm = function() {
dc0_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(dc0[i,j]), dc0_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(dc0[i,j]), dc0_num, lossph, lossmh)
}
}
}
@@ -975,7 +941,7 @@ max_pool = function() {
dX_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
}
}
}
@@ -1027,7 +993,7 @@ max_pool_builtin = function() {
dX_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
}
}
}
@@ -1079,7 +1045,7 @@ max_pool_simple = function() {
dX_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
}
}
}
@@ -1125,7 +1091,7 @@ relu = function() {
dX_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
}
}
}
@@ -1169,7 +1135,7 @@ rnn = function() {
dX_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
}
}
@@ -1188,7 +1154,7 @@ rnn = function() {
dW_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
}
}
@@ -1207,7 +1173,7 @@ rnn = function() {
db_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
}
}
@@ -1226,7 +1192,7 @@ rnn = function() {
dout0_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(dout0[i,j]), dout0_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(dout0[i,j]), dout0_num, lossph, lossmh)
}
}
}
@@ -1264,7 +1230,7 @@ sigmoid = function() {
dX_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
}
}
}
@@ -1303,7 +1269,7 @@ softmax = function() {
dX_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
}
}
}
@@ -1368,7 +1334,7 @@ spatial_batch_norm = function() {
dX_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
}
}
@@ -1391,7 +1357,8 @@ spatial_batch_norm = function() {
dgamma_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(dgamma[i,j]), dgamma_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(dgamma[i,j]), dgamma_num,
+ lossph, lossmh)
}
}
@@ -1414,7 +1381,8 @@ spatial_batch_norm = function() {
dbeta_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(dbeta[i,j]), dbeta_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(dbeta[i,j]), dbeta_num,
+ lossph, lossmh)
}
}
}
@@ -1454,7 +1422,7 @@ tanh = function() {
dX_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
}
}
}
@@ -1522,7 +1490,7 @@ two_layer_affine_l2_net = function() {
dX_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
}
}
@@ -1539,7 +1507,7 @@ two_layer_affine_l2_net = function() {
dWij_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(dW1[i,j]), dWij_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(dW1[i,j]), dWij_num, lossph, lossmh)
}
}
@@ -1556,7 +1524,7 @@ two_layer_affine_l2_net = function() {
dWij_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(dW2[i,j]), dWij_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(dW2[i,j]), dWij_num, lossph, lossmh)
}
}
@@ -1573,7 +1541,7 @@ two_layer_affine_l2_net = function() {
dbij_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(db1[i,j]), dbij_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(db1[i,j]), dbij_num, lossph, lossmh)
}
}
@@ -1590,7 +1558,7 @@ two_layer_affine_l2_net = function() {
dbij_num = (lossph-lossmh) / (2*h) # numerical derivative
# Check error
- rel_error = check_rel_error(as.scalar(db2[i,j]), dbij_num, lossph, lossmh)
+ rel_error = test_util::check_rel_grad_error(as.scalar(db2[i,j]), dbij_num, lossph, lossmh)
}
}
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5c59e03b/scripts/staging/SystemML-NN/nn/test/test.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/test.dml b/scripts/staging/SystemML-NN/nn/test/test.dml
index b25fae2..8fb0d04 100644
--- a/scripts/staging/SystemML-NN/nn/test/test.dml
+++ b/scripts/staging/SystemML-NN/nn/test/test.dml
@@ -32,6 +32,7 @@ source("nn/layers/spatial_batch_norm.dml") as spatial_batch_norm
source("nn/layers/tanh.dml") as tanh
source("nn/test/conv_simple.dml") as conv_simple
source("nn/test/max_pool_simple.dml") as max_pool_simple
+source("nn/test/util.dml") as test_util
source("nn/util.dml") as util
batch_norm = function() {
@@ -62,7 +63,7 @@ batch_norm = function() {
1.34160733 1.34160721 1.34160733 1.34160733", rows=1, cols=N*D)
out = matrix(out, rows=1, cols=N*D)
for (i in 1:length(out)) {
- rel_error = util::check_rel_error(as.scalar(out[1,i]),
+ rel_error = test_util::check_rel_error(as.scalar(out[1,i]),
as.scalar(target[1,i]), 1e-3, 1e-4)
}
}
@@ -100,9 +101,9 @@ conv = function() {
out_simple = matrix(out_simple, rows=1, cols=N*F*Hout*Wout)
out_builtin = matrix(out_builtin, rows=1, cols=N*F*Hout*Wout)
for (i in 1:length(out)) {
- rel_error = util::check_rel_error(as.scalar(out[1,i]),
+ rel_error = test_util::check_rel_error(as.scalar(out[1,i]),
as.scalar(out_simple[1,i]), 1e-10, 1e-12)
- rel_error = util::check_rel_error(as.scalar(out[1,i]),
+ rel_error = test_util::check_rel_error(as.scalar(out[1,i]),
as.scalar(out_builtin[1,i]), 1e-10, 1e-12)
}
}
@@ -160,7 +161,7 @@ im2col = function() {
x_pad2 = util::col2im(x_cols, C, Hin+2*pad, Win+2*pad, Hf, Wf, stride, stride, "none")
# Equivalency check
- equivalent = util::all_equal(x_pad, x_pad2)
+ equivalent = test_util::all_equal(x_pad, x_pad2)
if (!equivalent) {
print("ERROR: im2col and then col2im does not yield the original image.")
}
@@ -199,7 +200,7 @@ padding = function() {
x1 = util::unpad_image(x_pad, Hin, Win, pad, pad)
# Equivalency check
- equivalent = util::all_equal(x, x1)
+ equivalent = test_util::all_equal(x, x1)
if (!equivalent) {
print("ERROR: Padding and then unpadding does not yield the original image.")
}
@@ -238,9 +239,9 @@ max_pool = function() {
out_simple = matrix(out_simple, rows=1, cols=N*C*Hout*Wout)
out_builtin = matrix(out_builtin, rows=1, cols=N*C*Hout*Wout)
for (i in 1:length(out)) {
- rel_error = util::check_rel_error(as.scalar(out[1,i]),
+ rel_error = test_util::check_rel_error(as.scalar(out[1,i]),
as.scalar(out_simple[1,i]), 1e-10, 1e-12)
- rel_error = util::check_rel_error(as.scalar(out[1,i]),
+ rel_error = test_util::check_rel_error(as.scalar(out[1,i]),
as.scalar(out_builtin[1,i]), 1e-10, 1e-12)
}
@@ -257,9 +258,9 @@ max_pool = function() {
dX_simple = matrix(dX_simple, rows=1, cols=N*C*Hin*Win)
dX_builtin = matrix(dX_builtin, rows=1, cols=N*C*Hin*Win)
for (i in 1:length(dX)) {
- rel_error = util::check_rel_error(as.scalar(dX[1,i]),
+ rel_error = test_util::check_rel_error(as.scalar(dX[1,i]),
as.scalar(dX_simple[1,i]), 1e-10, 1e-12)
- rel_error = util::check_rel_error(as.scalar(dX[1,i]),
+ rel_error = test_util::check_rel_error(as.scalar(dX[1,i]),
as.scalar(dX_builtin[1,i]), 1e-10, 1e-12)
}
}
@@ -302,9 +303,9 @@ max_pool = function() {
# 8 16
target = matrix("6 8 14 16 6 14 8 16", rows=1, cols=C*Hout*Wout)
target = rbind(target, target) # n=2
- tmp = util::check_all_equal(out, target)
- tmp = util::check_all_equal(out_simple, target)
- tmp = util::check_all_equal(out_builtin, target)
+ tmp = test_util::check_all_equal(out, target)
+ tmp = test_util::check_all_equal(out_simple, target)
+ tmp = test_util::check_all_equal(out_builtin, target)
print(" - Testing for correct behavior against known answer w/ pad=1.")
# generate data
@@ -342,9 +343,9 @@ max_pool = function() {
# 4 12 16
target = matrix("1 3 4 9 11 12 13 15 16 1 9 13 3 11 15 4 12 16", rows=1, cols=C*Hout*Wout)
target = rbind(target, target) # n=2
- tmp = util::check_all_equal(out, target)
- tmp = util::check_all_equal(out_simple, target)
- tmp = util::check_all_equal(out_builtin, target)
+ tmp = test_util::check_all_equal(out, target)
+ tmp = test_util::check_all_equal(out_simple, target)
+ tmp = test_util::check_all_equal(out_builtin, target)
print(" - Testing for correct behavior against known answer w/ all negative matrix w/ pad=0.")
# generate data
@@ -377,9 +378,9 @@ max_pool = function() {
# -3 -11
target = matrix("-1 -3 -9 -11 -1 -9 -3 -11", rows=1, cols=C*Hout*Wout)
target = rbind(target, target) # n=2
- tmp = util::check_all_equal(out, target)
- tmp = util::check_all_equal(out_simple, target)
- tmp = util::check_all_equal(out_builtin, target)
+ tmp = test_util::check_all_equal(out, target)
+ tmp = test_util::check_all_equal(out_simple, target)
+ tmp = test_util::check_all_equal(out_builtin, target)
print(" - Testing for correct behavior against known answer w/ all negative matrix w/ pad=1.")
@@ -418,9 +419,9 @@ max_pool = function() {
# 0 0 0
target = matrix("-1 -2 -4 -5 -6 -8 -13 -14 -16 -1 -5 -13 -2 -6 -14 -4 -8 -16", rows=1, cols=C*Hout*Wout)
target = rbind(target, target) # n=2
- tmp = util::check_all_equal(out, target)
- tmp = util::check_all_equal(out_simple, target)
- tmp = util::check_all_equal(out_builtin, target)
+ tmp = test_util::check_all_equal(out, target)
+ tmp = test_util::check_all_equal(out_simple, target)
+ tmp = test_util::check_all_equal(out_builtin, target)
}
spatial_batch_norm = function() {
@@ -509,7 +510,7 @@ spatial_batch_norm = function() {
cols=N*C*Hin*Win)
out = matrix(out, rows=1, cols=N*C*Hin*Win)
for (i in 1:length(out)) {
- rel_error = util::check_rel_error(as.scalar(out[1,i]),
+ rel_error = test_util::check_rel_error(as.scalar(out[1,i]),
as.scalar(target[1,i]), 1e-3, 1e-4)
}
}
@@ -531,7 +532,7 @@ tanh = function() {
# Equivalency check
for (i in 1:nrow(out)) {
for (j in 1:ncol(out)) {
- rel_error = util::check_rel_error(as.scalar(out[i,j]), as.scalar(out_ref[i,j]), 1e-10, 1e-12)
+ rel_error = test_util::check_rel_error(as.scalar(out[i,j]), as.scalar(out_ref[i,j]), 1e-10, 1e-12)
}
}
}
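For intuition on the (thresh_error, thresh_warn) pairs used throughout these tests, a worked example of the smoothed relative error (values illustrative):

# compute_rel_error(1.0, 1.001)
#   = |1.0 - 1.001| / max(1e-8, |1.0| + |1.001|)
#   = 0.001 / 2.001
#   ~ 5.0e-4
# With thresholds (1e-3, 1e-4), as in several calls above, this value is
# <= 1e-3 but > 1e-4, so check_rel_error prints a WARNING, not an ERROR.
rel_error = test_util::check_rel_error(1.0, 1.001, 1e-3, 1e-4)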
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5c59e03b/scripts/staging/SystemML-NN/nn/test/util.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/util.dml b/scripts/staging/SystemML-NN/nn/test/util.dml
new file mode 100644
index 0000000..128e4db
--- /dev/null
+++ b/scripts/staging/SystemML-NN/nn/test/util.dml
@@ -0,0 +1,155 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+/*
+ * Test utility functions.
+ */
+
+all_equal = function(matrix[double] X1, matrix[double] X2)
+ return(boolean equivalent) {
+ /*
+ * Determine if two matrices are equivalent.
+ *
+ * Inputs:
+ * - X1: Inputs, of shape (any, any).
+ * - X2: Inputs, of same shape as X1.
+ *
+ * Outputs:
+ * - equivalent: Whether or not the two matrices are equivalent.
+ */
+ equivalent = as.logical(prod(X1 == X2))
+}
+
+check_all_equal = function(matrix[double] X1, matrix[double] X2)
+ return(boolean equivalent) {
+ /*
+ * Check if two matrices are equivalent, and report any issues.
+ *
+ * Issues an "ERROR" statement if elements of the two matrices are
+ * not equal.
+ *
+ * Inputs:
+ * - X1: Inputs, of shape (any, any).
+ * - X2: Inputs, of same shape as X1.
+ *
+ * Outputs:
+ * - equivalent: Whether or not the two matrices are equivalent.
+ */
+ # Determine if matrices are equivalent
+ equivalent = all_equal(X1, X2)
+
+ # Evaluate relative error
+ if (!equivalent) {
+ print("ERROR: The two matrices are not equivalent.")
+ }
+}
+
+compute_rel_error = function(double x1, double x2)
+ return (double rel_error) {
+ /*
+ * Relative error measure between two values.
+ *
+ * Uses smoothing to avoid divide-by-zero errors.
+ *
+ * Inputs:
+ * - x1: First value.
+ * - x2: Second value.
+ *
+ * Outputs:
+ * - rel_error: Relative error measure between the two values.
+ */
+ rel_error = abs(x1-x2) / max(1e-8, abs(x1)+abs(x2))
+}
+
+check_rel_error = function(double x1, double x2, double thresh_error, double thresh_warn)
+ return (double rel_error) {
+ /*
+ * Check and report any issues with the relative error measure between
+ * two values.
+ *
+ * Issues an "ERROR" statement for relative errors > thresh_error,
+ * indicating that the implementation is likely incorrect.
+ *
+ * Issues a "WARNING" statement for relative errors < thresh_error
+ * but > thresh_warn, indicating that the implementation may be
+ * incorrect.
+ *
+ * Inputs:
+ * - x1: First value.
+ * - x2: Second value.
+ * - thresh_error: Error threshold.
+ * - thresh_warn: Warning threshold.
+ *
+ * Outputs:
+ * - rel_error: Relative error measure between the two values.
+ */
+ # Compute relative error
+ rel_error = compute_rel_error(x1, x2)
+
+ # Evaluate relative error
+ if (rel_error > thresh_error) {
+ print("ERROR: Relative error " + rel_error + " > " + thresh_error + " with " + x1 +
+ " vs " + x2 + ".")
+ }
+ else if (rel_error > thresh_warn & rel_error <= thresh_error) {
+ print("WARNING: Relative error " + rel_error + " > " + thresh_warn + " & <= " + thresh_error +
+ " with " + x1 + " vs " + x2 + ".")
+ }
+}
+
+check_rel_grad_error = function(double dw_a, double dw_n, double lossph, double lossmh)
+ return (double rel_error) {
+ /*
+ * Check and report any issues with the relative error measure between
+ * the analytical and numerical partial derivatives.
+ *
+ * - Issues an "ERROR" statement for relative errors > 1e-2,
+ * indicating that the gradient is likely incorrect.
+ * - Issues a "WARNING" statement for relative errors < 1e-2
+ * but > 1e-4, indicating that the gradient may be incorrect.
+ *
+ * Inputs:
+ * - dw_a: Analytical partial derivative wrt w.
+ * - dw_n: Numerical partial derivative wrt w.
+ * - lossph: Loss evaluated with w set to w+h.
+ * - lossmh: Loss evaluated with w set to w-h.
+ *
+ * Outputs:
+ * - rel_error: Relative error measure between the two derivatives.
+ */
+ # Compute relative error
+ rel_error = compute_rel_error(dw_a, dw_n)
+
+ # Evaluate relative error
+ thresh_error = 1e-2
+ thresh_warn = 1e-4
+ if (rel_error > thresh_error) {
+ print("ERROR: Relative error " + rel_error + " > " + thresh_error + " with " + dw_a +
+ " analytical vs " + dw_n + " numerical, with lossph " + lossph +
+ " and lossmh " + lossmh)
+ }
+ else if (rel_error > thresh_warn & rel_error <= thresh_error) {
+ print("WARNING: Relative error " + rel_error + " > " + thresh_warn + " & <= " + thresh_error +
+ " with " + dw_a + " analytical vs " + dw_n + " numerical, with lossph " + lossph +
+ " and lossmh " + lossmh)
+ }
+}
+
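
For readers following the `util::` -> `test_util::` renames in the tests
above, a minimal usage sketch of these utilities (illustrative only, not
part of this diff; assumes the file is sourced under the `test_util`
alias used by the tests):

  source("nn/test/util.dml") as test_util

  X1 = matrix("1 2 3 4", rows=2, cols=2)
  X2 = matrix("1 2 3 4", rows=2, cols=2)
  tmp = test_util::check_all_equal(X1, X2)  # silent: matrices are equivalent
  # rel_error = 0.001/2.001 ~= 5e-4, which is > 1e-4 and <= 1e-2 => WARNING
  rel_error = test_util::check_rel_error(1.0, 1.001, 1e-2, 1e-4)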
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5c59e03b/scripts/staging/SystemML-NN/nn/util.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/util.dml b/scripts/staging/SystemML-NN/nn/util.dml
index 6b86225..405d208 100644
--- a/scripts/staging/SystemML-NN/nn/util.dml
+++ b/scripts/staging/SystemML-NN/nn/util.dml
@@ -23,98 +23,6 @@
* Utility functions.
*/
-all_equal = function(matrix[double] X1, matrix[double] X2)
- return(boolean equivalent) {
- /*
- * Determine if two matrices are equivalent.
- *
- * Inputs:
- * - X1: Inputs, of shape (any, any).
- * - X2: Inputs, of same shape as X1.
- *
- * Outputs:
- * - equivalent: Whether or not the two matrices are equivalent.
- */
- equivalent = as.logical(prod(X1 == X2))
-}
-
-check_all_equal = function(matrix[double] X1, matrix[double] X2)
- return(boolean equivalent) {
- /*
- * Check if two matrices are equivalent, and report any issues.
- *
- * Issues an "ERROR" statement if elements of the two matrices are
- * not equal.
- *
- * Inputs:
- * - X1: Inputs, of shape (any, any).
- * - X2: Inputs, of same shape as X1.
- *
- * Outputs:
- * - equivalent: Whether or not the two matrices are equivalent.
- */
- # Determine if matrices are equivalent
- equivalent = all_equal(X1, X2)
-
- # Evaluate relative error
- if (!equivalent) {
- print("ERROR: The two matrices are not equivalent.")
- }
-}
-
-compute_rel_error = function(double x1, double x2)
- return (double rel_error) {
- /*
- * Relative error measure between two values.
- *
- * Uses smoothing to avoid divide-by-zero errors.
- *
- * Inputs:
- * - x1: First value.
- * - x2: Second value.
- *
- * Outputs:
- * - rel_error: Relative error measure between the two values.
- */
- rel_error = abs(x1-x2) / max(1e-8, abs(x1)+abs(x2))
-}
-
-check_rel_error = function(double x1, double x2, double thresh_error, double thresh_warn)
- return (double rel_error) {
- /*
- * Check and report any issues with the relative error measure between
- * two values.
- *
- * Issues an "ERROR" statement for relative errors > thresh_error,
- * indicating that the implementation is likely incorrect.
- *
- * Issues a "WARNING" statement for relative errors < thresh_error
- * but > thresh_warn, indicating that the implementation may be
- * incorrect.
- *
- * Inputs:
- * - x1: First value.
- * - x2: Second value.
- * - thresh_error: Error threshold.
- * - thresh_warn: Warning threshold.
- *
- * Outputs:
- * - rel_error: Relative error measure between the two values.
- */
- # Compute relative error
- rel_error = compute_rel_error(x1, x2)
-
- # Evaluate relative error
- if (rel_error > thresh_error) {
- print("ERROR: Relative error " + rel_error + " > " + thresh_error + " with " + x1 +
- " vs " + x2 + ".")
- }
- else if (rel_error > thresh_warn & rel_error <= thresh_error) {
- print("WARNING: Relative error " + rel_error + " > " + thresh_warn + " & <= " + thresh_error +
- " with " + x1 + " vs " + x2 + ".")
- }
-}
-
channel_sums = function(matrix[double] X, int C, int Hin, int Win)
return (matrix[double] out) {
/*
[2/7] incubator-systemml git commit: [SYSTEMML-1452] General code
cleanup of SystemML-NN
Posted by du...@apache.org.
[SYSTEMML-1452] General code cleanup of SystemML-NN
This commit performs a general code & documentation cleanup across the
library.
Closes #447.
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/16b1cbd7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/16b1cbd7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/16b1cbd7
Branch: refs/heads/master
Commit: 16b1cbd72601afbed0b19c1d4125a898fd324b1c
Parents: 2e48d95
Author: Mike Dusenberry <mw...@us.ibm.com>
Authored: Fri Mar 31 18:38:15 2017 -0700
Committer: Mike Dusenberry <mw...@us.ibm.com>
Committed: Fri Mar 31 18:38:16 2017 -0700
----------------------------------------------------------------------
projects/breast_cancer/hyperparam_tuning.dml | 8 +-
projects/breast_cancer/softmax_clf.dml | 16 +--
.../staging/SystemML-NN/nn/layers/affine.dml | 36 ++++---
.../SystemML-NN/nn/layers/batch_norm.dml | 17 +--
scripts/staging/SystemML-NN/nn/layers/conv.dml | 50 ++++-----
.../SystemML-NN/nn/layers/conv_builtin.dml | 63 ++++++-----
.../nn/layers/cross_entropy_loss.dml | 29 +++--
.../staging/SystemML-NN/nn/layers/dropout.dml | 23 ++--
.../staging/SystemML-NN/nn/layers/l1_loss.dml | 29 +++--
.../staging/SystemML-NN/nn/layers/l1_reg.dml | 15 +--
.../staging/SystemML-NN/nn/layers/l2_loss.dml | 29 +++--
.../staging/SystemML-NN/nn/layers/l2_reg.dml | 15 +--
.../staging/SystemML-NN/nn/layers/log_loss.dml | 40 ++++---
scripts/staging/SystemML-NN/nn/layers/lstm.dml | 65 ++++++------
.../staging/SystemML-NN/nn/layers/max_pool.dml | 15 +--
.../SystemML-NN/nn/layers/max_pool_builtin.dml | 14 +--
scripts/staging/SystemML-NN/nn/layers/relu.dml | 22 ++--
scripts/staging/SystemML-NN/nn/layers/rnn.dml | 43 ++++----
.../staging/SystemML-NN/nn/layers/sigmoid.dml | 30 ++++--
.../staging/SystemML-NN/nn/layers/softmax.dml | 29 ++---
.../nn/layers/spatial_batch_norm.dml | 12 +--
scripts/staging/SystemML-NN/nn/layers/tanh.dml | 28 ++---
.../staging/SystemML-NN/nn/optim/adagrad.dml | 22 ++--
scripts/staging/SystemML-NN/nn/optim/adam.dml | 38 +++----
.../staging/SystemML-NN/nn/optim/rmsprop.dml | 24 +++--
scripts/staging/SystemML-NN/nn/optim/sgd.dml | 12 ++-
.../SystemML-NN/nn/optim/sgd_momentum.dml | 24 +++--
.../SystemML-NN/nn/optim/sgd_nesterov.dml | 23 ++--
.../staging/SystemML-NN/nn/test/conv_simple.dml | 51 ++++-----
.../staging/SystemML-NN/nn/test/grad_check.dml | 106 ++++++++++---------
.../SystemML-NN/nn/test/max_pool_simple.dml | 18 ++--
scripts/staging/SystemML-NN/nn/util.dml | 46 ++++----
32 files changed, 549 insertions(+), 443 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/projects/breast_cancer/hyperparam_tuning.dml
----------------------------------------------------------------------
diff --git a/projects/breast_cancer/hyperparam_tuning.dml b/projects/breast_cancer/hyperparam_tuning.dml
index 464c659..4f054c3 100644
--- a/projects/breast_cancer/hyperparam_tuning.dml
+++ b/projects/breast_cancer/hyperparam_tuning.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -66,7 +66,9 @@ parfor(j in 1:10000) {
log_interval = 10
# Train
- [Wc1, bc1, Wc2, bc2, Wc3, bc3, Wa1, ba1, Wa2, ba2] = clf::train(X, Y, X_val, Y_val, C, Hin, Win, lr, mu, decay, lambda, batch_size, epochs, log_interval, dir)
+ [Wc1, bc1, Wc2, bc2, Wc3, bc3, Wa1, ba1, Wa2, ba2] =
+ clf::train(X, Y, X_val, Y_val, C, Hin, Win, lr, mu, decay, lambda, batch_size, epochs,
+ log_interval, dir)
# Eval
#probs = clf::predict(X, C, Hin, Win, Wc1, bc1, Wc2, bc2, Wc3, bc3, Wa1, ba1, Wa2, ba2)
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/projects/breast_cancer/softmax_clf.dml
----------------------------------------------------------------------
diff --git a/projects/breast_cancer/softmax_clf.dml b/projects/breast_cancer/softmax_clf.dml
index e106a36..35fd545 100644
--- a/projects/breast_cancer/softmax_clf.dml
+++ b/projects/breast_cancer/softmax_clf.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -79,7 +79,7 @@ train = function(matrix[double] X, matrix[double] Y,
accuracy_val = mean(rowIndexMax(probs_val) == rowIndexMax(Y_val))
# Output results
print("Start: Val Loss: " + loss_val + ", Val Accuracy: " + accuracy_val)
-
+
# Optimize
print("Starting optimization")
iters = ceil(N / batch_size)
@@ -152,7 +152,7 @@ predict = function(matrix[double] X, matrix[double] W, matrix[double] b)
*/
N = nrow(X) # num examples
K = ncol(W) # num classes
-
+
# Compute forward pass
## affine & softmax:
out = affine::forward(X, W, b)
@@ -185,7 +185,7 @@ eval = function(matrix[double] probs, matrix[double] Y)
generate_dummy_data = function()
return (matrix[double] X, matrix[double] Y, int C, int Hin, int Win) {
/*
- * Generate a dummy dataset similar to the MNIST dataset.
+ * Generate a dummy dataset similar to the breast cancer dataset.
*
* Outputs:
* - X: Input data matrix, of shape (N, D).
@@ -196,9 +196,9 @@ generate_dummy_data = function()
*/
# Generate dummy input data
N = 1024 # num examples
- C = 1 # num input channels
- Hin = 28 # input height
- Win = 28 # input width
+ C = 3 # num input channels
+ Hin = 256 # input height
+ Win = 256 # input width
T = 10 # num targets
X = rand(rows=N, cols=C*Hin*Win, pdf="normal")
classes = round(rand(rows=N, cols=1, min=1, max=T, pdf="uniform"))
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/scripts/staging/SystemML-NN/nn/layers/affine.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/affine.dml b/scripts/staging/SystemML-NN/nn/layers/affine.dml
index 6a4c210..f9f8559 100644
--- a/scripts/staging/SystemML-NN/nn/layers/affine.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/affine.dml
@@ -22,6 +22,7 @@
/*
* Fully-connected (affine) layer.
*/
+
forward = function(matrix[double] X, matrix[double] W, matrix[double] b)
return (matrix[double] out) {
/*
@@ -29,9 +30,9 @@ forward = function(matrix[double] X, matrix[double] W, matrix[double] b)
* M neurons. The input data has N examples, each with D features.
*
* Inputs:
- * - X: Input data matrix, of shape (N, D).
- * - W: Weights (parameters) matrix, of shape (D, M).
- * - b: Biases vector, of shape (1, M).
+ * - X: Inputs, of shape (N, D).
+ * - W: Weights, of shape (D, M).
+ * - b: Biases, of shape (1, M).
*
* Outputs:
* - out: Outputs, of shape (N, M).
@@ -47,15 +48,15 @@ backward = function(matrix[double] dout, matrix[double] X,
* with M neurons.
*
* Inputs:
- * - dout: Derivatives from upstream, of shape (N, M).
- * - X: Previous input data matrix, of shape (N, D).
- * - W: Weights (parameters) matrix, of shape (D, M).
- * - b: Biases vector, of shape (1, M).
+ * - dout: Gradient wrt `out` from upstream, of shape (N, M).
+ * - X: Inputs, of shape (N, D).
+ * - W: Weights, of shape (D, M).
+ * - b: Biases, of shape (1, M).
*
* Outputs:
- * - dX: Gradient wrt X, of shape (N, D).
- * - dW: Gradient wrt W, of shape (D, M).
- * - db: Gradient wrt b, of shape (1, M).
+ * - dX: Gradient wrt `X`, of shape (N, D).
+ * - dW: Gradient wrt `W`, of shape (D, M).
+ * - db: Gradient wrt `b`, of shape (1, M).
*/
dX = dout %*% t(W)
dW = t(X) %*% dout
@@ -70,18 +71,19 @@ init = function(int D, int M)
* Note: This is just a convenience function, and parameters
* may be initialized manually if needed.
*
- * We use the heuristic by He et al. [http://arxiv.org/abs/1502.01852],
- * which limits the magnification of inputs/gradients during
- * forward/backward passes by scaling unit-Gaussian weights by a
- * factor of sqrt(2/n), under the assumption of relu neurons.
+ * We use the heuristic by He et al., which limits the magnification
+ * of inputs/gradients during forward/backward passes by scaling
+ * unit-Gaussian weights by a factor of sqrt(2/n), under the
+ * assumption of relu neurons.
+ * - http://arxiv.org/abs/1502.01852
*
* Inputs:
- * - D: Dimensionality of the input features.
+ * - D: Dimensionality of the input features (number of features).
* - M: Number of neurons in this layer.
*
* Outputs:
- * - W: Weight matrix, of shape (D, M).
- * - b: Biases vector, of shape (1, M).
+ * - W: Weights, of shape (D, M).
+ * - b: Biases, of shape (1, M).
*/
W = rand(rows=D, cols=M, pdf="normal") * sqrt(2.0/D)
b = matrix(0, rows=1, cols=M)
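
For context on the calling convention these docstrings describe, a
minimal forward/backward sketch for the affine layer (an illustrative
aside, not from the diff; shapes chosen arbitrarily):

  source("nn/layers/affine.dml") as affine

  X = rand(rows=4, cols=8)                # N=4 examples, D=8 features
  [W, b] = affine::init(8, 3)             # M=3 neurons, He-scaled weights
  out = affine::forward(X, W, b)          # shape (4, 3)
  dout = rand(rows=4, cols=3)             # dummy upstream gradient
  [dX, dW, db] = affine::backward(dout, X, W, b)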
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/scripts/staging/SystemML-NN/nn/layers/batch_norm.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/batch_norm.dml b/scripts/staging/SystemML-NN/nn/layers/batch_norm.dml
index d332e8c..82240f7 100644
--- a/scripts/staging/SystemML-NN/nn/layers/batch_norm.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/batch_norm.dml
@@ -22,6 +22,7 @@
/*
* Batch normalization layer.
*/
+
forward = function(matrix[double] X, matrix[double] gamma, matrix[double] beta,
string mode, matrix[double] ema_mean, matrix[double] ema_var,
double mu, double epsilon)
@@ -36,7 +37,7 @@ forward = function(matrix[double] X, matrix[double] gamma, matrix[double] beta,
* introduces learnable parameters (gamma, beta) to control the
* amount of normalization.
*
- * y = ((x-mean) / sqrt(var+eps)) * gamma + beta
+ * `y = ((x-mean) / sqrt(var+eps)) * gamma + beta`
*
* This implementation maintains exponential moving averages of the
* mean and variance during training for use during testing.
@@ -47,7 +48,7 @@ forward = function(matrix[double] X, matrix[double] gamma, matrix[double] beta,
* - https://arxiv.org/abs/1502.03167
*
* Inputs:
- * - X: Input data matrix, of shape (N, D).
+ * - X: Inputs, of shape (N, D).
* - gamma: Scale parameters, of shape (1, D).
* - beta: Shift parameters, of shape (1, D).
* - mode: 'train' or 'test' to indicate if the model is currently
@@ -118,7 +119,7 @@ backward = function(matrix[double] dout, matrix[double] out,
* Computes the backward pass for a batch normalization layer.
*
* Inputs:
- * - dout: Derivatives from upstream, of shape (N, D).
+ * - dout: Gradient wrt `out` from upstream, of shape (N, D).
* - out: Outputs from the forward pass, of shape (N, D).
* - ema_mean_upd: Updated exponential moving average of the mean
* from the forward pass, of shape (1, D).
@@ -133,7 +134,7 @@ backward = function(matrix[double] dout, matrix[double] out,
* - cache_norm: Cache of the normalized inputs from the forward
* pass, of shape (N, D). Note: This is used for performance
* during training.
- * - X: Input data matrix to the forward pass, of shape (N, D).
+ * - X: Inputs, of shape (N, D).
* - gamma: Scale parameters, of shape (1, D).
* - beta: Shift parameters, of shape (1, D).
* - mode: 'train' or 'test' to indicate if the model is currently
@@ -151,9 +152,9 @@ backward = function(matrix[double] dout, matrix[double] out,
* Typical values are in the range of [1e-5, 1e-3].
*
* Outputs:
- * - dX: Gradient wrt X, of shape (N, D).
- * - dgamma: Gradient wrt W, of shape (1, D).
- * - dbeta: Gradient wrt b, of shape (1, D).
+ * - dX: Gradient wrt `X`, of shape (N, D).
+ * - dgamma: Gradient wrt `W`, of shape (1, D).
+ * - dbeta: Gradient wrt `b`, of shape (1, D).
*
*/
N = nrow(X)
@@ -190,7 +191,7 @@ init = function(int D)
* may be initialized manually if needed.
*
* Inputs:
- * - D: Dimensionality of the input features.
+ * - D: Dimensionality of the input features (number of features).
*
* Outputs:
* - gamma: Scale parameters, of shape (1, D).
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/scripts/staging/SystemML-NN/nn/layers/conv.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/conv.dml b/scripts/staging/SystemML-NN/nn/layers/conv.dml
index cc60a46..435b3cf 100644
--- a/scripts/staging/SystemML-NN/nn/layers/conv.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/conv.dml
@@ -39,9 +39,9 @@ forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
* output maps.
*
* Inputs:
- * - X: Input data matrix, of shape (N, C*Hin*Win).
- * - W: Weights (parameters) matrix, of shape (F, C*Hf*Wf).
- * - b: Biases vector, of shape (F, 1).
+ * - X: Inputs, of shape (N, C*Hin*Win).
+ * - W: Weights, of shape (F, C*Hf*Wf).
+ * - b: Biases, of shape (F, 1).
* - C: Number of input channels (dimensionality of input depth).
* - Hin: Input height.
* - Win: Input width.
@@ -50,14 +50,14 @@ forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
* - strideh: Stride over height.
* - stridew: Stride over width.
* - padh: Padding for top and bottom sides.
- * For same output height as input, set padh = (Hf - 1) / 2,
- * assuming strideh = 1.
- * More generally, padh = (Hin*(strideh-1) + Hf - strideh) / 2
+ * For same output height as input, set `padh = (Hf - 1) / 2`,
+ * assuming `strideh = 1`.
+ * More generally, `padh = (Hin*(strideh-1) + Hf - strideh) / 2`
* preserves the spatial dimensions of the input.
* - padw: Padding for left and right sides.
- * For same output width as input, set padw = (Wf - 1) / 2,
- * assuming stridew = 1.
- * More generally, padw = (Win*(stridew-1) + Wf - stridew) / 2
+ * For same output width as input, set `padw = (Wf - 1) / 2`,
+ * assuming `stridew = 1`.
+ * More generally, `padw = (Win*(stridew-1) + Wf - stridew) / 2`
* preserves the spatial dimensions of the input.
*
* Outputs:
@@ -67,8 +67,8 @@ forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
*/
N = nrow(X)
F = nrow(W)
- Hout = as.integer((Hin + 2 * padh - Hf) / strideh + 1)
- Wout = as.integer((Win + 2 * padw - Wf) / stridew + 1)
+ Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1)
+ Wout = as.integer((Win + 2*padw - Wf)/stridew + 1)
# Create output volume
out = matrix(0, rows=N, cols=F*Hout*Wout)
@@ -101,12 +101,13 @@ backward = function(matrix[double] dout, int Hout, int Wout,
* This implementation uses `im2col` and `col2im` internally.
*
* Inputs:
- * - dout: Derivatives from upstream, of shape (N, F*Hout*Wout).
+ * - dout: Gradient wrt `out` from upstream, of
+ * shape (N, F*Hout*Wout).
* - Hout: Output height.
* - Wout: Output width.
- * - X: Previous input data matrix, of shape (N, C*Hin*Win).
- * - W: Weights (parameters) matrix, of shape (F, C*Hf*Wf).
- * - b: Biases vector, of shape (F, 1).
+ * - X: Inputs, of shape (N, C*Hin*Win).
+ * - W: Weights, of shape (F, C*Hf*Wf).
+ * - b: Biases, of shape (F, 1).
* - C: Number of input channels (dimensionality of input depth).
* - Hin: Input height.
* - Win: Input width.
@@ -118,9 +119,9 @@ backward = function(matrix[double] dout, int Hout, int Wout,
* - padw: Padding for left and right sides.
*
* Outputs:
- * - dX: Gradient wrt X, of shape (N, C*Hin*Win).
- * - dW: Gradient wrt W, of shape (F, C*Hf*Wf).
- * - db: Gradient wrt b, of shape (F, 1).
+ * - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
+ * - dW: Gradient wrt `W`, of shape (F, C*Hf*Wf).
+ * - db: Gradient wrt `b`, of shape (F, 1).
*/
N = nrow(X)
F = nrow(W)
@@ -171,10 +172,11 @@ init = function(int F, int C, int Hf, int Wf)
* Note: This is just a convenience function, and parameters
* may be initialized manually if needed.
*
- * We use the heuristic by He et al. [http://arxiv.org/abs/1502.01852],
- * which limits the magnification of inputs/gradients during
- * forward/backward passes by scaling unit-Gaussian weights by a
- * factor of sqrt(2/n), under the assumption of relu neurons.
+ * We use the heuristic by He et al., which limits the magnification
+ * of inputs/gradients during forward/backward passes by scaling
+ * unit-Gaussian weights by a factor of sqrt(2/n), under the
+ * assumption of relu neurons.
+ * - http://arxiv.org/abs/1502.01852
*
* Inputs:
* - F: Number of filters.
@@ -183,8 +185,8 @@ init = function(int F, int C, int Hf, int Wf)
* - Wf: Filter width.
*
* Outputs:
- * - W: Weights (parameters) matrix, of shape (F, C*Hf*Wf).
- * - b: Biases vector, of shape (F, 1).
+ * - W: Weights, of shape (F, C*Hf*Wf).
+ * - b: Biases, of shape (F, 1).
*/
W = rand(rows=F, cols=C*Hf*Wf, pdf="normal") * sqrt(2.0/(C*Hf*Wf))
b = matrix(0, rows=F, cols=1)
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/scripts/staging/SystemML-NN/nn/layers/conv_builtin.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/conv_builtin.dml b/scripts/staging/SystemML-NN/nn/layers/conv_builtin.dml
index 44df74a..c2b809e 100644
--- a/scripts/staging/SystemML-NN/nn/layers/conv_builtin.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/conv_builtin.dml
@@ -22,6 +22,7 @@
/*
* 2D Convolutional layer.
*/
+
forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
int C, int Hin, int Win, int Hf, int Wf,
int strideh, int stridew, int padh, int padw)
@@ -32,10 +33,10 @@ forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
* volume unrolled into a single vector.
*
* Inputs:
- * - X: Input data matrix, of shape (N, C*Hin*Win).
- * - W: Weights (parameters) matrix, of shape (F, C*Hf*Wf).
- * - b: Biases vector, of shape (F, 1).
- * - C: Number of input channels (dimensionality of input depth).
+ * - X: Inputs, of shape (N, C*Hin*Win).
+ * - W: Weights, of shape (F, C*Hf*Wf).
+ * - b: Biases, of shape (F, 1).
+ * - C: Number of input channels (dimensionality of depth).
* - Hin: Input height.
* - Win: Input width.
* - Hf: Filter height.
@@ -43,14 +44,14 @@ forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
* - strideh: Stride over height.
* - stridew: Stride over width.
* - padh: Padding for top and bottom sides.
- * For same output height as input, set padh = (Hf - 1) / 2,
- * assuming strideh = 1.
- * More generally, padh = (Hin*(strideh-1) + Hf - strideh) / 2
+ * For same output height as input, set `padh = (Hf - 1) / 2`,
+ * assuming `strideh = 1`.
+ * More generally, `padh = (Hin*(strideh-1) + Hf - strideh) / 2`
* preserves the spatial dimensions of the input.
* - padw: Padding for left and right sides.
- * For same output width as input, set padw = (Wf - 1) / 2,
- * assuming stridew = 1.
- * More generally, padw = (Win*(stridew-1) + Wf - stridew) / 2
+ * For same output width as input, set `padw = (Wf - 1) / 2`,
+ * assuming `stridew = 1`.
+ * More generally, `padw = (Win*(stridew-1) + Wf - stridew) / 2`
* preserves the spatial dimensions of the input.
*
* Outputs:
@@ -60,8 +61,8 @@ forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
*/
N = nrow(X)
F = nrow(W)
- Hout = as.integer((Hin + 2 * padh - Hf) / strideh + 1)
- Wout = as.integer((Win + 2 * padw - Wf) / stridew + 1)
+ Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1)
+ Wout = as.integer((Win + 2*padw - Wf)/stridew + 1)
# Convolution - built-in implementation
out = conv2d(X, W, input_shape=[N,C,Hin,Win], filter_shape=[F,C,Hf,Wf],
@@ -81,13 +82,14 @@ backward = function(matrix[double] dout, int Hout, int Wout,
* with F filters.
*
* Inputs:
- * - dout: Derivatives from upstream, of shape (N, F*Hout*Wout).
+ * - dout: Gradient wrt `out` from upstream, of
+ * shape (N, F*Hout*Wout).
* - Hout: Output height.
* - Wout: Output width.
- * - X: Previous input data matrix, of shape (N, C*Hin*Win).
- * - W: Weights (parameters) matrix, of shape (F, C*Hf*Wf).
- * - b: Biases vector, of shape (F, 1).
- * - C: Number of input channels (dimensionality of input depth).
+ * - X: Inputs, of shape (N, C*Hin*Win).
+ * - W: Weights, of shape (F, C*Hf*Wf).
+ * - b: Biases, of shape (F, 1).
+ * - C: Number of input channels (dimensionality of depth).
* - Hin: Input height.
* - Win: Input width.
* - Hf: Filter height.
@@ -95,12 +97,20 @@ backward = function(matrix[double] dout, int Hout, int Wout,
* - strideh: Stride over height.
* - stridew: Stride over width.
* - padh: Padding for top and bottom sides.
+ * For same output height as input, set `padh = (Hf - 1) / 2`,
+ * assuming `strideh = 1`.
+ * More generally, `padh = (Hin*(strideh-1) + Hf - strideh) / 2`
+ * preserves the spatial dimensions of the input.
* - padw: Padding for left and right sides.
+ * For same output width as input, set `padw = (Wf - 1) / 2`,
+ * assuming `stridew = 1`.
+ * More generally, `padw = (Win*(stridew-1) + Wf - stridew) / 2`
+ * preserves the spatial dimensions of the input.
*
* Outputs:
- * - dX: Gradient wrt X, of shape (N, C*Hin*Win).
- * - dW: Gradient wrt W, of shape (F, C*Hf*Wf).
- * - db: Gradient wrt b, of shape (F, 1).
+ * - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
+ * - dW: Gradient wrt `W`, of shape (F, C*Hf*Wf).
+ * - db: Gradient wrt `b`, of shape (F, 1).
*/
N = nrow(X)
F = nrow(W)
@@ -123,10 +133,11 @@ init = function(int F, int C, int Hf, int Wf)
* Note: This is just a convenience function, and parameters
* may be initialized manually if needed.
*
- * We use the heuristic by He et al. [http://arxiv.org/abs/1502.01852],
- * which limits the magnification of inputs/gradients during
- * forward/backward passes by scaling unit-Gaussian weights by a
- * factor of sqrt(2/n), under the assumption of relu neurons.
+ * We use the heuristic by He et al., which limits the magnification
+ * of inputs/gradients during forward/backward passes by scaling
+ * unit-Gaussian weights by a factor of sqrt(2/n), under the
+ * assumption of relu neurons.
+ * - http://arxiv.org/abs/1502.01852
*
* Inputs:
* - F: Number of filters.
@@ -135,8 +146,8 @@ init = function(int F, int C, int Hf, int Wf)
* - Wf: Filter width.
*
* Outputs:
- * - W: Weights (parameters) matrix, of shape (F, C*Hf*Wf).
- * - b: Biases vector, of shape (F, 1).
+ * - W: Weights, of shape (F, C*Hf*Wf).
+ * - b: Biases, of shape (F, 1).
*/
W = rand(rows=F, cols=C*Hf*Wf, pdf="normal") * sqrt(2.0/(C*Hf*Wf))
b = matrix(0, rows=F, cols=1)
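
As a quick hand-worked check of the `Hout`/`Wout` formula and the
"same" padding guidance above (illustrative numbers, not from the
diff):

  Hin = 32
  Hf = 3
  strideh = 1
  padh = (Hf-1)/2                                     # = 1
  Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1)  # (32+2-3)/1 + 1 = 32
  print("Hout: " + Hout)                              # height is preserved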
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/scripts/staging/SystemML-NN/nn/layers/cross_entropy_loss.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/cross_entropy_loss.dml b/scripts/staging/SystemML-NN/nn/layers/cross_entropy_loss.dml
index f9cd507..55552e1 100644
--- a/scripts/staging/SystemML-NN/nn/layers/cross_entropy_loss.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/cross_entropy_loss.dml
@@ -21,11 +21,8 @@
/*
* Cross-entropy loss function.
- *
- * L_i = -y_i^T * log(pred_i), where y_i and pred_i are K-dimensional
- * vectors of class probs.
- * L = (1/N) sum(L_i) for i=1 to N, where N is the number of examples.
*/
+
forward = function(matrix[double] pred, matrix[double] y)
return (double loss) {
/*
@@ -33,16 +30,26 @@ forward = function(matrix[double] pred, matrix[double] y)
* inputs consist of N examples, each with K dimensions corresponding
* to normalized probabilities of K classes.
*
+ * ```
+ * L_i = -y_i^T * log(pred_i)
+ * L = (1/N) sum(L_i) for i=1 to N
+ * ```
+ *
+ * In these equations, `L` is the total loss, `L_i` is the loss for
+ * example `i`, `y_i` is the K-dimensional vector of target class
+ * probabilities, `pred_i` is the K-dimensional vector of predicted
+ * class probabilities, and `N` is the number of examples.
+ *
* This can be interpreted as the negative log-likelihood assuming
* a Bernoulli distribution generalized to K dimensions, or a
- * Multinomial with 1 observation.
+ * Multinomial with one observation.
*
* Inputs:
- * - pred: Prediction matrix, of shape (N, K).
- * - y: Target matrix, of shape (N, K).
+ * - pred: Predictions, of shape (N, K).
+ * - y: Targets, of shape (N, K).
*
* Outputs:
- * - loss: Scalar loss, of shape (1).
+ * - loss: Average loss.
*/
N = nrow(y)
eps = 1e-10 # numerical stability to avoid log(0)
@@ -58,11 +65,11 @@ backward = function(matrix[double] pred, matrix[double] y)
* to normalized probabilities of K classes.
*
* Inputs:
- * - pred: Prediction matrix, of shape (N, K).
- * - y: Target matrix, of shape (N, K).
+ * - pred: Predictions, of shape (N, K).
+ * - y: Targets, of shape (N, K).
*
* Outputs:
- * - dpred: Gradient wrt pred, of shape (N, K).
+ * - dpred: Gradient wrt `pred`, of shape (N, K).
*/
N = nrow(y)
eps = 1e-10 # numerical stability to avoid divide-by-zero
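
To make the `L_i = -y_i^T * log(pred_i)` equation above concrete, a
tiny hand-worked example (illustrative only, not part of this diff):

  source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss

  pred = matrix("0.7 0.2 0.1", rows=1, cols=3)  # predicted class probs
  y = matrix("1 0 0", rows=1, cols=3)           # one-hot target
  loss = cross_entropy_loss::forward(pred, y)   # -log(0.7) ~= 0.357
  print("loss: " + loss)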
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/scripts/staging/SystemML-NN/nn/layers/dropout.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/dropout.dml b/scripts/staging/SystemML-NN/nn/layers/dropout.dml
index 2b1bd1d..b348642 100644
--- a/scripts/staging/SystemML-NN/nn/layers/dropout.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/dropout.dml
@@ -22,6 +22,7 @@
/*
* Dropout layer.
*/
+
forward = function(matrix[double] X, double p, int seed)
return (matrix[double] out, matrix[double] mask) {
/*
@@ -32,14 +33,13 @@ forward = function(matrix[double] X, double p, int seed)
* the outputs of neurons) at test time.
*
* Inputs:
- * - X: Input data matrix, of shape (any, any).
+ * - X: Inputs, of shape (any, any).
* - p: Probability of keeping a neuron output.
- * - seed: [Optional: -1] Random number generator seed. Setting this
- * allows for deterministic evaluation. Set to -1 for a random
- * seed.
+ * - seed: [Optional: -1] Random number generator seed to allow for
+ * deterministic evaluation. Set to -1 for a random seed.
*
* Outputs:
- * - out: Ouptuts, of same shape as X.
+ * - out: Outputs, of same shape as `X`.
* - mask: Dropout mask used to compute the output.
*/
# Normally, we might use something like
@@ -48,8 +48,7 @@ forward = function(matrix[double] X, double p, int seed)
# the `rand` function that allows us to create a mask directly.
if (seed == -1) {
mask = rand(rows=nrow(X), cols=ncol(X), min=1, max=1, sparsity=p)
- }
- else {
+ } else {
mask = rand(rows=nrow(X), cols=ncol(X), min=1, max=1, sparsity=p, seed=seed)
}
out = X * mask / p
@@ -64,13 +63,13 @@ backward = function(matrix[double] dout, matrix[double] X, double p, matrix[doub
* maintain the expected values at test time.
*
* Inputs:
- * - dout: Derivatives from upstream, of same shape as X.
- * - X: Previous input data matrix, of shape (any, any).
- * - p: Previous probability of keeping a neuron output.
- * - mask: Previous dropout mask used to compute the output.
+ * - dout: Gradient wrt `out`, of same shape as `X`.
+ * - X: Inputs, of shape (any, any).
+ * - p: Probability of keeping a neuron output.
+ * - mask: Dropout mask used to compute the output.
*
* Outputs:
- * - dX: Gradient wrt X, of same shape as X.
+ * - dX: Gradient wrt `X`, of same shape as `X`.
*/
dX = mask / p * dout
}
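
A minimal usage sketch of the dropout layer as documented above
(illustrative only; the fixed seed makes the mask reproducible):

  source("nn/layers/dropout.dml") as dropout

  X = rand(rows=2, cols=6)
  p = 0.5                                   # probability of keeping a unit
  [out, mask] = dropout::forward(X, p, 42)  # seed=42 => deterministic mask
  dout = rand(rows=2, cols=6)
  dX = dropout::backward(dout, X, p, mask)  # gradient only through kept units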
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/scripts/staging/SystemML-NN/nn/layers/l1_loss.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/l1_loss.dml b/scripts/staging/SystemML-NN/nn/layers/l1_loss.dml
index 7d6c821..24b15e2 100644
--- a/scripts/staging/SystemML-NN/nn/layers/l1_loss.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/l1_loss.dml
@@ -21,28 +21,35 @@
/*
* L1 loss function.
- *
- * L_i = sum_j(abs((pred_i)_j - (y_i)_j)) for all j.
- * L = (1/N) sum(L_i) for i=1 to N, where N is the number of examples.
*/
+
forward = function(matrix[double] pred, matrix[double] y)
return (double loss) {
/*
* Computes the forward pass for an L1 loss function. The inputs
* consist of N examples, each with M dimensions to predict.
*
+ * ```
+ * L_i = sum_j(abs((pred_i)_j - (y_i)_j)) for all j.
+ * L = (1/N) sum(L_i) for i=1 to N
+ * ```
+ *
+ * In these equations, `L` is the total loss, `L_i` is the loss for
+ * example `i`, `y_i` is the M-dimensional vector of targets,
+ * `pred_i` is the M-dimensional vector of predictions, and `N`
+ * is the number of examples.
+ *
* This can be interpreted as the negative log-likelihood assuming
* a Laplace distribution.
*
* Inputs:
- * - pred: Prediction matrix, of shape (N, M).
- * - y: Target matrix, of shape (N, M).
+ * - pred: Predictions, of shape (N, M).
+ * - y: Targets, of shape (N, M).
*
* Outputs:
- * - loss: Scalar loss, of shape (1).
+ * - loss: Average loss.
*/
N = nrow(y)
- losses = rowSums(abs(pred - y))
+ losses = rowSums(abs(pred-y))
loss = sum(losses) / N
}
@@ -53,13 +60,13 @@ backward = function(matrix[double] pred, matrix[double] y)
* consist of N examples, each with M dimensions to predict.
*
* Inputs:
- * - pred: Prediction matrix, of shape (N, M).
- * - y: Target matrix, of shape (N, M).
+ * - pred: Predictions, of shape (N, M).
+ * - y: Targets, of shape (N, M).
*
* Outputs:
- * - dpred: Gradient wrt pred, of shape (N, M).
+ * - dpred: Gradient wrt `pred`, of shape (N, M).
*/
N = nrow(y)
- dpred = sign(pred - y) / N
+ dpred = sign(pred-y) / N
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/scripts/staging/SystemML-NN/nn/layers/l1_reg.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/l1_reg.dml b/scripts/staging/SystemML-NN/nn/layers/l1_reg.dml
index b2175ab..f643274 100644
--- a/scripts/staging/SystemML-NN/nn/layers/l1_reg.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/l1_reg.dml
@@ -22,31 +22,34 @@
/*
* L1 regularization.
*/
-forward = function(matrix[double] X, double lambda) return (double reg_loss) {
+
+forward = function(matrix[double] X, double lambda)
+ return (double reg_loss) {
/*
* Computes the forward pass for an L1 regularization function.
*
* Inputs:
- * - X: Parameters, of shape (any, any).
+ * - X: Inputs, of shape (any, any).
* - lambda: Regularization strength.
* A typical value is 0.01.
*
* Outputs:
- * - reg_loss: Scalar L1 regularization loss, of shape (1).
+ * - reg_loss: Total regularization loss.
*/
reg_loss = lambda * sum(abs(X))
}
-backward = function(matrix[double] X, double lambda) return (matrix[double] dX) {
+backward = function(matrix[double] X, double lambda)
+ return (matrix[double] dX) {
/*
* Computes the backward pass for an L1 regularization function.
*
* Inputs:
- * - X: Parameters, of shape (any, any).
+ * - X: Inputs, of shape (any, any).
* - lambda: Regularization strength.
*
* Outputs:
- * - dX: Gradient wrt X, of same shape as X.
+ * - dX: Gradient wrt `X`, of same shape as `X`.
*/
dX = lambda * sign(X)
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/scripts/staging/SystemML-NN/nn/layers/l2_loss.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/l2_loss.dml b/scripts/staging/SystemML-NN/nn/layers/l2_loss.dml
index 9f27cc2..df8bc1c 100644
--- a/scripts/staging/SystemML-NN/nn/layers/l2_loss.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/l2_loss.dml
@@ -21,28 +21,35 @@
/*
* L2 loss function.
- *
- * L_i = (1/2) 2norm(pred_i - y_i)^2
- * L = (1/N) sum(L_i) for i=1 to N, where N is the number of examples.
*/
+
forward = function(matrix[double] pred, matrix[double] y)
return (double loss) {
/*
* Computes the forward pass for an L2 loss function. The inputs
* consist of N examples, each with M dimensions to predict.
*
+ * ```
+ * L_i = (1/2) norm(pred_i - y_i)^2
+ * L = (1/N) sum(L_i) for i=1 to N
+ * ```
+ *
+ * In these equations, `L` is the total loss, `L_i` is the loss for
+ * example `i`, `y_i` is the M-dimensional vector of targets,
+ * `pred_i` is the M-dimensional vector of predictions, and `N`
+ * is the number of examples.
+ *
* This can be interpreted as the negative log-likelihood assuming
* a Gaussian distribution.
*
* Inputs:
- * - pred: Prediction matrix, of shape (N, M).
- * - y: Target matrix, of shape (N, M).
+ * - pred: Predictions, of shape (N, M).
+ * - y: Targets, of shape (N, M).
*
* Outputs:
- * - loss: Scalar loss, of shape (1).
+ * - loss: Average loss.
*/
N = nrow(y)
- losses = 0.5 * rowSums((pred - y)^2)
+ losses = 0.5 * rowSums((pred-y)^2)
loss = sum(losses) / N
}
@@ -53,13 +60,13 @@ backward = function(matrix[double] pred, matrix[double] y)
* consist of N examples, each with M dimensions to predict.
*
* Inputs:
- * - pred: Prediction matrix, of shape (N, M).
- * - y: Target matrix, of shape (N, M).
+ * - pred: Predictions, of shape (N, M).
+ * - y: Targets, of shape (N, M).
*
* Outputs:
- * - dpred: Gradient wrt pred, of shape (N, M).
+ * - dpred: Gradient wrt `pred`, of shape (N, M).
*/
N = nrow(y)
- dpred = (pred - y) / N
+ dpred = (pred-y) / N
}
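
A tiny hand-worked instance of the L2 loss equations above
(illustrative values only):

  source("nn/layers/l2_loss.dml") as l2_loss

  pred = matrix("1 2", rows=1, cols=2)
  y = matrix("0 0", rows=1, cols=2)
  loss = l2_loss::forward(pred, y)    # 0.5*(1^2 + 2^2) / 1 = 2.5
  dpred = l2_loss::backward(pred, y)  # (pred-y)/N = [1, 2]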
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/scripts/staging/SystemML-NN/nn/layers/l2_reg.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/l2_reg.dml b/scripts/staging/SystemML-NN/nn/layers/l2_reg.dml
index 44f2a54..5074c06 100644
--- a/scripts/staging/SystemML-NN/nn/layers/l2_reg.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/l2_reg.dml
@@ -22,31 +22,34 @@
/*
* L2 regularization.
*/
-forward = function(matrix[double] X, double lambda) return (double reg_loss) {
+
+forward = function(matrix[double] X, double lambda)
+ return (double reg_loss) {
/*
* Computes the forward pass for an L2 regularization function.
*
* Inputs:
- * - X: Parameters, of shape (any, any).
+ * - X: Inputs, of shape (any, any).
* - lambda: Regularization strength.
* A typical value is 0.01.
*
* Outputs:
- * - reg_loss: Scalar l2 regularization loss, of shape (1).
+ * - reg_loss: Total regularization loss.
*/
reg_loss = 0.5 * lambda * sum(X^2)
}
-backward = function(matrix[double] X, double lambda) return (matrix[double] dX) {
+backward = function(matrix[double] X, double lambda)
+ return (matrix[double] dX) {
/*
* Computes the backward pass for an L2 regularization function.
*
* Inputs:
- * - X: Parameters, of shape (any, any).
+ * - X: Inputs, of shape (any, any).
* - lambda: Regularization strength.
*
* Outputs:
- * - dX: Gradient wrt X, of same shape as X.
+ * - dX: Gradient wrt `X`, of same shape as `X`.
*/
dX = lambda * X
}
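
And a corresponding sketch for the L2 regularizer (again, illustrative
values, not from the diff):

  source("nn/layers/l2_reg.dml") as l2_reg

  W = matrix("1 -2 3 -4", rows=2, cols=2)
  reg_loss = l2_reg::forward(W, 0.01)  # 0.5 * 0.01 * (1+4+9+16) = 0.15
  dW_reg = l2_reg::backward(W, 0.01)   # 0.01 * W, added to the data gradient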
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/scripts/staging/SystemML-NN/nn/layers/log_loss.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/log_loss.dml b/scripts/staging/SystemML-NN/nn/layers/log_loss.dml
index ad5e561..7dd85d3 100644
--- a/scripts/staging/SystemML-NN/nn/layers/log_loss.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/log_loss.dml
@@ -21,30 +21,37 @@
/*
* Log loss function.
- *
- * L_i = -y_i*log(pred_i) - (1-y_i)*log(1-pred_i), where y_i is a
- * binary target, and pred_i is a probability of y=1.
- * L = (1/N) sum(L_i) for i=1 to N, where N is the number of examples.
*/
+
forward = function(matrix[double] pred, matrix[double] y)
return (double loss) {
/*
* Computes the forward pass for a log loss function.
*
+ * ```
+ * L_i = -y_i*log(pred_i) - (1-y_i)*log(1-pred_i)
+ * L = (1/N) sum(L_i) for i=1 to N
+ * ```
+ *
+ * In these equations, `L` is the total loss, `L_i` is the loss for
+ * example `i`, `y_i` is the binary target, `pred_i` is probability
+ * of the true class (i.e. `y=1`), and `N` is the number of examples.
+ *
* This can be interpreted as the negative log-likelihood assuming
* a Bernoulli distribution.
*
* Inputs:
- * - pred: Prediction matrix, of shape (N, 1). Predictions should
- * be probabilities that y=1.
- * - y: Target matrix, of shape (N, 1). Targets should be binary
- * in the set {0,1}.
+ * - pred: Predictions, of shape (N, 1).
+ * Predictions should be probabilities of the true
+ * class (i.e. probability of `y=1`).
+ * - y: Targets, of shape (N, 1).
+ * Targets should be binary in the set {0, 1}.
*
* Outputs:
- * - loss: Scalar loss, of shape (1).
+ * - loss: Average loss.
*/
N = nrow(y)
- losses = -y * log(pred) - (1-y) * log(1-pred)
+ losses = -y*log(pred) - (1-y)*log(1-pred)
loss = sum(losses) / N
}
@@ -54,15 +61,16 @@ backward = function(matrix[double] pred, matrix[double] y)
* Computes the backward pass for a log loss function.
*
* Inputs:
- * - pred: Prediction matrix, of shape (N, 1). Predictions should
- * be probabilities that y=1.
- * - y: Target matrix, of shape (N, 1). Targets should be binary
- * in the set {0,1}.
+ * - pred: Predictions, of shape (N, 1).
+ * Predictions should be probabilities of the true
+ * class (i.e. probability of `y=1`).
+ * - y: Targets, of shape (N, 1).
+ * Targets should be binary in the set {0, 1}.
*
* Outputs:
- * - dpred: Gradient wrt pred, of shape (N, 1).
+ * - dpred: Gradient wrt `pred`, of shape (N, 1).
*/
N = nrow(y)
- dpred = (1/N) * (pred-y) / (pred * (1-pred))
+ dpred = (1/N) * (pred-y) / (pred*(1-pred))
}
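
A quick numeric check of the log loss equations above (hand-worked,
illustrative only):

  source("nn/layers/log_loss.dml") as log_loss

  pred = matrix("0.9 0.2", rows=2, cols=1)  # P(y=1) for two examples
  y = matrix("1 0", rows=2, cols=1)         # binary targets
  loss = log_loss::forward(pred, y)         # (-log(0.9) - log(0.8))/2 ~= 0.164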
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/scripts/staging/SystemML-NN/nn/layers/lstm.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/lstm.dml b/scripts/staging/SystemML-NN/nn/layers/lstm.dml
index 0dd9f4c..44f2ef2 100644
--- a/scripts/staging/SystemML-NN/nn/layers/lstm.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/lstm.dml
@@ -44,16 +44,16 @@ forward = function(matrix[double] X, matrix[double] W, matrix[double] b, int T,
* - http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf
*
* Inputs:
- * - X: Input data matrix, of shape (N, T*D).
- * - W: Weights (parameters) matrix, of shape (D+M, 4M).
- * - b: Biases vector, of shape (1, 4M).
+ * - X: Inputs, of shape (N, T*D).
+ * - W: Weights, of shape (D+M, 4M).
+ * - b: Biases, of shape (1, 4M).
* - T: Length of example sequences (number of timesteps).
- * - D: Dimensionality of the input features.
+ * - D: Dimensionality of the input features (number of features).
* - return_sequences: Whether to return `out` at all timesteps,
* or just for the final timestep.
- * - out0: Output matrix at previous timestep, of shape (N, M).
+ * - out0: Outputs from previous timestep, of shape (N, M).
* Note: This is *optional* and could just be an empty matrix.
- * - c0: Initial cell state matrix, of shape (N, M).
+ * - c0: Initial cell state, of shape (N, M).
* Note: This is *optional* and could just be an empty matrix.
*
* Outputs:
@@ -123,23 +123,27 @@ backward = function(matrix[double] dout, matrix[double] dc,
* Computes the backward pass for an LSTM layer with M neurons.
*
* Inputs:
- * - dout: Gradient on output from upstream. If `given_sequences`
- * is True, contains gradients on outputs for all timesteps,
- * of shape (N, T*M). Else, contains gradient on output for
- * the final timestep, of shape (N, M).
- * - dc: Gradient on final (current) cell state from later in time,
- * of shape (N, M).
- * - X: Input data matrix, of shape (N, T*D).
- * - W: Weights (parameters) matrix, of shape (D+M, 4M).
- * - b: Biases vector, of shape (1, 4M).
+ * - dout: Gradient wrt `out`. If `given_sequences` is `True`,
+ * contains gradients on outputs for all timesteps, of
+ * shape (N, T*M). Else, contains the gradient on the output
+ * for the final timestep, of shape (N, M).
+ * - dc: Gradient wrt `c` (from later in time), of shape (N, M).
+ * This would come from later in time if the cell state was used
+ * downstream as the initial cell state for another LSTM layer.
+ * Typically, this would be used when a sequence was cut at
+ * timestep `T` and then continued in the next batch. If `c`
+ * was not used downstream, then `dc` would be an empty matrix.
+ * - X: Inputs, of shape (N, T*D).
+ * - W: Weights, of shape (D+M, 4M).
+ * - b: Biases, of shape (1, 4M).
* - T: Length of example sequences (number of timesteps).
* - D: Dimensionality of the input features.
* - given_sequences: Whether `dout` is for all timesteps,
* or just for the final timestep. This is based on whether
* `return_sequences` was true in the forward pass.
- * - out0: Output matrix at previous timestep, of shape (N, M).
+ * - out0: Outputs from previous timestep, of shape (N, M).
* Note: This is *optional* and could just be an empty matrix.
- * - c0: Initial cell state matrix, of shape (N, M).
+ * - c0: Initial cell state, of shape (N, M).
* Note: This is *optional* and could just be an empty matrix.
* - cache_out: Cache of outputs, of shape (T, N*M).
* Note: This is used for performance during training.
@@ -149,11 +153,11 @@ backward = function(matrix[double] dout, matrix[double] dc,
* Note: This is used for performance during training.
*
* Outputs:
- * - dX: Gradient wrt X, of shape (N, T*D).
- * - dW: Gradient wrt W, of shape (D+M, 4M).
- * - db: Gradient wrt b, of shape (1, 4M).
- * - dout0: Gradient wrt out0, of shape (N, M).
- * - dc0: Gradient wrt c0, of shape (N, M).
+ * - dX: Gradient wrt `X`, of shape (N, T*D).
+ * - dW: Gradient wrt `W`, of shape (D+M, 4M).
+ * - db: Gradient wrt `b`, of shape (1, 4M).
+ * - dout0: Gradient wrt `out0`, of shape (N, M).
+ * - dc0: Gradient wrt `c0`, of shape (N, M).
*/
N = nrow(X)
M = as.integer(ncol(W)/4)
@@ -190,7 +194,7 @@ backward = function(matrix[double] dout, matrix[double] dc,
g = ifog[,3*M+1:4*M] # g gate, shape (N, M)
tmp = tanh::backward(dout_t, ct)
- dct = dct + o * tmp # shape (N, M)
+ dct = dct + o*tmp # shape (N, M)
tmp = tanh::forward(ct)
do = tmp * dout_t # output gate, shape (N, M)
df = c_prev * dct # forget gate, shape (N, M)
@@ -201,7 +205,7 @@ backward = function(matrix[double] dout, matrix[double] dc,
di_raw = i * (1-i) * di
df_raw = f * (1-f) * df
do_raw = o * (1-o) * do
- dg_raw = (1 - g^2) * dg
+ dg_raw = (1-g^2) * dg
difog_raw = cbind(di_raw, cbind(df_raw, cbind(do_raw, dg_raw))) # shape (N, 4M)
dW = dW + t(input) %*% difog_raw # shape (D+M, 4M)
@@ -217,7 +221,7 @@ backward = function(matrix[double] dout, matrix[double] dc,
dout[,(t-2)*M+1:(t-1)*M] = dout[,(t-2)*M+1:(t-1)*M] + dout_prev # shape (N, M)
dct = dc_prev # shape (N, M)
}
- t = t-1
+ t = t - 1
}
}
@@ -232,17 +236,18 @@ init = function(int N, int D, int M)
* We use the Glorot uniform heuristic which limits the magnification
* of inputs/gradients during forward/backward passes by scaling
* uniform weights by a factor of sqrt(6/(fan_in + fan_out)).
+ * - http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf
*
* Inputs:
* - N: Number of examples in batch.
- * - D: Dimensionality of the input features.
+ * - D: Dimensionality of the input features (number of features).
* - M: Number of neurons in this layer.
*
* Outputs:
- * - W: Weights (parameters) matrix, of shape (D+M, 4M).
- * - b: Biases vector, of shape (1, 4M).
- * - out0: Dummy output matrix at previous timestep, of shape (N, M).
- * - c0: Initial empty cell state matrix, of shape (N, M).
+ * - W: Weights, of shape (D+M, 4M).
+ * - b: Biases, of shape (1, 4M).
+ * - out0: Empty previous timestep output matrix, of shape (N, M).
+ * - c0: Empty initial cell state matrix, of shape (N, M).
*/
fan_in = D+M
fan_out = 4*M
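
For reference, a sketch of the Glorot uniform scaling the docstring
describes (an assumed form for illustration; the actual init body is
elided from this hunk):

  # scale uniform weights into [-sqrt(6/(fan_in+fan_out)), +sqrt(6/(fan_in+fan_out))]
  scale = sqrt(6.0 / (fan_in + fan_out))
  W = rand(rows=D+M, cols=4*M, min=-scale, max=scale, pdf="uniform")
  b = matrix(0, rows=1, cols=4*M)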
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/scripts/staging/SystemML-NN/nn/layers/max_pool.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/max_pool.dml b/scripts/staging/SystemML-NN/nn/layers/max_pool.dml
index 22e1747..a12877f 100644
--- a/scripts/staging/SystemML-NN/nn/layers/max_pool.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/max_pool.dml
@@ -38,7 +38,7 @@ forward = function(matrix[double] X, int C, int Hin, int Win, int Hf, int Wf,
* the output maps.
*
* Inputs:
- * - X: Input data matrix, of shape (N, C*Hin*Win).
+ * - X: Inputs, of shape (N, C*Hin*Win).
* - C: Number of input channels (dimensionality of input depth).
* - Hin: Input height.
* - Win: Input width.
@@ -57,8 +57,8 @@ forward = function(matrix[double] X, int C, int Hin, int Win, int Hf, int Wf,
* - Wout: Output width.
*/
N = nrow(X)
- Hout = as.integer((Hin + 2 * padh - Hf) / strideh + 1)
- Wout = as.integer((Win + 2 * padw - Wf) / stridew + 1)
+ Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1)
+ Wout = as.integer((Win + 2*padw - Wf)/stridew + 1)
pad_value = -1/0 # in max pooling we pad with -infinity
# Create output volume
@@ -96,7 +96,8 @@ backward = function(matrix[double] dout, int Hout, int Wout, matrix[double] X,
* unrolled into a single vector.
*
* Inputs:
- * - dout: Derivatives from upstream, of shape (N, C*Hout*Wout).
+ * - dout: Gradient wrt `out` from upstream, of
+ * shape (N, C*Hout*Wout).
* - Hout: Output height.
* - Wout: Output width.
* - X: Input data matrix, of shape (N, C*Hin*Win).
@@ -113,7 +114,7 @@ backward = function(matrix[double] dout, int Hout, int Wout, matrix[double] X,
* A typical value is 0.
*
* Outputs:
- * - dX: Gradient wrt X, of shape (N, C*Hin*Win).
+ * - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
*/
N = nrow(X)
pad_value = -1/0 # in max pooling we pad with -infinity
@@ -134,9 +135,9 @@ backward = function(matrix[double] dout, int Hout, int Wout, matrix[double] X,
img_slice = matrix(img[c,], rows=Hin+2*padh, cols=Win+2*padw)
dimg_slice = matrix(0, rows=Hin+2*padh, cols=Win+2*padw)
for (hout in 1:Hout, check=0) { # all output rows
- hin = (hout-1) * strideh + 1
+ hin = (hout-1)*strideh + 1
for (wout in 1:Wout) { # all output columns
- win = (wout-1) * stridew + 1
+ win = (wout-1)*stridew + 1
img_slice_patch = img_slice[hin:hin+Hf-1, win:win+Wf-1]
max_val_ind = img_slice_patch == max(img_slice_patch) # max value indicator matrix
# gradient passes through only for the max value(s) in this patch
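
To illustrate the max-value gradient routing described in the comment
above, a minimal pooling call (illustrative shapes; a single 2x2 image
pooled down to one value):

  source("nn/layers/max_pool.dml") as max_pool

  X = matrix("1 2 3 4", rows=1, cols=4)  # N=1, C=1, Hin=2, Win=2
  [out, Hout, Wout] = max_pool::forward(X, 1, 2, 2, 2, 2, 1, 1, 0, 0)
  print("pooled: " + as.scalar(out))     # 4; backward routes all gradient there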
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/scripts/staging/SystemML-NN/nn/layers/max_pool_builtin.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/max_pool_builtin.dml b/scripts/staging/SystemML-NN/nn/layers/max_pool_builtin.dml
index ae2b4a1..f1cb863 100644
--- a/scripts/staging/SystemML-NN/nn/layers/max_pool_builtin.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/max_pool_builtin.dml
@@ -22,6 +22,7 @@
/*
* Max pooling layer.
*/
+
forward = function(matrix[double] X, int C, int Hin, int Win, int Hf, int Wf,
int strideh, int stridew, int padh, int padw)
return (matrix[double] out, int Hout, int Wout) {
@@ -36,7 +37,7 @@ forward = function(matrix[double] X, int C, int Hin, int Win, int Hf, int Wf,
* the output maps.
*
* Inputs:
- * - X: Input data matrix, of shape (N, C*Hin*Win).
+ * - X: Inputs, of shape (N, C*Hin*Win).
* - C: Number of input channels (dimensionality of input depth).
* - Hin: Input height.
* - Win: Input width.
@@ -55,8 +56,8 @@ forward = function(matrix[double] X, int C, int Hin, int Win, int Hf, int Wf,
* - Wout: Output width.
*/
N = nrow(X)
- Hout = as.integer((Hin - Hf) / strideh + 1)
- Wout = as.integer((Win - Wf) / stridew + 1)
+ Hout = as.integer((Hin-Hf)/strideh + 1)
+ Wout = as.integer((Win-Wf)/stridew + 1)
# Max pooling - built-in implementation
out = max_pool(X, input_shape=[N,C,Hin,Win], pool_size=[Hf,Wf],
@@ -73,10 +74,11 @@ backward = function(matrix[double] dout, int Hout, int Wout, matrix[double] X,
* unrolled into a single vector.
*
* Inputs:
- * - dout: Derivatives from upstream, of shape (N, C*Hout*Wout).
+ * - dout: Gradient wrt `out` from upstream, of
+ * shape (N, C*Hout*Wout).
* - Hout: Output height.
* - Wout: Output width.
- * - X: Input data matrix, of shape (N, C*Hin*Win).
+ * - X: Inputs, of shape (N, C*Hin*Win).
* - C: Number of input channels (dimensionality of input depth).
* - Hin: Input height.
* - Win: Input width.
@@ -90,7 +92,7 @@ backward = function(matrix[double] dout, int Hout, int Wout, matrix[double] X,
* A typical value is 0.
*
* Outputs:
- * - dX: Gradient wrt X, of shape (N, C*Hin*Win).
+ * - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
*/
N = nrow(X)
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/scripts/staging/SystemML-NN/nn/layers/relu.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/relu.dml b/scripts/staging/SystemML-NN/nn/layers/relu.dml
index a5c5230..6a4c15c 100644
--- a/scripts/staging/SystemML-NN/nn/layers/relu.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/relu.dml
@@ -22,33 +22,37 @@
/*
* Rectified Linear Unit (ReLU) nonlinearity layer.
*/
-forward = function(matrix[double] X) return (matrix[double] out) {
+
+forward = function(matrix[double] X)
+ return (matrix[double] out) {
/*
* Computes the forward pass for a ReLU nonlinearity layer.
*
- * Performs an element-wise evaluation of f(input) = max(0, input).
+ * Performs an element-wise evaluation of `f(input) = max(0, input)`.
*
* Inputs:
- * - X: Input data matrix, of shape (any, any).
+ * - X: Inputs, of shape (any, any).
*
* Outputs:
- * - out: Ouptuts, of same shape as X.
+ * - out: Outputs, of same shape as `X`.
*/
- out = max(0.0, X)
+ out = max(X, 0)
}
-backward = function(matrix[double] dout, matrix[double] X) return (matrix[double] dX) {
+backward = function(matrix[double] dout, matrix[double] X)
+ return (matrix[double] dX) {
/*
* Computes the backward pass for a ReLU nonlinearity layer.
*
- * Essentially performs a pass-through of the upstream gradient for cells > 0.
+ * Essentially performs a pass-through of the upstream gradient
+ * for cells > 0.
*
* Inputs:
- * - dout: Derivatives from upstream, of same shape as X.
+ * - dout: Gradient wrt `out` from upstream, of same shape as `X`.
* - X: Previous input data matrix, of shape (any, any).
*
* Outputs:
- * - dX: Gradient wrt X, of same shape as X.
+ * - dX: Gradient wrt `X`, of same shape as `X`.
*/
dX = (X > 0) * dout
}
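
For illustration, the two one-liners above compose into a self-contained DML
sketch (the toy data is an assumption):

  X = matrix("1 -2 3 -4", rows=2, cols=2)
  out = max(X, 0)                # forward: elementwise max(0, x) -> [1 0; 3 0]
  dout = matrix(1, rows=2, cols=2)
  dX = (X > 0) * dout            # backward: gradient passes through only where X > 0
  print("sum(dX) = " + sum(dX))  # two positive cells -> 2.0
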
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/scripts/staging/SystemML-NN/nn/layers/rnn.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/rnn.dml b/scripts/staging/SystemML-NN/nn/layers/rnn.dml
index cd3eefe..cdceab8 100644
--- a/scripts/staging/SystemML-NN/nn/layers/rnn.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/rnn.dml
@@ -35,14 +35,14 @@ forward = function(matrix[double] X, matrix[double] W, matrix[double] b, int T,
* in as an additional input at the current timestep.
*
* Inputs:
- * - X: Input data matrix, of shape (N, T*D).
- * - W: Weights (parameters) matrix, of shape (D+M, M).
- * - b: Biases vector, of shape (1, M).
+ * - X: Inputs, of shape (N, T*D).
+ * - W: Weights, of shape (D+M, M).
+ * - b: Biases, of shape (1, M).
* - T: Length of example sequences (number of timesteps).
- * - D: Dimensionality of the input features.
+ * - D: Dimensionality of the input features (number of features).
* - return_sequences: Whether to return `out` at all timesteps,
* or just for the final timestep.
- * - out0: Output matrix at previous timestep, of shape (N, M).
+ * - out0: Output matrix from previous timestep, of shape (N, M).
* Note: This is *optional* and could just be an empty matrix.
*
* Outputs:
@@ -88,28 +88,28 @@ backward = function(matrix[double] dout, matrix[double] X, matrix[double] W, mat
* Computes the backward pass for a simple RNN layer with M neurons.
*
* Inputs:
- * - dout: Gradient on output from upstream. If `given_sequences`
+ * - dout: Gradient wrt `out` from upstream. If `given_sequences`
* is True, contains gradients on outputs for all timesteps,
* of shape (N, T*M). Else, contains gradient on output for
* the final timestep, of shape (N, M).
- * - X: Input data matrix, of shape (N, T*D).
- * - W: Weights (parameters) matrix, of shape (D+M, M).
- * - b: Biases vector, of shape (1, M).
+ * - X: Inputs, of shape (N, T*D).
+ * - W: Weights, of shape (D+M, M).
+ * - b: Biases, of shape (1, M).
* - T: Length of example sequences (number of timesteps).
- * - D: Dimensionality of the input features.
+ * - D: Dimensionality of the input features (number of features).
* - given_sequences: Whether `dout` is for all timesteps,
* or just for the final timestep. This is based on whether
* `return_sequences` was true in the forward pass.
- * - out0: Output matrix at previous timestep, of shape (N, M).
+ * - out0: Output matrix from previous timestep, of shape (N, M).
* Note: This is *optional* and could just be an empty matrix.
* - cache_out: Cache of outputs, of shape (T, N*M).
* Note: This is used for performance during training.
*
* Outputs:
- * - dX: Gradient wrt X, of shape (N, T*D).
- * - dW: Gradient wrt W, of shape (D+M, 4M).
- * - db: Gradient wrt b, of shape (1, 4M).
- * - dout0: Gradient wrt out0, of shape (N, M).
+ * - dX: Gradient wrt `X`, of shape (N, T*D).
+ * - dW: Gradient wrt `W`, of shape (D+M, M).
+ * - db: Gradient wrt `b`, of shape (1, M).
+ * - dout0: Gradient wrt `out0`, of shape (N, M).
*/
N = nrow(X)
M = ncol(W)
@@ -134,7 +134,7 @@ backward = function(matrix[double] dout, matrix[double] X, matrix[double] W, mat
out_prev = matrix(cache_out[t-1,], rows=N, cols=M) # shape (N, M)
}
input = cbind(X_t, out_prev) # shape (N, D+M)
- dout_t_raw = (1 - out_t^2) * dout_t # into tanh, shape (N, M)
+ dout_t_raw = (1-out_t^2) * dout_t # into tanh, shape (N, M)
dW = dW + t(input) %*% dout_t_raw # shape (D+M, M)
db = db + colSums(dout_t_raw) # shape (1, M)
dinput = dout_t_raw %*% t(W) # shape (N, D+M)
@@ -146,7 +146,7 @@ backward = function(matrix[double] dout, matrix[double] X, matrix[double] W, mat
else {
dout[,(t-2)*M+1:(t-1)*M] = dout[,(t-2)*M+1:(t-1)*M] + dout_prev # shape (N, M)
}
- t = t-1
+ t = t - 1
}
}
@@ -161,16 +161,17 @@ init = function(int N, int D, int M)
* We use the Glorot uniform heuristic which limits the magnification
* of inputs/gradients during forward/backward passes by scaling
* uniform weights by a factor of sqrt(6/(fan_in + fan_out)).
+ * - http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf
*
* Inputs:
* - N: Number of examples in batch.
- * - D: Dimensionality of the input features.
+ * - D: Dimensionality of the input features (number of features).
* - M: Number of neurons in this layer.
*
* Outputs:
- * - W: Weights (parameters) matrix, of shape (D+M, M).
- * - b: Biases vector, of shape (1, M).
- * - out0: Dummy output matrix at previous timestep, of shape (N, M).
+ * - W: Weights, of shape (D+M, M).
+ * - b: Biases, of shape (1, M).
+ * - out0: Empty previous timestep output matrix, of shape (N, M).
*/
fan_in = D+M
fan_out = M
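
A minimal usage sketch of the API documented above; it assumes the layer path
from this repo and that `forward` also returns the `cache_out` consumed by
`backward` (all sizes are toy assumptions):

  source("nn/layers/rnn.dml") as rnn

  N = 4  # examples
  T = 5  # timesteps
  D = 3  # input features
  M = 8  # neurons
  X = rand(rows=N, cols=T*D)
  [W, b, out0] = rnn::init(N, D, M)  # Glorot uniform initialization
  [out, cache_out] = rnn::forward(X, W, b, T, D, TRUE, out0)  # out: (N, T*M)
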
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/scripts/staging/SystemML-NN/nn/layers/sigmoid.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/sigmoid.dml b/scripts/staging/SystemML-NN/nn/layers/sigmoid.dml
index a7066f2..185befb 100644
--- a/scripts/staging/SystemML-NN/nn/layers/sigmoid.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/sigmoid.dml
@@ -22,33 +22,41 @@
/*
* Sigmoid nonlinearity layer.
*/
-forward = function(matrix[double] X) return (matrix[double] out) {
+
+forward = function(matrix[double] X)
+ return (matrix[double] out) {
/*
* Computes the forward pass for a sigmoid nonlinearity layer.
*
- * sigmoid(x) = 1 / (1 + e^-x)
+ * `sigmoid(x) = 1 / (1 + e^-x)`
+ *
+ * If `X` contains a single feature column, the output of a sigmoid
+ * layer can be interpreted as a predicted probability of a true
+ * class when paired with a log loss function in a binary
+ * classification problem.
*
* Inputs:
- * - X: Input data matrix, of shape (any, any).
+ * - X: Inputs, of shape (any, any).
*
* Outputs:
- * - out: Ouptuts, of same shape as X.
+ * - out: Outputs, of same shape as `X`.
*/
- out = 1 / (1 + exp(-X))
+ out = 1 / (1+exp(-X))
}
-backward = function(matrix[double] dout, matrix[double] X) return (matrix[double] dX) {
+backward = function(matrix[double] dout, matrix[double] X)
+ return (matrix[double] dX) {
/*
* Computes the backward pass for a sigmoid nonlinearity layer.
*
* Inputs:
- * - dout: Derivatives from upstream, of same shape as X.
- * - X: Previous input data matrix, of shape (any, any).
+ * - dout: Gradient wrt `out` from upstream, of same shape as `X`.
+ * - X: Inputs, of shape (any, any).
*
* Outputs:
- * - dX: Gradient wrt X, of same shape as X.
+ * - dX: Gradient wrt `X`, of same shape as `X`.
*/
- out = 1 / (1 + exp(-X))
- dX = out * (1 - out) * dout
+ out = 1 / (1+exp(-X))
+ dX = out * (1-out) * dout
}
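
Inlining the expressions above yields a self-contained DML sketch (the toy
data is an assumption):

  X = rand(rows=2, cols=3, min=-2, max=2)
  out = 1 / (1+exp(-X))      # forward: sigmoid(x) = 1 / (1 + e^-x)
  dout = matrix(1, rows=2, cols=3)
  dX = out * (1-out) * dout  # backward: sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x))
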
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/scripts/staging/SystemML-NN/nn/layers/softmax.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/softmax.dml b/scripts/staging/SystemML-NN/nn/layers/softmax.dml
index 854e8a8..1751838 100644
--- a/scripts/staging/SystemML-NN/nn/layers/softmax.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/softmax.dml
@@ -22,7 +22,9 @@
/*
* Softmax classifier layer.
*/
-forward = function(matrix[double] scores) return (matrix[double] probs) {
+
+forward = function(matrix[double] scores)
+ return (matrix[double] probs) {
/*
* Computes the forward pass for a softmax classifier. The inputs
* are interpreted as unnormalized, log-probabilities for each of
@@ -32,10 +34,10 @@ forward = function(matrix[double] scores) return (matrix[double] probs) {
* This can be interpreted as a generalization of the sigmoid
* function to multiple classes.
*
- * probs_ij = e^scores_ij / sum(e^scores_i)
+ * `probs_ij = e^scores_ij / sum(e^scores_i)`
*
* Inputs:
- * - scores: Input data matrix, of shape (N, D).
+ * - scores: Inputs, of shape (N, D).
*
* Outputs:
* - probs: Outputs, of shape (N, D).
@@ -56,20 +58,23 @@ backward = function(matrix[double] dprobs, matrix[double] scores)
/*
* Computes the backward pass for a softmax classifier.
*
- * Note that dscores_ij has multiple sources:
+ * Note that dscores_ij has multiple source branches:
*
- * dprobs_ij/dscores_ij = probs_ij * (1 - probs_ij)
- * dprobs_ik/dscores_ij = -probs_ik * probs_ij, for all k != j
+ * ```
+ * dprobs_ij/dscores_ij = probs_ij * (1 - probs_ij)
+ * dprobs_ik/dscores_ij = -probs_ik * probs_ij, for all k != j
*
- * dloss/dscores_ij = dloss/dprobs_ij * dprobs_ij/dscores_ij +
- * sum_{k!=j}(dloss/dprobs_ik * dprobs_ik/dscores_ij)
+ * dloss/dscores_ij =
+ * (dloss/dprobs_ij * dprobs_ij/dscores_ij)
+ * + sum_{k!=j}(dloss/dprobs_ik * dprobs_ik/dscores_ij)
+ * ```
*
* Inputs:
- * - dprobs: Derivatives from upstream, of shape (N, D).
- * - scores: Previous input data matrix, of shape (N, D).
+ * - dprobs: Gradient wrt `probs` from upstream, of shape (N, D).
+ * - scores: Inputs, of shape (N, D).
*
* Outputs:
- * - dscores: Gradient wrt scores, of shape (N, D).
+ * - dscores: Gradient wrt `scores`, of shape (N, D).
*/
scores = scores - rowMaxs(scores) # numerical stability
unnorm_probs = exp(scores) # unnormalized probabilities
@@ -77,6 +82,6 @@ backward = function(matrix[double] dprobs, matrix[double] scores)
# After some cancellation:
# dscores = dprobs*probs - probs*rowSums(dprobs*probs)
dtemp = dprobs * probs
- dscores = dtemp - probs * rowSums(dtemp)
+ dscores = dtemp - probs*rowSums(dtemp)
}
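
Putting the numerically stable forward pass together with the simplified
backward pass, a standalone DML sketch (the toy data is an assumption):

  scores = rand(rows=4, cols=3)
  dprobs = rand(rows=4, cols=3)

  # Forward: stable softmax over each row
  shifted = scores - rowMaxs(scores)            # subtract row max for stability
  unnorm_probs = exp(shifted)
  probs = unnorm_probs / rowSums(unnorm_probs)  # each row sums to 1

  # Backward, after the cancellation noted in the comments above
  dtemp = dprobs * probs
  dscores = dtemp - probs*rowSums(dtemp)
  print("mean row sum of probs = " + (sum(probs)/4))  # ~1.0
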
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/scripts/staging/SystemML-NN/nn/layers/spatial_batch_norm.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/spatial_batch_norm.dml b/scripts/staging/SystemML-NN/nn/layers/spatial_batch_norm.dml
index 53ca989..0185a2c 100644
--- a/scripts/staging/SystemML-NN/nn/layers/spatial_batch_norm.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/spatial_batch_norm.dml
@@ -39,7 +39,7 @@ forward = function(matrix[double] X, matrix[double] gamma, matrix[double] beta,
* introduces learnable parameters (gamma, beta) to control the
* amount of normalization.
*
- * y = ((x-mean) / sqrt(var+eps)) * gamma + beta
+ * `y = ((x-mean) / sqrt(var+eps)) * gamma + beta`
*
* This implementation maintains exponential moving averages of the
* mean and variance during training for use during testing.
@@ -50,7 +50,7 @@ forward = function(matrix[double] X, matrix[double] gamma, matrix[double] beta,
* - https://arxiv.org/abs/1502.03167
*
* Inputs:
- * - X: Input data matrix, of shape (N, C*Hin*Win).
+ * - X: Inputs, of shape (N, C*Hin*Win).
* - gamma: Scale parameters, of shape (C, 1).
* - beta: Shift parameters, of shape (C, 1).
* - C: Number of input channels (dimensionality of input depth).
@@ -134,7 +134,7 @@ backward = function(matrix[double] dout, matrix[double] out,
* Computes the backward pass for a spatial batch normalization layer.
*
* Inputs:
- * - dout: Derivatives from upstream, of shape (N, C*Hin*Win).
+ * - dout: Gradient wrt `out` from upstream, of shape (N, C*Hin*Win).
* - out: Outputs from the forward pass, of shape (N, C*Hin*Win).
* - ema_mean_upd: Updated exponential moving average of the mean
* from the forward pass, of shape (C, 1).
@@ -171,9 +171,9 @@ backward = function(matrix[double] dout, matrix[double] out,
* Typical values are in the range of [1e-5, 1e-3].
*
* Outputs:
- * - dX: Gradient wrt X, of shape (N, C*Hin*Win).
- * - dgamma: Gradient wrt W, of shape (C, 1).
- * - dbeta: Gradient wrt b, of shape (C, 1).
+ * - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
+ * - dgamma: Gradient wrt `gamma`, of shape (C, 1).
+ * - dbeta: Gradient wrt `beta`, of shape (C, 1).
*
*/
N = nrow(X)
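
To show just the normalization equation above, a minimal non-spatial DML
sketch: the real layer normalizes each channel over (N, Hin, Win) and keeps
moving averages for testing, whereas this toy version only normalizes each
column of a 2D matrix (all names and sizes are assumptions):

  N = 8
  X = rand(rows=N, cols=4)
  eps = 1e-5
  gamma = matrix(1, rows=1, cols=4)               # scale
  beta = matrix(0, rows=1, cols=4)                # shift
  mu = colMeans(X)                                # per-column mean, shape (1, 4)
  sigma2 = colSums((X-mu)^2) / N                  # biased per-column variance
  Y = ((X-mu) / sqrt(sigma2+eps)) * gamma + beta  # y = ((x-mean)/sqrt(var+eps))*gamma + beta
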
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/scripts/staging/SystemML-NN/nn/layers/tanh.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/tanh.dml b/scripts/staging/SystemML-NN/nn/layers/tanh.dml
index 9308a7c..589a574 100644
--- a/scripts/staging/SystemML-NN/nn/layers/tanh.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/tanh.dml
@@ -24,38 +24,42 @@
*/
source("nn/layers/sigmoid.dml") as sigmoid
-forward = function(matrix[double] X) return (matrix[double] out) {
+forward = function(matrix[double] X)
+ return (matrix[double] out) {
/*
* Computes the forward pass for a tanh nonlinearity layer.
*
- * tanh(x) = (e^x - e^-x) / (e^x + e^-x)
- * = 2 * sigmoid(2x) - 1
+ * ```
+ * tanh(x) = (e^x - e^-x) / (e^x + e^-x)
+ * = 2 * sigmoid(2x) - 1
+ * ```
*
* Inputs:
- * - X: Input data matrix, of shape (any, any).
+ * - X: Inputs, of shape (any, any).
*
* Outputs:
- * - out: Ouptuts, of same shape as X.
+ * - out: Outputs, of same shape as `X`.
*/
# out = (exp(X) - exp(-X)) / (exp(X) + exp(-X))
# Simplification of the above formulation to use the sigmoid function:
sigma2X = sigmoid::forward(2*X)
- out = 2 * sigma2X - 1
+ out = 2*sigma2X - 1
}
-backward = function(matrix[double] dout, matrix[double] X) return (matrix[double] dX) {
+backward = function(matrix[double] dout, matrix[double] X)
+ return (matrix[double] dX) {
/*
* Computes the backward pass for a tanh nonlinearity layer.
*
* Inputs:
- * - dout: Derivatives from upstream, of same shape as X.
- * - X: Previous input data matrix, of shape (any, any).
+ * - dout: Gradient wrt `out` from upstream, of same shape as `X`.
+ * - X: Inputs, of shape (any, any).
*
* Outputs:
- * - dX: Gradient wrt X, of same shape as X.
+ * - dX: Gradient wrt `X`, of same shape as `X`.
*/
sigma2X = sigmoid::forward(2*X)
- out = 2 * sigma2X - 1
- dX = (1 - out^2) * dout
+ out = 2*sigma2X - 1
+ dX = (1-out^2) * dout
}
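
A self-contained DML sketch of the sigmoid-based formulation above (the toy
data is an assumption):

  X = rand(rows=2, cols=3, min=-2, max=2)
  sigma2X = 1 / (1+exp(-2*X))  # sigmoid(2x), inlined
  out = 2*sigma2X - 1          # tanh(x) = 2*sigmoid(2x) - 1
  dout = matrix(1, rows=2, cols=3)
  dX = (1-out^2) * dout        # backward: tanh'(x) = 1 - tanh(x)^2
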
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/scripts/staging/SystemML-NN/nn/optim/adagrad.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/optim/adagrad.dml b/scripts/staging/SystemML-NN/nn/optim/adagrad.dml
index 688109b..20b26c4 100644
--- a/scripts/staging/SystemML-NN/nn/optim/adagrad.dml
+++ b/scripts/staging/SystemML-NN/nn/optim/adagrad.dml
@@ -22,6 +22,7 @@
/*
* Adagrad optimizer.
*/
+
update = function(matrix[double] X, matrix[double] dX, double lr, double epsilon,
matrix[double] cache)
return (matrix[double] X, matrix[double] cache) {
@@ -39,24 +40,25 @@ update = function(matrix[double] X, matrix[double] dX, double lr, double epsilon
*
* Inputs:
* - X: Parameters to update, of shape (any, any).
- * - dX: Gradient of X wrt to a loss function being optimized, of
- * same shape as X.
+ * - dX: Gradient wrt `X` of a loss function being optimized, of
+ * same shape as `X`.
* - lr: Learning rate.
* - epsilon: Smoothing term to avoid divide by zero errors.
* Typical values are in the range of [1e-8, 1e-4].
* - cache: State that maintains per-parameter sum of squared
- * gradients, of same shape as X.
+ * gradients, of same shape as `X`.
*
* Outputs:
- * - X: Updated parameters X, of same shape as input X.
- * - v: Updated velocity of the parameters X, of same shape as
- * input v.
+ * - X: Updated parameters `X`, of same shape as input `X`.
+ * - cache: State that maintains per-parameter sum of squared
+ * gradients, of same shape as `X`.
*/
cache = cache + dX^2
- X = X - lr * dX / (sqrt(cache) + epsilon)
+ X = X - (lr * dX / (sqrt(cache)+epsilon))
}
-init = function(matrix[double] X) return (matrix[double] cache) {
+init = function(matrix[double] X)
+ return (matrix[double] cache) {
/*
* Initialize the state for this optimizer.
*
@@ -65,10 +67,10 @@ init = function(matrix[double] X) return (matrix[double] cache) {
*
* Inputs:
* - X: Parameters to update, of shape (any, any).
- *
+ *
* Outputs:
* - cache: State that maintains per-parameter sum of squared
- * gradients, of same shape as X.
+ * gradients, of same shape as `X`.
*/
cache = matrix(0, rows=nrow(X), cols=ncol(X))
}
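
One Adagrad step, inlined as a standalone DML sketch (the toy values are
assumptions):

  X = rand(rows=3, cols=3)
  dX = rand(rows=3, cols=3)
  lr = 0.01
  epsilon = 1e-8
  cache = matrix(0, rows=nrow(X), cols=ncol(X))  # init: per-parameter sum of squared gradients
  cache = cache + dX^2                           # accumulate squared gradients
  X = X - (lr * dX / (sqrt(cache)+epsilon))      # per-parameter adaptive step
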
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/scripts/staging/SystemML-NN/nn/optim/adam.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/optim/adam.dml b/scripts/staging/SystemML-NN/nn/optim/adam.dml
index a25f74d..0607fa5 100644
--- a/scripts/staging/SystemML-NN/nn/optim/adam.dml
+++ b/scripts/staging/SystemML-NN/nn/optim/adam.dml
@@ -22,6 +22,7 @@
/*
* Adam optimizer.
*/
+
update = function(matrix[double] X, matrix[double] dX, double lr, double beta1, double beta2,
double epsilon, int t, matrix[double] m, matrix[double] v)
return (matrix[double] X, matrix[double] m, matrix[double] v) {
@@ -34,8 +35,8 @@ update = function(matrix[double] X, matrix[double] dX, double lr, double beta1,
*
* Inputs:
* - X: Parameters to update, of shape (any, any).
- * - dX: Gradient of X wrt to a loss function being optimized, of
- * same shape as X.
+ * - dX: Gradient wrt `X` of a loss function being optimized, of
+ * same shape as `X`.
* - lr: Learning rate. Recommended value is 0.001.
* - beta1: Exponential decay rate for the 1st moment estimates.
* Recommended value is 0.9.
@@ -46,32 +47,33 @@ update = function(matrix[double] X, matrix[double] dX, double lr, double beta1,
* - t: Timestep, starting at 0.
* - m: State containing the 1st moment (mean) estimate by
* maintaining exponential moving averages of the gradients, of
- * same shape as X.
+ * same shape as `X`.
* - v: State containing the 2nd raw moment (uncentered variance)
* estimate by maintaining exponential moving averages of the
- * squared gradients, of same shape as X.
+ * squared gradients, of same shape as `X`.
*
* Outputs:
- * - X: Updated parameters X, of same shape as input X.
+ * - X: Updated parameters `X`, of same shape as input `X`.
* - m: Updated state containing the 1st moment (mean) estimate by
* maintaining exponential moving averages of the gradients, of
- * same shape as X.
+ * same shape as `X`.
* - v: Updated state containing the 2nd raw moment (uncentered
* variance) estimate by maintaining exponential moving averages
- * of the squared gradients, of same shape as X.
+ * of the squared gradients, of same shape as `X`.
*/
t = t + 1
- m = beta1 * m + (1 - beta1) * dX # update biased 1st moment estimate
- v = beta2 * v + (1 - beta2) * dX^2 # update biased 2nd raw moment estimate
- #m = m / (1 - beta1^t) # compute bias-corrected 1st moment estimate
- #v = v / (1 - beta2^t) # compute bias-corrected 2nd raw moment estimate
- #X = X - lr * m / (sqrt(v) + epsilon) # param update
+ m = beta1*m + (1-beta1)*dX # update biased 1st moment estimate
+ v = beta2*v + (1-beta2)*dX^2 # update biased 2nd raw moment estimate
+ # m = m / (1-beta1^t) # compute bias-corrected 1st moment estimate
+ # v = v / (1-beta2^t) # compute bias-corrected 2nd raw moment estimate
+ # X = X - (lr * m / (sqrt(v)+epsilon)) # param update
# Simplified for computational efficiency:
- lr = lr * sqrt(1 - beta2^t) / (1 - beta1^t)
- X = X - lr * m / (sqrt(v) + epsilon)
+ lr = lr * sqrt(1-beta2^t) / (1-beta1^t)
+ X = X - (lr * m / (sqrt(v)+epsilon))
}
-init = function(matrix[double] X) return (matrix[double] m, matrix[double] v) {
+init = function(matrix[double] X)
+ return (matrix[double] m, matrix[double] v) {
/*
* Initialize the state for this optimizer.
*
@@ -80,14 +82,14 @@ init = function(matrix[double] X) return (matrix[double] m, matrix[double] v) {
*
* Inputs:
* - X: Parameters to update, of shape (any, any).
- *
+ *
* Outputs:
* - m: Initial state containing the 1st moment (mean) estimate by
* maintaining exponential moving averages of the gradients, of
- * same shape as X.
+ * same shape as `X`.
* - v: Initial state containing the 2nd raw moment (uncentered
* variance) estimate by maintaining exponential moving averages
- * of the squared gradients, of same shape as X.
+ * of the squared gradients, of same shape as `X`.
*/
m = matrix(0, rows=nrow(X), cols=ncol(X))
v = matrix(0, rows=nrow(X), cols=ncol(X))
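
One Adam step, inlined as a standalone DML sketch; the folded learning rate is
the "simplified for computational efficiency" form above, which absorbs the
commented-out bias corrections into the step size (the toy values are
assumptions):

  X = rand(rows=3, cols=3)
  dX = rand(rows=3, cols=3)
  lr = 0.001
  beta1 = 0.9
  beta2 = 0.999
  epsilon = 1e-8
  m = matrix(0, rows=nrow(X), cols=ncol(X))
  v = matrix(0, rows=nrow(X), cols=ncol(X))
  t = 0

  t = t + 1
  m = beta1*m + (1-beta1)*dX                 # biased 1st moment estimate
  v = beta2*v + (1-beta2)*dX^2               # biased 2nd raw moment estimate
  lr_t = lr * sqrt(1-beta2^t) / (1-beta1^t)  # bias corrections folded into the step size
  X = X - (lr_t * m / (sqrt(v)+epsilon))

At t=1 this gives lr_t = 0.001 * sqrt(0.001) / 0.1 ~= 3.16e-4, and it matches
the bias-corrected form lr * m_hat / (sqrt(v_hat) + epsilon) up to the
placement of epsilon.
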
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/scripts/staging/SystemML-NN/nn/optim/rmsprop.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/optim/rmsprop.dml b/scripts/staging/SystemML-NN/nn/optim/rmsprop.dml
index e256000..80c75a0 100644
--- a/scripts/staging/SystemML-NN/nn/optim/rmsprop.dml
+++ b/scripts/staging/SystemML-NN/nn/optim/rmsprop.dml
@@ -22,6 +22,7 @@
/*
* RMSprop optimizer.
*/
+
update = function(matrix[double] X, matrix[double] dX, double lr, double decay_rate,
double epsilon, matrix[double] cache)
return (matrix[double] X, matrix[double] cache) {
@@ -39,26 +40,27 @@ update = function(matrix[double] X, matrix[double] dX, double lr, double decay_r
*
* Inputs:
* - X: Parameters to update, of shape (any, any).
- * - dX: Gradient of X wrt to a loss function being optimized, of
- * same shape as X.
+ * - dX: Gradient wrt `X` of a loss function being optimized, of
+ * same shape as `X`.
* - lr: Learning rate.
* - decay_rate: Term controlling the rate of the moving average.
* Typical values are in the range of [0.9, 0.999].
* - epsilon: Smoothing term to avoid divide by zero errors.
* Typical values are in the range of [1e-8, 1e-4].
* - cache: State that maintains the moving average of the squared
- * gradients, of same shape as X.
+ * gradients, of same shape as `X`.
*
* Outputs:
- * - X: Updated parameters X, of same shape as input X.
- * - v: Updated velocity of the parameters X, of same shape as
- * input v.
+ * - X: Updated parameters `X`, of same shape as input `X`.
+ * - cache: Updated state that maintains the moving average of the
+ * squared gradients, of same shape as `X`.
*/
- cache = decay_rate * cache + (1 - decay_rate) * dX^2
- X = X - lr * dX / (sqrt(cache) + epsilon)
+ cache = decay_rate*cache + (1-decay_rate)*dX^2
+ X = X - (lr * dX / (sqrt(cache)+epsilon))
}
-init = function(matrix[double] X) return (matrix[double] cache) {
+init = function(matrix[double] X)
+ return (matrix[double] cache) {
/*
* Initialize the state for this optimizer.
*
@@ -67,10 +69,10 @@ init = function(matrix[double] X) return (matrix[double] cache) {
*
* Inputs:
* - X: Parameters to update, of shape (any, any).
- *
+ *
* Outputs:
* - cache: State that maintains the moving average of the squared
- * gradients, of same shape as X.
+ * gradients, of same shape as `X`.
*/
cache = matrix(0, rows=nrow(X), cols=ncol(X))
}
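
One RMSprop step, inlined as a standalone DML sketch; unlike Adagrad's
ever-growing sum, the cache here is a decaying moving average, so the
effective step size does not shrink monotonically (the toy values are
assumptions):

  X = rand(rows=3, cols=3)
  dX = rand(rows=3, cols=3)
  lr = 0.01
  decay_rate = 0.99
  epsilon = 1e-8
  cache = matrix(0, rows=nrow(X), cols=ncol(X))
  cache = decay_rate*cache + (1-decay_rate)*dX^2  # moving average of squared gradients
  X = X - (lr * dX / (sqrt(cache)+epsilon))
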
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/scripts/staging/SystemML-NN/nn/optim/sgd.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/optim/sgd.dml b/scripts/staging/SystemML-NN/nn/optim/sgd.dml
index 554569a..a3fc744 100644
--- a/scripts/staging/SystemML-NN/nn/optim/sgd.dml
+++ b/scripts/staging/SystemML-NN/nn/optim/sgd.dml
@@ -22,19 +22,21 @@
/*
* Stochastic Gradient Descent (SGD) optimizer.
*/
-update = function(matrix[double] X, matrix[double] dX, double lr) return (matrix[double] X) {
+
+update = function(matrix[double] X, matrix[double] dX, double lr)
+ return (matrix[double] X) {
/*
* Performs a vanilla SGD update.
*
* Inputs:
* - X: Parameters to update, of shape (any, any).
- * - dX: Gradient of X wrt to a loss function being optimized, of
- * same shape as X.
+ * - dX: Gradient wrt `X` of a loss function being optimized, of
+ * same shape as `X`.
* - lr: Learning rate.
*
* Outputs:
- * - X: Updated parameters X, of same shape as input X.
+ * - X: Updated parameters `X`, of same shape as input `X`.
*/
- X = X - lr * dX
+ X = X - lr*dX
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/scripts/staging/SystemML-NN/nn/optim/sgd_momentum.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/optim/sgd_momentum.dml b/scripts/staging/SystemML-NN/nn/optim/sgd_momentum.dml
index c2a441b..2cb9890 100644
--- a/scripts/staging/SystemML-NN/nn/optim/sgd_momentum.dml
+++ b/scripts/staging/SystemML-NN/nn/optim/sgd_momentum.dml
@@ -22,6 +22,7 @@
/*
* Stochastic Gradient Descent with momentum (SGD-momentum) optimizer.
*/
+
update = function(matrix[double] X, matrix[double] dX, double lr, double mu, matrix[double] v)
return (matrix[double] X, matrix[double] v) {
/*
@@ -33,25 +34,26 @@ update = function(matrix[double] X, matrix[double] dX, double lr, double mu, mat
*
* Inputs:
* - X: Parameters to update, of shape (any, any).
- * - dX: Gradient of X wrt to a loss function being optimized, of
- * same shape as X.
+ * - dX: Gradient wrt `X` of a loss function being optimized, of
+ * same shape as `X`.
* - lr: Learning rate.
* - mu: Momentum value.
* Typical values are in the range of [0.5, 0.99], usually
* started at the lower end and annealed towards the higher end.
- * - v: State maintaining the velocity of the parameters X, of same
- * shape as X.
+ * - v: State maintaining the velocity of the parameters `X`, of same
+ * shape as `X`.
*
* Outputs:
- * - X: Updated parameters X, of same shape as input X.
- * - v: Updated velocity of the parameters X, of same shape as
- * input v.
+ * - X: Updated parameters `X`, of same shape as input `X`.
+ * - v: Updated velocity of the parameters `X`, of same shape as
+ * input `X`.
*/
- v = mu * v - lr * dX # update velocity
+ v = mu*v - lr*dX # update velocity
X = X + v # update position
}
-init = function(matrix[double] X) return (matrix[double] v) {
+init = function(matrix[double] X)
+ return (matrix[double] v) {
/*
* Initialize the state for this optimizer.
*
@@ -60,9 +62,9 @@ init = function(matrix[double] X) return (matrix[double] v) {
*
* Inputs:
* - X: Parameters to update, of shape (any, any).
- *
+ *
* Outputs:
- * - v: Initial velocity of the parameters X.
+ * - v: Initial velocity of the parameters `X`.
*/
v = matrix(0, rows=nrow(X), cols=ncol(X))
}
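
One SGD-momentum step as a standalone DML sketch (the toy values are
assumptions):

  X = rand(rows=3, cols=3)
  dX = rand(rows=3, cols=3)
  lr = 0.01
  mu = 0.9
  v = matrix(0, rows=nrow(X), cols=ncol(X))  # init: zero velocity
  v = mu*v - lr*dX                           # update velocity
  X = X + v                                  # update position
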
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16b1cbd7/scripts/staging/SystemML-NN/nn/optim/sgd_nesterov.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/optim/sgd_nesterov.dml b/scripts/staging/SystemML-NN/nn/optim/sgd_nesterov.dml
index 56c6ab0..fee6585 100644
--- a/scripts/staging/SystemML-NN/nn/optim/sgd_nesterov.dml
+++ b/scripts/staging/SystemML-NN/nn/optim/sgd_nesterov.dml
@@ -22,6 +22,7 @@
/*
* Stochastic Gradient Descent with Nesterov momentum (SGD-Nesterov) optimizer.
*/
+
update = function(matrix[double] X, matrix[double] dX, double lr, double mu, matrix[double] v)
return (matrix[double] X, matrix[double] v) {
/*
@@ -36,19 +37,20 @@ update = function(matrix[double] X, matrix[double] dX, double lr, double mu, mat
* store the parameters in their position after momentum.
*
* Reference:
- * - Advances in optimizing Recurrent Networks, Bengio et al., section 3.5.
+ * - Advances in optimizing Recurrent Networks, Bengio et al.,
+ * section 3.5.
* - http://arxiv.org/abs/1212.0901
*
* Inputs:
* - X: Parameters to update, of shape (any, any).
- * - dX: Gradient of X wrt to a loss function being optimized, of
- * same shape as X.
+ * - dX: Gradient wrt `X` of a loss function being optimized, of
+ * same shape as `X`.
* - lr: Learning rate.
* - mu: Momentum value.
* Typical values are in the range of [0.5, 0.99], usually
* started at the lower end and annealed towards the higher end.
- * - v: State maintaining the velocity of the parameters X, of same
- * shape as X.
+ * - v: State maintaining the velocity of the parameters `X`, of same
+ * shape as `X`.
*
* Outputs:
* - X: Updated parameters X, of same shape as input X.
@@ -56,11 +58,12 @@ update = function(matrix[double] X, matrix[double] dX, double lr, double mu, mat
* input v.
*/
v_prev = v
- v = mu * v - lr * dX # update velocity
- X = X - mu * v_prev + (1 + mu) * v # update position, including momentum
+ v = mu*v - lr*dX # update velocity
+ X = X - mu*v_prev + (1+mu)*v # update position, including momentum
}
-init = function(matrix[double] X) return (matrix[double] v) {
+init = function(matrix[double] X)
+ return (matrix[double] v) {
/*
* Initialize the state for this optimizer.
*
@@ -69,9 +72,9 @@ init = function(matrix[double] X) return (matrix[double] v) {
*
* Inputs:
* - X: Parameters to update, of shape (any, any).
- *
+ *
* Outputs:
- * - v: Initial velocity of the parameters X.
+ * - v: Initial velocity of the parameters `X`.
*/
v = matrix(0, rows=nrow(X), cols=ncol(X))
}
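
One SGD-Nesterov step as a standalone DML sketch; per the note above, `X`
stores the parameters in their position after momentum, hence the correction
through `v_prev` (the toy values are assumptions):

  X = rand(rows=3, cols=3)
  dX = rand(rows=3, cols=3)
  lr = 0.01
  mu = 0.9
  v = matrix(0, rows=nrow(X), cols=ncol(X))
  v_prev = v
  v = mu*v - lr*dX              # update velocity
  X = X - mu*v_prev + (1+mu)*v  # update position, including momentum
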
[7/7] incubator-systemml git commit: [MINOR] Comments and whitespace fixes.
Posted by du...@apache.org.
[MINOR] Comments and whitespace fixes.
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/ac8ee2be
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/ac8ee2be
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/ac8ee2be
Branch: refs/heads/master
Commit: ac8ee2befb651ae89c481b63b4a8aa842585f7e4
Parents: 07039ca
Author: Mike Dusenberry <mw...@us.ibm.com>
Authored: Fri Mar 31 18:39:19 2017 -0700
Committer: Mike Dusenberry <mw...@us.ibm.com>
Committed: Fri Mar 31 18:39:19 2017 -0700
----------------------------------------------------------------------
.../staging/SystemML-NN/examples/mnist_lenet-predict.dml | 4 ++--
.../staging/SystemML-NN/examples/mnist_lenet-train.dml | 4 ++--
scripts/staging/SystemML-NN/examples/mnist_lenet.dml | 4 ++--
.../SystemML-NN/examples/mnist_softmax-predict.dml | 4 ++--
.../staging/SystemML-NN/examples/mnist_softmax-train.dml | 4 ++--
scripts/staging/SystemML-NN/examples/mnist_softmax.dml | 4 ++--
scripts/staging/SystemML-NN/nn/layers/affine.dml | 11 ++++++-----
scripts/staging/SystemML-NN/nn/layers/batch_norm.dml | 6 +++---
scripts/staging/SystemML-NN/nn/layers/conv2d.dml | 4 ++--
scripts/staging/SystemML-NN/nn/layers/conv2d_builtin.dml | 4 ++--
.../staging/SystemML-NN/nn/layers/cross_entropy_loss.dml | 6 +++---
scripts/staging/SystemML-NN/nn/layers/dropout.dml | 4 ++--
scripts/staging/SystemML-NN/nn/layers/l1_loss.dml | 4 ++--
scripts/staging/SystemML-NN/nn/layers/l1_reg.dml | 6 +++---
scripts/staging/SystemML-NN/nn/layers/l2_loss.dml | 4 ++--
scripts/staging/SystemML-NN/nn/layers/l2_reg.dml | 6 +++---
scripts/staging/SystemML-NN/nn/layers/log_loss.dml | 4 ++--
scripts/staging/SystemML-NN/nn/layers/lstm.dml | 4 ++--
scripts/staging/SystemML-NN/nn/layers/max_pool2d.dml | 4 ++--
.../staging/SystemML-NN/nn/layers/max_pool2d_builtin.dml | 4 ++--
scripts/staging/SystemML-NN/nn/layers/relu.dml | 4 ++--
scripts/staging/SystemML-NN/nn/layers/rnn.dml | 4 ++--
scripts/staging/SystemML-NN/nn/layers/sigmoid.dml | 4 ++--
scripts/staging/SystemML-NN/nn/layers/softmax.dml | 4 ++--
.../staging/SystemML-NN/nn/layers/spatial_batch_norm.dml | 6 +++---
scripts/staging/SystemML-NN/nn/layers/tanh.dml | 4 ++--
scripts/staging/SystemML-NN/nn/optim/adagrad.dml | 4 ++--
scripts/staging/SystemML-NN/nn/optim/adam.dml | 4 ++--
scripts/staging/SystemML-NN/nn/optim/rmsprop.dml | 4 ++--
scripts/staging/SystemML-NN/nn/optim/sgd.dml | 4 ++--
scripts/staging/SystemML-NN/nn/optim/sgd_momentum.dml | 4 ++--
scripts/staging/SystemML-NN/nn/optim/sgd_nesterov.dml | 4 ++--
scripts/staging/SystemML-NN/nn/test/conv2d_simple.dml | 4 ++--
scripts/staging/SystemML-NN/nn/test/grad_check.dml | 4 ++--
.../staging/SystemML-NN/nn/test/max_pool2d_simple.dml | 4 ++--
scripts/staging/SystemML-NN/nn/test/run_tests.dml | 4 ++--
scripts/staging/SystemML-NN/nn/test/test.dml | 4 ++--
scripts/staging/SystemML-NN/nn/test/util.dml | 4 ++--
scripts/staging/SystemML-NN/nn/util.dml | 4 ++--
39 files changed, 87 insertions(+), 86 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ac8ee2be/scripts/staging/SystemML-NN/examples/mnist_lenet-predict.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/examples/mnist_lenet-predict.dml b/scripts/staging/SystemML-NN/examples/mnist_lenet-predict.dml
index 775926c..51bb6f5 100644
--- a/scripts/staging/SystemML-NN/examples/mnist_lenet-predict.dml
+++ b/scripts/staging/SystemML-NN/examples/mnist_lenet-predict.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ac8ee2be/scripts/staging/SystemML-NN/examples/mnist_lenet-train.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/examples/mnist_lenet-train.dml b/scripts/staging/SystemML-NN/examples/mnist_lenet-train.dml
index c23029f..03c3467 100644
--- a/scripts/staging/SystemML-NN/examples/mnist_lenet-train.dml
+++ b/scripts/staging/SystemML-NN/examples/mnist_lenet-train.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ac8ee2be/scripts/staging/SystemML-NN/examples/mnist_lenet.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/examples/mnist_lenet.dml b/scripts/staging/SystemML-NN/examples/mnist_lenet.dml
index e2895b8..a261b41 100644
--- a/scripts/staging/SystemML-NN/examples/mnist_lenet.dml
+++ b/scripts/staging/SystemML-NN/examples/mnist_lenet.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ac8ee2be/scripts/staging/SystemML-NN/examples/mnist_softmax-predict.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/examples/mnist_softmax-predict.dml b/scripts/staging/SystemML-NN/examples/mnist_softmax-predict.dml
index 52f31fd..353efd1 100644
--- a/scripts/staging/SystemML-NN/examples/mnist_softmax-predict.dml
+++ b/scripts/staging/SystemML-NN/examples/mnist_softmax-predict.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ac8ee2be/scripts/staging/SystemML-NN/examples/mnist_softmax-train.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/examples/mnist_softmax-train.dml b/scripts/staging/SystemML-NN/examples/mnist_softmax-train.dml
index dff192e..fe3a9b2 100644
--- a/scripts/staging/SystemML-NN/examples/mnist_softmax-train.dml
+++ b/scripts/staging/SystemML-NN/examples/mnist_softmax-train.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ac8ee2be/scripts/staging/SystemML-NN/examples/mnist_softmax.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/examples/mnist_softmax.dml b/scripts/staging/SystemML-NN/examples/mnist_softmax.dml
index ee0d3cb..dc712f6 100644
--- a/scripts/staging/SystemML-NN/examples/mnist_softmax.dml
+++ b/scripts/staging/SystemML-NN/examples/mnist_softmax.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ac8ee2be/scripts/staging/SystemML-NN/nn/layers/affine.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/affine.dml b/scripts/staging/SystemML-NN/nn/layers/affine.dml
index f9f8559..c9a740b 100644
--- a/scripts/staging/SystemML-NN/nn/layers/affine.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/affine.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -20,14 +20,15 @@
#-------------------------------------------------------------
/*
- * Fully-connected (affine) layer.
+ * Affine (fully-connected) layer.
*/
forward = function(matrix[double] X, matrix[double] W, matrix[double] b)
return (matrix[double] out) {
/*
- * Computes the forward pass for a fully-connected (affine) layer with
- * M neurons. The input data has N examples, each with D features.
+ * Computes the forward pass for an affine (fully-connected) layer
+ * with M neurons. The input data has N examples, each with D
+ * features.
*
* Inputs:
* - X: Inputs, of shape (N, D).
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ac8ee2be/scripts/staging/SystemML-NN/nn/layers/batch_norm.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/batch_norm.dml b/scripts/staging/SystemML-NN/nn/layers/batch_norm.dml
index 82240f7..caad100 100644
--- a/scripts/staging/SystemML-NN/nn/layers/batch_norm.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/batch_norm.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -20,7 +20,7 @@
#-------------------------------------------------------------
/*
- * Batch normalization layer.
+ * Batch Normalization layer.
*/
forward = function(matrix[double] X, matrix[double] gamma, matrix[double] beta,
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ac8ee2be/scripts/staging/SystemML-NN/nn/layers/conv2d.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/conv2d.dml b/scripts/staging/SystemML-NN/nn/layers/conv2d.dml
index 435b3cf..7aeec16 100644
--- a/scripts/staging/SystemML-NN/nn/layers/conv2d.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/conv2d.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ac8ee2be/scripts/staging/SystemML-NN/nn/layers/conv2d_builtin.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/conv2d_builtin.dml b/scripts/staging/SystemML-NN/nn/layers/conv2d_builtin.dml
index 29021cf..e7771ba 100644
--- a/scripts/staging/SystemML-NN/nn/layers/conv2d_builtin.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/conv2d_builtin.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ac8ee2be/scripts/staging/SystemML-NN/nn/layers/cross_entropy_loss.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/cross_entropy_loss.dml b/scripts/staging/SystemML-NN/nn/layers/cross_entropy_loss.dml
index 55552e1..63db502 100644
--- a/scripts/staging/SystemML-NN/nn/layers/cross_entropy_loss.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/cross_entropy_loss.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -20,7 +20,7 @@
#-------------------------------------------------------------
/*
- * Cross-entropy loss function.
+ * Cross-Entropy loss function.
*/
forward = function(matrix[double] pred, matrix[double] y)
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ac8ee2be/scripts/staging/SystemML-NN/nn/layers/dropout.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/dropout.dml b/scripts/staging/SystemML-NN/nn/layers/dropout.dml
index b348642..a36878b 100644
--- a/scripts/staging/SystemML-NN/nn/layers/dropout.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/dropout.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ac8ee2be/scripts/staging/SystemML-NN/nn/layers/l1_loss.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/l1_loss.dml b/scripts/staging/SystemML-NN/nn/layers/l1_loss.dml
index 24b15e2..b74566d 100644
--- a/scripts/staging/SystemML-NN/nn/layers/l1_loss.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/l1_loss.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ac8ee2be/scripts/staging/SystemML-NN/nn/layers/l1_reg.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/l1_reg.dml b/scripts/staging/SystemML-NN/nn/layers/l1_reg.dml
index f643274..2b81c0b 100644
--- a/scripts/staging/SystemML-NN/nn/layers/l1_reg.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/l1_reg.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -20,7 +20,7 @@
#-------------------------------------------------------------
/*
- * L1 regularizataion.
+ * L1 regularization.
*/
forward = function(matrix[double] X, double lambda)
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ac8ee2be/scripts/staging/SystemML-NN/nn/layers/l2_loss.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/l2_loss.dml b/scripts/staging/SystemML-NN/nn/layers/l2_loss.dml
index df8bc1c..0482f25 100644
--- a/scripts/staging/SystemML-NN/nn/layers/l2_loss.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/l2_loss.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ac8ee2be/scripts/staging/SystemML-NN/nn/layers/l2_reg.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/l2_reg.dml b/scripts/staging/SystemML-NN/nn/layers/l2_reg.dml
index 5074c06..7255efe 100644
--- a/scripts/staging/SystemML-NN/nn/layers/l2_reg.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/l2_reg.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -20,7 +20,7 @@
#-------------------------------------------------------------
/*
- * L2 regularizataion.
+ * L2 regularization.
*/
forward = function(matrix[double] X, double lambda)
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ac8ee2be/scripts/staging/SystemML-NN/nn/layers/log_loss.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/log_loss.dml b/scripts/staging/SystemML-NN/nn/layers/log_loss.dml
index 7dd85d3..15914f7 100644
--- a/scripts/staging/SystemML-NN/nn/layers/log_loss.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/log_loss.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ac8ee2be/scripts/staging/SystemML-NN/nn/layers/lstm.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/lstm.dml b/scripts/staging/SystemML-NN/nn/layers/lstm.dml
index 44f2ef2..a75add4 100644
--- a/scripts/staging/SystemML-NN/nn/layers/lstm.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/lstm.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ac8ee2be/scripts/staging/SystemML-NN/nn/layers/max_pool2d.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/max_pool2d.dml b/scripts/staging/SystemML-NN/nn/layers/max_pool2d.dml
index 229b7b9..ef1499a 100644
--- a/scripts/staging/SystemML-NN/nn/layers/max_pool2d.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/max_pool2d.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ac8ee2be/scripts/staging/SystemML-NN/nn/layers/max_pool2d_builtin.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/max_pool2d_builtin.dml b/scripts/staging/SystemML-NN/nn/layers/max_pool2d_builtin.dml
index be4e195..65ba71f 100644
--- a/scripts/staging/SystemML-NN/nn/layers/max_pool2d_builtin.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/max_pool2d_builtin.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ac8ee2be/scripts/staging/SystemML-NN/nn/layers/relu.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/relu.dml b/scripts/staging/SystemML-NN/nn/layers/relu.dml
index 6a4c15c..93a6e90 100644
--- a/scripts/staging/SystemML-NN/nn/layers/relu.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/relu.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ac8ee2be/scripts/staging/SystemML-NN/nn/layers/rnn.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/rnn.dml b/scripts/staging/SystemML-NN/nn/layers/rnn.dml
index cdceab8..3c6faae 100644
--- a/scripts/staging/SystemML-NN/nn/layers/rnn.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/rnn.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ac8ee2be/scripts/staging/SystemML-NN/nn/layers/sigmoid.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/sigmoid.dml b/scripts/staging/SystemML-NN/nn/layers/sigmoid.dml
index 185befb..2d85adc 100644
--- a/scripts/staging/SystemML-NN/nn/layers/sigmoid.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/sigmoid.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ac8ee2be/scripts/staging/SystemML-NN/nn/layers/softmax.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/softmax.dml b/scripts/staging/SystemML-NN/nn/layers/softmax.dml
index 1751838..68a7bc7 100644
--- a/scripts/staging/SystemML-NN/nn/layers/softmax.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/softmax.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ac8ee2be/scripts/staging/SystemML-NN/nn/layers/spatial_batch_norm.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/spatial_batch_norm.dml b/scripts/staging/SystemML-NN/nn/layers/spatial_batch_norm.dml
index 0185a2c..6e57b05 100644
--- a/scripts/staging/SystemML-NN/nn/layers/spatial_batch_norm.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/spatial_batch_norm.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -20,7 +20,7 @@
#-------------------------------------------------------------
/*
- * Spatial batch normalization layer.
+ * Spatial Batch Normalization layer.
*/
source("nn/util.dml") as util
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ac8ee2be/scripts/staging/SystemML-NN/nn/layers/tanh.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/tanh.dml b/scripts/staging/SystemML-NN/nn/layers/tanh.dml
index 589a574..d849d70 100644
--- a/scripts/staging/SystemML-NN/nn/layers/tanh.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/tanh.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ac8ee2be/scripts/staging/SystemML-NN/nn/optim/adagrad.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/optim/adagrad.dml b/scripts/staging/SystemML-NN/nn/optim/adagrad.dml
index 20b26c4..85b1c41 100644
--- a/scripts/staging/SystemML-NN/nn/optim/adagrad.dml
+++ b/scripts/staging/SystemML-NN/nn/optim/adagrad.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ac8ee2be/scripts/staging/SystemML-NN/nn/optim/adam.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/optim/adam.dml b/scripts/staging/SystemML-NN/nn/optim/adam.dml
index 0607fa5..4b6fa2a 100644
--- a/scripts/staging/SystemML-NN/nn/optim/adam.dml
+++ b/scripts/staging/SystemML-NN/nn/optim/adam.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ac8ee2be/scripts/staging/SystemML-NN/nn/optim/rmsprop.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/optim/rmsprop.dml b/scripts/staging/SystemML-NN/nn/optim/rmsprop.dml
index 80c75a0..1feccaf 100644
--- a/scripts/staging/SystemML-NN/nn/optim/rmsprop.dml
+++ b/scripts/staging/SystemML-NN/nn/optim/rmsprop.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ac8ee2be/scripts/staging/SystemML-NN/nn/optim/sgd.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/optim/sgd.dml b/scripts/staging/SystemML-NN/nn/optim/sgd.dml
index a3fc744..3ba7eba 100644
--- a/scripts/staging/SystemML-NN/nn/optim/sgd.dml
+++ b/scripts/staging/SystemML-NN/nn/optim/sgd.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ac8ee2be/scripts/staging/SystemML-NN/nn/optim/sgd_momentum.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/optim/sgd_momentum.dml b/scripts/staging/SystemML-NN/nn/optim/sgd_momentum.dml
index 2cb9890..85922da 100644
--- a/scripts/staging/SystemML-NN/nn/optim/sgd_momentum.dml
+++ b/scripts/staging/SystemML-NN/nn/optim/sgd_momentum.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ac8ee2be/scripts/staging/SystemML-NN/nn/optim/sgd_nesterov.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/optim/sgd_nesterov.dml b/scripts/staging/SystemML-NN/nn/optim/sgd_nesterov.dml
index fee6585..3b62c6e 100644
--- a/scripts/staging/SystemML-NN/nn/optim/sgd_nesterov.dml
+++ b/scripts/staging/SystemML-NN/nn/optim/sgd_nesterov.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ac8ee2be/scripts/staging/SystemML-NN/nn/test/conv2d_simple.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/conv2d_simple.dml b/scripts/staging/SystemML-NN/nn/test/conv2d_simple.dml
index efd99c3..05f0f7d 100644
--- a/scripts/staging/SystemML-NN/nn/test/conv2d_simple.dml
+++ b/scripts/staging/SystemML-NN/nn/test/conv2d_simple.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ac8ee2be/scripts/staging/SystemML-NN/nn/test/grad_check.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/grad_check.dml b/scripts/staging/SystemML-NN/nn/test/grad_check.dml
index 27f4420..1b42b67 100644
--- a/scripts/staging/SystemML-NN/nn/test/grad_check.dml
+++ b/scripts/staging/SystemML-NN/nn/test/grad_check.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ac8ee2be/scripts/staging/SystemML-NN/nn/test/max_pool2d_simple.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/max_pool2d_simple.dml b/scripts/staging/SystemML-NN/nn/test/max_pool2d_simple.dml
index 47dab3a..dee1a48 100644
--- a/scripts/staging/SystemML-NN/nn/test/max_pool2d_simple.dml
+++ b/scripts/staging/SystemML-NN/nn/test/max_pool2d_simple.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ac8ee2be/scripts/staging/SystemML-NN/nn/test/run_tests.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/run_tests.dml b/scripts/staging/SystemML-NN/nn/test/run_tests.dml
index 644662c..dc53cb9 100644
--- a/scripts/staging/SystemML-NN/nn/test/run_tests.dml
+++ b/scripts/staging/SystemML-NN/nn/test/run_tests.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ac8ee2be/scripts/staging/SystemML-NN/nn/test/test.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/test.dml b/scripts/staging/SystemML-NN/nn/test/test.dml
index 64fc519..958c2c5 100644
--- a/scripts/staging/SystemML-NN/nn/test/test.dml
+++ b/scripts/staging/SystemML-NN/nn/test/test.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ac8ee2be/scripts/staging/SystemML-NN/nn/test/util.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/util.dml b/scripts/staging/SystemML-NN/nn/test/util.dml
index 128e4db..e32a885 100644
--- a/scripts/staging/SystemML-NN/nn/test/util.dml
+++ b/scripts/staging/SystemML-NN/nn/test/util.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ac8ee2be/scripts/staging/SystemML-NN/nn/util.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/util.dml b/scripts/staging/SystemML-NN/nn/util.dml
index 405d208..62a90f2 100644
--- a/scripts/staging/SystemML-NN/nn/util.dml
+++ b/scripts/staging/SystemML-NN/nn/util.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
[6/7] incubator-systemml git commit: [SYSTEMML-1453] Update Conv & Max Pooling layer names to include "2D"
Posted by du...@apache.org.
[SYSTEMML-1453] Update Conv & Max Pooling layer names to include "2D"
This updates `conv*.dml` and `max_pool*.dml` to `conv2d*.dml` and
`max_pool2d*.dml` to allow for 1D and 3D variants in the future.
Closes #447.
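For callers, the rename is a small mechanical change: update the `source` path and alias, then use the new alias at each call site. A minimal sketch in DML against the renamed module (the toy sizes below are hypothetical, not taken from any script in this commit):

source("nn/layers/conv2d.dml") as conv2d  # was: source("nn/layers/conv.dml") as conv

# Hypothetical toy problem: 2 examples, 1 channel, 4x4 images, 3 filters of size 3x3
N = 2
C = 1
Hin = 4
Win = 4
F = 3
Hf = 3
Wf = 3
X = rand(rows=N, cols=C*Hin*Win)
[W, b] = conv2d::init(F, C, Hf, Wf)
[out, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, 1, 1, 1, 1)  # stride 1, pad 1
print("Hout = " + Hout + ", Wout = " + Wout)  # 4 and 4: pad = (Hf-1)/2 preserves the size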
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/07039caa
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/07039caa
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/07039caa
Branch: refs/heads/master
Commit: 07039caa9629dd3a26aa66c9ec860cf7f7917724
Parents: 5c59e03
Author: Mike Dusenberry <mw...@us.ibm.com>
Authored: Fri Mar 31 18:39:11 2017 -0700
Committer: Mike Dusenberry <mw...@us.ibm.com>
Committed: Fri Mar 31 18:39:11 2017 -0700
----------------------------------------------------------------------
projects/breast_cancer/convnet.dml | 101 +++++----
.../SystemML-NN/examples/mnist_lenet.dml | 56 ++---
scripts/staging/SystemML-NN/nn/layers/conv.dml | 194 -----------------
.../staging/SystemML-NN/nn/layers/conv2d.dml | 194 +++++++++++++++++
.../SystemML-NN/nn/layers/conv2d_builtin.dml | 160 ++++++++++++++
.../SystemML-NN/nn/layers/conv_builtin.dml | 155 -------------
.../staging/SystemML-NN/nn/layers/max_pool.dml | 159 --------------
.../SystemML-NN/nn/layers/max_pool2d.dml | 159 ++++++++++++++
.../nn/layers/max_pool2d_builtin.dml | 103 +++++++++
.../SystemML-NN/nn/layers/max_pool_builtin.dml | 103 ---------
.../SystemML-NN/nn/test/conv2d_simple.dml | 215 +++++++++++++++++++
.../staging/SystemML-NN/nn/test/conv_simple.dml | 215 -------------------
.../staging/SystemML-NN/nn/test/grad_check.dml | 170 +++++++--------
.../SystemML-NN/nn/test/max_pool2d_simple.dml | 172 +++++++++++++++
.../SystemML-NN/nn/test/max_pool_simple.dml | 172 ---------------
.../staging/SystemML-NN/nn/test/run_tests.dml | 16 +-
scripts/staging/SystemML-NN/nn/test/test.dml | 115 +++++-----
17 files changed, 1248 insertions(+), 1211 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/projects/breast_cancer/convnet.dml
----------------------------------------------------------------------
diff --git a/projects/breast_cancer/convnet.dml b/projects/breast_cancer/convnet.dml
index 5f115a2..85c7dd8 100644
--- a/projects/breast_cancer/convnet.dml
+++ b/projects/breast_cancer/convnet.dml
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -24,11 +24,11 @@
*/
# Imports
source("nn/layers/affine.dml") as affine
-source("nn/layers/conv_builtin.dml") as conv
+source("nn/layers/conv2d_builtin.dml") as conv2d
source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss
source("nn/layers/dropout.dml") as dropout
source("nn/layers/l2_reg.dml") as l2_reg
-source("nn/layers/max_pool_builtin.dml") as max_pool
+source("nn/layers/max_pool2d_builtin.dml") as max_pool2d
source("nn/layers/relu.dml") as relu
source("nn/layers/softmax.dml") as softmax
#source("nn/optim/adam.dml") as adam
@@ -96,9 +96,9 @@ train = function(matrix[double] X, matrix[double] Y,
F3 = 32 # num conv filters in conv3
N1 = 512 # num nodes in affine1
# Note: affine2 has K nodes, which is equal to the number of target dimensions (num classes)
- [Wc1, bc1] = conv::init(F1, C, Hf, Wf) # inputs: (N, C*Hin*Win)
- [Wc2, bc2] = conv::init(F2, F1, Hf, Wf) # inputs: (N, F1*(Hin/2)*(Win/2))
- [Wc3, bc3] = conv::init(F3, F2, Hf, Wf) # inputs: (N, F2*(Hin/2^2)*(Win/2^2))
+ [Wc1, bc1] = conv2d::init(F1, C, Hf, Wf) # inputs: (N, C*Hin*Win)
+ [Wc2, bc2] = conv2d::init(F2, F1, Hf, Wf) # inputs: (N, F1*(Hin/2)*(Win/2))
+ [Wc3, bc3] = conv2d::init(F3, F2, Hf, Wf) # inputs: (N, F2*(Hin/2^2)*(Win/2^2))
[Wa1, ba1] = affine::init(F3*(Hin/2^3)*(Win/2^3), N1) # inputs: (N, F3*(Hin/2^3)*(Win/2^3))
[Wa2, ba2] = affine::init(N1, K) # inputs: (N, N1)
Wa2 = Wa2 / sqrt(2) # different initialization, since being fed into softmax, instead of relu
@@ -145,17 +145,23 @@ train = function(matrix[double] X, matrix[double] Y,
# Compute forward pass
## conv layer 1: conv1 -> relu1 -> pool1
- [outc1, Houtc1, Woutc1] = conv::forward(X_batch, Wc1, bc1, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [outc1, Houtc1, Woutc1] = conv2d::forward(X_batch, Wc1, bc1, C, Hin, Win, Hf, Wf,
+ stride, stride, pad, pad)
outc1r = relu::forward(outc1)
- [outc1p, Houtc1p, Woutc1p] = max_pool::forward(outc1r, F1, Houtc1, Woutc1, Hf=2, Wf=2, strideh=2, stridew=2)
+ [outc1p, Houtc1p, Woutc1p] = max_pool2d::forward(outc1r, F1, Houtc1, Woutc1, Hf=2, Wf=2,
+ strideh=2, stridew=2)
## conv layer 2: conv2 -> relu2 -> pool2
- [outc2, Houtc2, Woutc2] = conv::forward(outc1p, Wc2, bc2, F1, Houtc1p, Woutc1p, Hf, Wf, stride, stride, pad, pad)
+ [outc2, Houtc2, Woutc2] = conv2d::forward(outc1p, Wc2, bc2, F1, Houtc1p, Woutc1p, Hf, Wf,
+ stride, stride, pad, pad)
outc2r = relu::forward(outc2)
- [outc2p, Houtc2p, Woutc2p] = max_pool::forward(outc2r, F2, Houtc2, Woutc2, Hf=2, Wf=2, strideh=2, stridew=2)
+ [outc2p, Houtc2p, Woutc2p] = max_pool2d::forward(outc2r, F2, Houtc2, Woutc2, Hf=2, Wf=2,
+ strideh=2, stridew=2)
## conv layer 3: conv3 -> relu3 -> pool3
- [outc3, Houtc3, Woutc3] = conv::forward(outc2p, Wc3, bc3, F2, Houtc2p, Woutc2p, Hf, Wf, stride, stride, pad, pad)
+ [outc3, Houtc3, Woutc3] = conv2d::forward(outc2p, Wc3, bc3, F2, Houtc2p, Woutc2p, Hf, Wf,
+ stride, stride, pad, pad)
outc3r = relu::forward(outc3)
- [outc3p, Houtc3p, Woutc3p] = max_pool::forward(outc3r, F3, Houtc3, Woutc3, Hf=2, Wf=2, strideh=2, stridew=2)
+ [outc3p, Houtc3p, Woutc3p] = max_pool2d::forward(outc3r, F3, Houtc3, Woutc3, Hf=2, Wf=2,
+ strideh=2, stridew=2)
## affine layer 1: affine1 -> relu1 -> dropout1
outa1 = affine::forward(outc3p, Wa1, ba1)
outa1r = relu::forward(outa1)
@@ -176,17 +182,23 @@ train = function(matrix[double] X, matrix[double] Y,
douta1 = relu::backward(douta1r, outa1)
[doutc3p, dWa1, dba1] = affine::backward(douta1, outc3p, Wa1, ba1)
## conv layer 3: conv3 -> relu3 -> pool3
- doutc3r = max_pool::backward(doutc3p, Houtc3p, Woutc3p, outc3r, F3, Houtc3, Woutc3, Hf=2, Wf=2, strideh=2, stridew=2)
+ doutc3r = max_pool2d::backward(doutc3p, Houtc3p, Woutc3p, outc3r, F3, Houtc3, Woutc3,
+ Hf=2, Wf=2, strideh=2, stridew=2)
doutc3 = relu::backward(doutc3r, outc3)
- [doutc2p, dWc3, dbc3] = conv::backward(doutc3, Houtc3, Woutc3, outc2p, Wc3, bc2, F2, Houtc2p, Woutc2p, Hf, Wf, stride, stride, pad, pad)
+ [doutc2p, dWc3, dbc3] = conv2d::backward(doutc3, Houtc3, Woutc3, outc2p, Wc3, bc2, F2,
+ Houtc2p, Woutc2p, Hf, Wf, stride, stride, pad, pad)
## conv layer 2: conv2 -> relu2 -> pool2
- doutc2r = max_pool::backward(doutc2p, Houtc2p, Woutc2p, outc2r, F2, Houtc2, Woutc2, Hf=2, Wf=2, strideh=2, stridew=2)
+ doutc2r = max_pool2d::backward(doutc2p, Houtc2p, Woutc2p, outc2r, F2, Houtc2, Woutc2,
+ Hf=2, Wf=2, strideh=2, stridew=2)
doutc2 = relu::backward(doutc2r, outc2)
- [doutc1p, dWc2, dbc2] = conv::backward(doutc2, Houtc2, Woutc2, outc1p, Wc2, bc2, F1, Houtc1p, Woutc1p, Hf, Wf, stride, stride, pad, pad)
+ [doutc1p, dWc2, dbc2] = conv2d::backward(doutc2, Houtc2, Woutc2, outc1p, Wc2, bc2, F1,
+ Houtc1p, Woutc1p, Hf, Wf, stride, stride, pad, pad)
## conv layer 1: conv1 -> relu1 -> pool1
- doutc1r = max_pool::backward(doutc1p, Houtc1p, Woutc1p, outc1r, F1, Houtc1, Woutc1, Hf=2, Wf=2, strideh=2, stridew=2)
+ doutc1r = max_pool2d::backward(doutc1p, Houtc1p, Woutc1p, outc1r, F1, Houtc1, Woutc1,
+ Hf=2, Wf=2, strideh=2, stridew=2)
doutc1 = relu::backward(doutc1r, outc1)
- [dX_batch, dWc1, dbc1] = conv::backward(doutc1, Houtc1, Woutc1, X_batch, Wc1, bc1, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [dX_batch, dWc1, dbc1] = conv2d::backward(doutc1, Houtc1, Woutc1, X_batch, Wc1, bc1, C,
+ Hin, Win, Hf, Wf, stride, stride, pad, pad)
# Compute regularization backward pass
dWc1_reg = l2_reg::backward(Wc1, lambda)
@@ -222,7 +234,7 @@ train = function(matrix[double] X, matrix[double] Y,
#[ba1, mba1, vba1] = adam::update(ba1, dba1, lr, beta1, beta2, eps, t, mba1, vba1)
#[Wa2, mWa2, vWa2] = adam::update(Wa2, dWa2, lr, beta1, beta2, eps, t, mWa2, vWa2)
#[ba2, mba2, vba2] = adam::update(ba2, dba2, lr, beta1, beta2, eps, t, mba2, vba2)
-
+
# Compute loss & accuracy for training & validation data every `log_interval` iterations.
if (i %% log_interval == 0) {
# Compute training loss & accuracy
@@ -348,7 +360,8 @@ predict = function(matrix[double] X, int C, int Hin, int Win,
N = nrow(X)
# Network:
- # conv1 -> relu1 -> pool1 -> conv2 -> relu2 -> pool2 -> conv3 -> relu3 -> pool3 -> affine1 -> relu1 -> affine2 -> softmax
+ # conv1 -> relu1 -> pool1 -> conv2 -> relu2 -> pool2 -> conv3 -> relu3 -> pool3
+ # -> affine1 -> relu1 -> affine2 -> softmax
Hf = 3 # filter height
Wf = 3 # filter width
stride = 1
@@ -365,17 +378,23 @@ predict = function(matrix[double] X, int C, int Hin, int Win,
# so that it can be efficiently used for parallel predictions.
## Compute forward pass
### conv layer 1: conv1 -> relu1 -> pool1
- #[outc1, Houtc1, Woutc1] = conv::forward(X, Wc1, bc1, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ #[outc1, Houtc1, Woutc1] = conv2d::forward(X, Wc1, bc1, C, Hin, Win, Hf, Wf, stride, stride,
+ # pad, pad)
#outc1r = relu::forward(outc1)
- #[outc1p, Houtc1p, Woutc1p] = max_pool::forward(outc1r, F1, Houtc1, Woutc1, Hf=2, Wf=2, strideh=2, stridew=2)
+ #[outc1p, Houtc1p, Woutc1p] = max_pool2d::forward(outc1r, F1, Houtc1, Woutc1, Hf=2, Wf=2,
+ # strideh=2, stridew=2)
### conv layer 2: conv2 -> relu2 -> pool2
- #[outc2, Houtc2, Woutc2] = conv::forward(outc1p, Wc2, bc2, F1, Houtc1p, Woutc1p, Hf, Wf, stride, stride, pad, pad)
+ #[outc2, Houtc2, Woutc2] = conv2d::forward(outc1p, Wc2, bc2, F1, Houtc1p, Woutc1p, Hf, Wf,
+ # stride, stride, pad, pad)
#outc2r = relu::forward(outc2)
- #[outc2p, Houtc2p, Woutc2p] = max_pool::forward(outc2r, F2, Houtc2, Woutc2, Hf=2, Wf=2, strideh=2, stridew=2)
+ #[outc2p, Houtc2p, Woutc2p] = max_pool2d::forward(outc2r, F2, Houtc2, Woutc2, Hf=2, Wf=2,
+ # strideh=2, stridew=2)
### conv layer 3: conv3 -> relu3 -> pool3
- #[outc3, Houtc3, Woutc3] = conv::forward(outc2p, Wc3, bc3, F2, Houtc2p, Woutc2p, Hf, Wf, stride, stride, pad, pad)
+ #[outc3, Houtc3, Woutc3] = conv2d::forward(outc2p, Wc3, bc3, F2, Houtc2p, Woutc2p, Hf, Wf,
+ # stride, stride, pad, pad)
#outc3r = relu::forward(outc3)
- #[outc3p, Houtc3p, Woutc3p] = max_pool::forward(outc3r, F3, Houtc3, Woutc3, Hf=2, Wf=2, strideh=2, stridew=2)
+ #[outc3p, Houtc3p, Woutc3p] = max_pool2d::forward(outc3r, F3, Houtc3, Woutc3, Hf=2, Wf=2,
+ # strideh=2, stridew=2)
### affine layer 1: affine1 -> relu1 -> dropout
#outa1 = affine::forward(outc3p, Wa1, ba1)
#outa1r = relu::forward(outa1)
@@ -398,17 +417,23 @@ predict = function(matrix[double] X, int C, int Hin, int Win,
# Compute forward pass
## conv layer 1: conv1 -> relu1 -> pool1
- [outc1, Houtc1, Woutc1] = conv::forward(X_batch, Wc1, bc1, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [outc1, Houtc1, Woutc1] = conv2d::forward(X_batch, Wc1, bc1, C, Hin, Win, Hf, Wf,
+ stride, stride, pad, pad)
outc1r = relu::forward(outc1)
- [outc1p, Houtc1p, Woutc1p] = max_pool::forward(outc1r, F1, Houtc1, Woutc1, Hf=2, Wf=2, strideh=2, stridew=2)
+ [outc1p, Houtc1p, Woutc1p] = max_pool2d::forward(outc1r, F1, Houtc1, Woutc1, Hf=2, Wf=2,
+ strideh=2, stridew=2)
## conv layer 2: conv2 -> relu2 -> pool2
- [outc2, Houtc2, Woutc2] = conv::forward(outc1p, Wc2, bc2, F1, Houtc1p, Woutc1p, Hf, Wf, stride, stride, pad, pad)
+ [outc2, Houtc2, Woutc2] = conv2d::forward(outc1p, Wc2, bc2, F1, Houtc1p, Woutc1p, Hf, Wf,
+ stride, stride, pad, pad)
outc2r = relu::forward(outc2)
- [outc2p, Houtc2p, Woutc2p] = max_pool::forward(outc2r, F2, Houtc2, Woutc2, Hf=2, Wf=2, strideh=2, stridew=2)
+ [outc2p, Houtc2p, Woutc2p] = max_pool2d::forward(outc2r, F2, Houtc2, Woutc2, Hf=2, Wf=2,
+ strideh=2, stridew=2)
## conv layer 3: conv3 -> relu3 -> pool3
- [outc3, Houtc3, Woutc3] = conv::forward(outc2p, Wc3, bc3, F2, Houtc2p, Woutc2p, Hf, Wf, stride, stride, pad, pad)
+ [outc3, Houtc3, Woutc3] = conv2d::forward(outc2p, Wc3, bc3, F2, Houtc2p, Woutc2p, Hf, Wf,
+ stride, stride, pad, pad)
outc3r = relu::forward(outc3)
- [outc3p, Houtc3p, Woutc3p] = max_pool::forward(outc3r, F3, Houtc3, Woutc3, Hf=2, Wf=2, strideh=2, stridew=2)
+ [outc3p, Houtc3p, Woutc3p] = max_pool2d::forward(outc3r, F3, Houtc3, Woutc3, Hf=2, Wf=2,
+ strideh=2, stridew=2)
## affine layer 1: affine1 -> relu1 -> dropout
outa1 = affine::forward(outc3p, Wa1, ba1)
outa1r = relu::forward(outa1)
@@ -433,7 +458,7 @@ eval = function(matrix[double] probs, matrix[double] Y)
*
* Inputs:
* - probs: Class probabilities, of shape (N, K).
- * - Y: Target matrix, of shape (N, K).
+ * - Y: Target matrix, of shape (N, K).
*
* Outputs:
* - loss: Scalar loss, of shape (1).
@@ -448,7 +473,7 @@ eval = function(matrix[double] probs, matrix[double] Y)
generate_dummy_data = function()
return (matrix[double] X, matrix[double] Y, int C, int Hin, int Win) {
/*
- * Generate a dummy dataset similar to the MNIST dataset.
+ * Generate a dummy dataset similar to the breast cancer dataset.
*
* Outputs:
* - X: Input data matrix, of shape (N, D).
@@ -459,9 +484,9 @@ generate_dummy_data = function()
*/
# Generate dummy input data
N = 1024 # num examples
- C = 1 # num input channels
- Hin = 64 # input height
- Win = 64 # input width
+ C = 3 # num input channels
+ Hin = 256 # input height
+ Win = 256 # input width
K = 3 # num target classes
X = rand(rows=N, cols=C*Hin*Win, pdf="normal")
classes = round(rand(rows=N, cols=1, min=1, max=K, pdf="uniform"))
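With the new 3x256x256 dummy dimensions, the shape bookkeeping in `train` still works out: each 3x3 conv with stride 1 and "same" padding preserves 256x256, and each 2x2/stride-2 max pool halves it, which is why the first affine layer is sized at F3*(Hin/2^3)*(Win/2^3). A quick check of that arithmetic using the output-size formula from the conv2d layer docs (a standalone sketch, not part of the commit):

# Hout = (Hin + 2*padh - Hf)/strideh + 1, per the conv2d layer docs
Hin = 256
Hf = 3
strideh = 1
padh = (Hin*(strideh-1) + Hf - strideh) / 2   # = 1, the "same" padding for stride 1
Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1)
print("conv keeps height: " + Hout)           # 256
Hpool = as.integer((Hout - 2)/2 + 1)          # 2x2 pool, stride 2, no padding
print("pool halves height: " + Hpool)         # 128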
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/scripts/staging/SystemML-NN/examples/mnist_lenet.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/examples/mnist_lenet.dml b/scripts/staging/SystemML-NN/examples/mnist_lenet.dml
index f991487..e2895b8 100644
--- a/scripts/staging/SystemML-NN/examples/mnist_lenet.dml
+++ b/scripts/staging/SystemML-NN/examples/mnist_lenet.dml
@@ -24,11 +24,11 @@
*/
# Imports
source("nn/layers/affine.dml") as affine
-source("nn/layers/conv_builtin.dml") as conv
+source("nn/layers/conv2d_builtin.dml") as conv2d
source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss
source("nn/layers/dropout.dml") as dropout
source("nn/layers/l2_reg.dml") as l2_reg
-source("nn/layers/max_pool_builtin.dml") as max_pool
+source("nn/layers/max_pool2d_builtin.dml") as max_pool2d
source("nn/layers/relu.dml") as relu
source("nn/layers/softmax.dml") as softmax
source("nn/optim/sgd_nesterov.dml") as sgd_nesterov
@@ -81,8 +81,8 @@ train = function(matrix[double] X, matrix[double] y,
N3 = 512 # num nodes in affine3
# Note: affine4 has K nodes, which is equal to the number of target dimensions (num classes)
- [W1, b1] = conv::init(F1, C, Hf, Wf) # inputs: (N, C*Hin*Win)
- [W2, b2] = conv::init(F2, F1, Hf, Wf) # inputs: (N, F1*(Hin/2)*(Win/2))
+ [W1, b1] = conv2d::init(F1, C, Hf, Wf) # inputs: (N, C*Hin*Win)
+ [W2, b2] = conv2d::init(F2, F1, Hf, Wf) # inputs: (N, F1*(Hin/2)*(Win/2))
[W3, b3] = affine::init(F2*(Hin/2/2)*(Win/2/2), N3) # inputs: (N, F2*(Hin/2/2)*(Win/2/2))
[W4, b4] = affine::init(N3, K) # inputs: (N, N3)
W4 = W4 / sqrt(2) # different initialization, since being fed into softmax, instead of relu
@@ -114,17 +114,17 @@ train = function(matrix[double] X, matrix[double] y,
# Compute forward pass
## layer 1: conv1 -> relu1 -> pool1
- [outc1, Houtc1, Woutc1] = conv::forward(X_batch, W1, b1, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ [outc1, Houtc1, Woutc1] = conv2d::forward(X_batch, W1, b1, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
outr1 = relu::forward(outc1)
- [outp1, Houtp1, Woutp1] = max_pool::forward(outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2,
- strideh=2, stridew=2, pad=0, pad=0)
+ [outp1, Houtp1, Woutp1] = max_pool2d::forward(outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2,
+ strideh=2, stridew=2, pad=0, pad=0)
## layer 2: conv2 -> relu2 -> pool2
- [outc2, Houtc2, Woutc2] = conv::forward(outp1, W2, b2, F1, Houtp1, Woutp1, Hf, Wf,
- stride, stride, pad, pad)
+ [outc2, Houtc2, Woutc2] = conv2d::forward(outp1, W2, b2, F1, Houtp1, Woutp1, Hf, Wf,
+ stride, stride, pad, pad)
outr2 = relu::forward(outc2)
- [outp2, Houtp2, Woutp2] = max_pool::forward(outr2, F2, Houtc2, Woutc2, Hf=2, Wf=2,
- strideh=2, stridew=2, pad=0, pad=0)
+ [outp2, Houtp2, Woutp2] = max_pool2d::forward(outr2, F2, Houtc2, Woutc2, Hf=2, Wf=2,
+ strideh=2, stridew=2, pad=0, pad=0)
## layer 3: affine3 -> relu3 -> dropout
outa3 = affine::forward(outp2, W3, b3)
outr3 = relu::forward(outa3)
@@ -165,17 +165,17 @@ train = function(matrix[double] X, matrix[double] y,
douta3 = relu::backward(doutr3, outa3)
[doutp2, dW3, db3] = affine::backward(douta3, outp2, W3, b3)
## layer 2: conv2 -> relu2 -> pool2
- doutr2 = max_pool::backward(doutp2, Houtp2, Woutp2, outr2, F2, Houtc2, Woutc2, Hf=2, Wf=2,
- strideh=2, stridew=2, pad=0, pad=0)
+ doutr2 = max_pool2d::backward(doutp2, Houtp2, Woutp2, outr2, F2, Houtc2, Woutc2, Hf=2, Wf=2,
+ strideh=2, stridew=2, pad=0, pad=0)
doutc2 = relu::backward(doutr2, outc2)
- [doutp1, dW2, db2] = conv::backward(doutc2, Houtc2, Woutc2, outp1, W2, b2, F1,
- Houtp1, Woutp1, Hf, Wf, stride, stride, pad, pad)
+ [doutp1, dW2, db2] = conv2d::backward(doutc2, Houtc2, Woutc2, outp1, W2, b2, F1,
+ Houtp1, Woutp1, Hf, Wf, stride, stride, pad, pad)
## layer 1: conv1 -> relu1 -> pool1
- doutr1 = max_pool::backward(doutp1, Houtp1, Woutp1, outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2,
- strideh=2, stridew=2, pad=0, pad=0)
+ doutr1 = max_pool2d::backward(doutp1, Houtp1, Woutp1, outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2,
+ strideh=2, stridew=2, pad=0, pad=0)
doutc1 = relu::backward(doutr1, outc1)
- [dX_batch, dW1, db1] = conv::backward(doutc1, Houtc1, Woutc1, X_batch, W1, b1, C, Hin, Win,
- Hf, Wf, stride, stride, pad, pad)
+ [dX_batch, dW1, db1] = conv2d::backward(doutc1, Houtc1, Woutc1, X_batch, W1, b1, C, Hin, Win,
+ Hf, Wf, stride, stride, pad, pad)
# Compute regularization backward pass
dW1_reg = l2_reg::backward(W1, lambda)
@@ -260,17 +260,17 @@ predict = function(matrix[double] X, int C, int Hin, int Win,
# Compute forward pass
## layer 1: conv1 -> relu1 -> pool1
- [outc1, Houtc1, Woutc1] = conv::forward(X_batch, W1, b1, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ [outc1, Houtc1, Woutc1] = conv2d::forward(X_batch, W1, b1, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
outr1 = relu::forward(outc1)
- [outp1, Houtp1, Woutp1] = max_pool::forward(outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2,
- strideh=2, stridew=2, pad=0, pad=0)
+ [outp1, Houtp1, Woutp1] = max_pool2d::forward(outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2,
+ strideh=2, stridew=2, pad=0, pad=0)
## layer 2: conv2 -> relu2 -> pool2
- [outc2, Houtc2, Woutc2] = conv::forward(outp1, W2, b2, F1, Houtp1, Woutp1, Hf, Wf,
- stride, stride, pad, pad)
+ [outc2, Houtc2, Woutc2] = conv2d::forward(outp1, W2, b2, F1, Houtp1, Woutp1, Hf, Wf,
+ stride, stride, pad, pad)
outr2 = relu::forward(outc2)
- [outp2, Houtp2, Woutp2] = max_pool::forward(outr2, F2, Houtc2, Woutc2, Hf=2, Wf=2,
- strideh=2, stridew=2, pad=0, pad=0)
+ [outp2, Houtp2, Woutp2] = max_pool2d::forward(outr2, F2, Houtc2, Woutc2, Hf=2, Wf=2,
+ strideh=2, stridew=2, pad=0, pad=0)
## layer 3: affine3 -> relu3
outa3 = affine::forward(outp2, W3, b3)
outr3 = relu::forward(outa3)
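The `affine::init(F2*(Hin/2/2)*(Win/2/2), N3)` sizing above rests on the same arithmetic: with MNIST's 28x28 inputs, "same"-padded convs and two 2x2/stride-2 pools leave a 7x7 map per filter. A small sketch of that bookkeeping (the F2 value here is a hypothetical stand-in, not quoted from the script):

Hin = 28                  # MNIST image height
Hp1 = as.integer(Hin/2)   # after pool1: 14
Hp2 = as.integer(Hp1/2)   # after pool2: 7
F2 = 64                   # hypothetical conv2 filter count
D = F2 * Hp2 * Hp2        # flattened input size for affine3
print("affine3 input dim: " + D)  # 64*7*7 = 3136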
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/scripts/staging/SystemML-NN/nn/layers/conv.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/conv.dml b/scripts/staging/SystemML-NN/nn/layers/conv.dml
deleted file mode 100644
index 435b3cf..0000000
--- a/scripts/staging/SystemML-NN/nn/layers/conv.dml
+++ /dev/null
@@ -1,194 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-/*
- * 2D Convolutional layer.
- */
-source("nn/util.dml") as util
-
-forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
- int C, int Hin, int Win, int Hf, int Wf,
- int strideh, int stridew, int padh, int padw)
- return (matrix[double] out, int Hout, int Wout) {
- /*
- * Computes the forward pass for a 2D spatial convolutional layer with
- * F filters. The input data has N examples, each represented as a 3D
- * volume unrolled into a single vector.
- *
- * This implementation uses `im2col` internally for each image to
- * extract local image regions (patches) into columns, and then
- * performs a matrix multiplication with the filters to compute the
- * output maps.
- *
- * Inputs:
- * - X: Inputs, of shape (N, C*Hin*Win).
- * - W: Weights, of shape (F, C*Hf*Wf).
- * - b: Biases, of shape (F, 1).
- * - C: Number of input channels (dimensionality of input depth).
- * - Hin: Input height.
- * - Win: Input width.
- * - Hf: Filter height.
- * - Wf: Filter width.
- * - strideh: Stride over height.
- * - stridew: Stride over width.
- * - padh: Padding for top and bottom sides.
- * For same output height as input, set `padh = (Hf - 1) / 2`,
- * assuming `strideh = 1`.
- * More generally, `padh = (Hin*(strideh-1) + Hf - strideh) / 2`
- * preserves the spatial dimensions of the input.
- * - padw: Padding for left and right sides.
- * For same output width as input, set `padw = (Wf - 1) / 2`,
- * assuming `stridew = 1`.
- * More generally, `padw = (Win*(stridew-1) + Wf - stridew) / 2`
- * preserves the spatial dimensions of the input.
- *
- * Outputs:
- * - out: Outputs, of shape (N, F*Hout*Wout).
- * - Hout: Output height.
- * - Wout: Output width.
- */
- N = nrow(X)
- F = nrow(W)
- Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1)
- Wout = as.integer((Win + 2*padw - Wf)/stridew + 1)
-
- # Create output volume
- out = matrix(0, rows=N, cols=F*Hout*Wout)
-
- # Convolution - im2col implementation
- parfor (n in 1:N) { # all examples
- Xn = matrix(X[n,], rows=C, cols=Hin*Win) # reshape
-
- # Pad image
- Xn_padded = util::pad_image(Xn, Hin, Win, padh, padw, 0) # shape (C, (Hin+2*padh)*(Win+2*padw))
-
- # Extract local image patches into columns with im2col, of shape (C*Hf*Wf, Hout*Wout)
- Xn_padded_cols = util::im2col(Xn_padded, Hin+2*padh, Win+2*padw, Hf, Wf, strideh, stridew)
-
- # Convolve patches with filters
- outn = W %*% Xn_padded_cols + b # shape (F, Hout*Wout)
- out[n,] = matrix(outn, rows=1, cols=F*Hout*Wout) # reshape
- }
-}
-
-backward = function(matrix[double] dout, int Hout, int Wout,
- matrix[double] X, matrix[double] W, matrix[double] b,
- int C, int Hin, int Win, int Hf, int Wf,
- int strideh, int stridew, int padh, int padw)
- return (matrix[double] dX, matrix[double] dW, matrix[double] db) {
- /*
- * Computes the backward pass for a 2D spatial convolutional layer
- * with F filters.
- *
- * This implementation uses `im2col` and `col2im` internally.
- *
- * Inputs:
- * - dout: Gradient wrt `out` from upstream, of
- * shape (N, F*Hout*Wout).
- * - Hout: Output height.
- * - Wout: Output width.
- * - X: Inputs, of shape (N, C*Hin*Win).
- * - W: Weights, of shape (F, C*Hf*Wf).
- * - b: Biases, of shape (F, 1).
- * - C: Number of input channels (dimensionality of input depth).
- * - Hin: Input height.
- * - Win: Input width.
- * - Hf: Filter height.
- * - Wf: Filter width.
- * - strideh: Stride over height.
- * - stridew: Stride over width.
- * - padh: Padding for top and bottom sides.
- * - padw: Padding for left and right sides.
- *
- * Outputs:
- * - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
- * - dW: Gradient wrt `W`, of shape (F, C*Hf*Wf).
- * - db: Gradient wrt `b`, of shape (F, 1).
- */
- N = nrow(X)
- F = nrow(W)
-
- # Create gradient volumes
- # Note: Create convenience gradient volumes for dW and db that will
- # allow for one gradient to be stored per example, allowing for
- # parallel computation at the expense of memory. We will reduce at
- # the end.
- dX = matrix(0, rows=N, cols=C*Hin*Win)
- dWN = matrix(0, rows=N, cols=F*C*Hf*Wf) # dW = matrix(0, rows=F, cols=C*Hf*Wf)
- dbN = matrix(0, rows=N, cols=F) # db = matrix(0, rows=F, cols=1)
-
- # Partial derivatives for convolution - im2col implementation
- parfor (n in 1:N) { # all examples
- doutn = matrix(dout[n,], rows=F, cols=Hout*Wout)
-
- # Compute dW
- Xn = matrix(X[n,], rows=C, cols=Hin*Win) # reshape
- Xn_padded = util::pad_image(Xn, Hin, Win, padh, padw, 0) # shape (C, (Hin+2*padh)*(Win+2*padw))
- Xn_padded_cols = util::im2col(Xn_padded, Hin+2*padh, Win+2*padw, Hf, Wf, strideh, stridew)
- # dW = dW + doutn %*% t(Xn_padded_cols)
- dWN[n,] = matrix(doutn %*% t(Xn_padded_cols), rows=1, cols=F*C*Hf*Wf)
-
- # Compute db
- # db = db + rowSums(doutn)
- dbN[n,] = matrix(rowSums(doutn), rows=1, cols=F)
-
- # Compute dX
- dXn_padded_cols = t(W) %*% doutn # shape (C*Hf*Wf, Hout*Wout)
- dXn_padded = util::col2im(dXn_padded_cols, C, Hin+2*padh, Win+2*padw, Hf, Wf,
- strideh, stridew, "add")
- dXn = util::unpad_image(dXn_padded, Hin, Win, padh, padw)
- dX[n,] = matrix(dXn, rows=1, cols=C*Hin*Win) # reshape
- }
-
- # Reduce convenience gradient volumes with one gradient per example
- # into single gradients for W and b.
- dW = matrix(colSums(dWN), rows=F, cols=C*Hf*Wf)
- db = matrix(colSums(dbN), rows=F, cols=1)
-}
-
-init = function(int F, int C, int Hf, int Wf)
- return (matrix[double] W, matrix[double] b) {
- /*
- * Initialize the parameters of this layer.
- *
- * Note: This is just a convenience function, and parameters
- * may be initialized manually if needed.
- *
- * We use the heuristic by He et al., which limits the magnification
- * of inputs/gradients during forward/backward passes by scaling
- * unit-Gaussian weights by a factor of sqrt(2/n), under the
- * assumption of relu neurons.
- * - http://arxiv.org/abs/1502.01852
- *
- * Inputs:
- * - F: Number of filters.
- * - C: Number of input channels (dimensionality of depth).
- * - Hf: Filter height.
- * - Wf: Filter width.
- *
- * Outputs:
- * - W: Weights, of shape (F, C*Hf*Wf).
- * - b: Biases, of shape (F, 1).
- */
- W = rand(rows=F, cols=C*Hf*Wf, pdf="normal") * sqrt(2.0/(C*Hf*Wf))
- b = matrix(0, rows=F, cols=1)
-}
-
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/scripts/staging/SystemML-NN/nn/layers/conv2d.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/conv2d.dml b/scripts/staging/SystemML-NN/nn/layers/conv2d.dml
new file mode 100644
index 0000000..435b3cf
--- /dev/null
+++ b/scripts/staging/SystemML-NN/nn/layers/conv2d.dml
@@ -0,0 +1,194 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+/*
+ * 2D Convolutional layer.
+ */
+source("nn/util.dml") as util
+
+forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
+ int C, int Hin, int Win, int Hf, int Wf,
+ int strideh, int stridew, int padh, int padw)
+ return (matrix[double] out, int Hout, int Wout) {
+ /*
+ * Computes the forward pass for a 2D spatial convolutional layer with
+ * F filters. The input data has N examples, each represented as a 3D
+ * volume unrolled into a single vector.
+ *
+ * This implementation uses `im2col` internally for each image to
+ * extract local image regions (patches) into columns, and then
+ * performs a matrix multiplication with the filters to compute the
+ * output maps.
+ *
+ * Inputs:
+ * - X: Inputs, of shape (N, C*Hin*Win).
+ * - W: Weights, of shape (F, C*Hf*Wf).
+ * - b: Biases, of shape (F, 1).
+ * - C: Number of input channels (dimensionality of input depth).
+ * - Hin: Input height.
+ * - Win: Input width.
+ * - Hf: Filter height.
+ * - Wf: Filter width.
+ * - strideh: Stride over height.
+ * - stridew: Stride over width.
+ * - padh: Padding for top and bottom sides.
+ * For same output height as input, set `padh = (Hf - 1) / 2`,
+ * assuming `strideh = 1`.
+ * More generally, `padh = (Hin*(strideh-1) + Hf - strideh) / 2`
+ * preserves the spatial dimensions of the input.
+ * - padw: Padding for left and right sides.
+ * For same output width as input, set `padw = (Wf - 1) / 2`,
+ * assuming `stridew = 1`.
+ * More generally, `padw = (Win*(stridew-1) + Wf - stridew) / 2`
+ * preserves the spatial dimensions of the input.
+ *
+ * Outputs:
+ * - out: Outputs, of shape (N, F*Hout*Wout).
+ * - Hout: Output height.
+ * - Wout: Output width.
+ */
+ N = nrow(X)
+ F = nrow(W)
+ Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1)
+ Wout = as.integer((Win + 2*padw - Wf)/stridew + 1)
+
+ # Create output volume
+ out = matrix(0, rows=N, cols=F*Hout*Wout)
+
+ # Convolution - im2col implementation
+ parfor (n in 1:N) { # all examples
+ Xn = matrix(X[n,], rows=C, cols=Hin*Win) # reshape
+
+ # Pad image
+ Xn_padded = util::pad_image(Xn, Hin, Win, padh, padw, 0) # shape (C, (Hin+2*padh)*(Win+2*padw))
+
+ # Extract local image patches into columns with im2col, of shape (C*Hf*Wf, Hout*Wout)
+ Xn_padded_cols = util::im2col(Xn_padded, Hin+2*padh, Win+2*padw, Hf, Wf, strideh, stridew)
+
+ # Convolve patches with filters
+ outn = W %*% Xn_padded_cols + b # shape (F, Hout*Wout)
+ out[n,] = matrix(outn, rows=1, cols=F*Hout*Wout) # reshape
+ }
+}
+
+backward = function(matrix[double] dout, int Hout, int Wout,
+ matrix[double] X, matrix[double] W, matrix[double] b,
+ int C, int Hin, int Win, int Hf, int Wf,
+ int strideh, int stridew, int padh, int padw)
+ return (matrix[double] dX, matrix[double] dW, matrix[double] db) {
+ /*
+ * Computes the backward pass for a 2D spatial convolutional layer
+ * with F filters.
+ *
+ * This implementation uses `im2col` and `col2im` internally.
+ *
+ * Inputs:
+ * - dout: Gradient wrt `out` from upstream, of
+ * shape (N, F*Hout*Wout).
+ * - Hout: Output height.
+ * - Wout: Output width.
+ * - X: Inputs, of shape (N, C*Hin*Win).
+ * - W: Weights, of shape (F, C*Hf*Wf).
+ * - b: Biases, of shape (F, 1).
+ * - C: Number of input channels (dimensionality of input depth).
+ * - Hin: Input height.
+ * - Win: Input width.
+ * - Hf: Filter height.
+ * - Wf: Filter width.
+ * - strideh: Stride over height.
+ * - stridew: Stride over width.
+ * - padh: Padding for top and bottom sides.
+ * - padw: Padding for left and right sides.
+ *
+ * Outputs:
+ * - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
+ * - dW: Gradient wrt `W`, of shape (F, C*Hf*Wf).
+ * - db: Gradient wrt `b`, of shape (F, 1).
+ */
+ N = nrow(X)
+ F = nrow(W)
+
+ # Create gradient volumes
+ # Note: Create convenience gradient volumes for dW and db that will
+ # allow for one gradient to be stored per example, allowing for
+ # parallel computation at the expense of memory. We will reduce at
+ # the end.
+ dX = matrix(0, rows=N, cols=C*Hin*Win)
+ dWN = matrix(0, rows=N, cols=F*C*Hf*Wf) # dW = matrix(0, rows=F, cols=C*Hf*Wf)
+ dbN = matrix(0, rows=N, cols=F) # db = matrix(0, rows=F, cols=1)
+
+ # Partial derivatives for convolution - im2col implementation
+ parfor (n in 1:N) { # all examples
+ doutn = matrix(dout[n,], rows=F, cols=Hout*Wout)
+
+ # Compute dW
+ Xn = matrix(X[n,], rows=C, cols=Hin*Win) # reshape
+ Xn_padded = util::pad_image(Xn, Hin, Win, padh, padw, 0) # shape (C, (Hin+2*padh)*(Win+2*padw))
+ Xn_padded_cols = util::im2col(Xn_padded, Hin+2*padh, Win+2*padw, Hf, Wf, strideh, stridew)
+ # dW = dW + doutn %*% t(Xn_padded_cols)
+ dWN[n,] = matrix(doutn %*% t(Xn_padded_cols), rows=1, cols=F*C*Hf*Wf)
+
+ # Compute db
+ # db = db + rowSums(doutn)
+ dbN[n,] = matrix(rowSums(doutn), rows=1, cols=F)
+
+ # Compute dX
+ dXn_padded_cols = t(W) %*% doutn # shape (C*Hf*Wf, Hout*Wout)
+ dXn_padded = util::col2im(dXn_padded_cols, C, Hin+2*padh, Win+2*padw, Hf, Wf,
+ strideh, stridew, "add")
+ dXn = util::unpad_image(dXn_padded, Hin, Win, padh, padw)
+ dX[n,] = matrix(dXn, rows=1, cols=C*Hin*Win) # reshape
+ }
+
+ # Reduce convenience gradient volumes with one gradient per example
+ # into single gradients for W and b.
+ dW = matrix(colSums(dWN), rows=F, cols=C*Hf*Wf)
+ db = matrix(colSums(dbN), rows=F, cols=1)
+}
+
+init = function(int F, int C, int Hf, int Wf)
+ return (matrix[double] W, matrix[double] b) {
+ /*
+ * Initialize the parameters of this layer.
+ *
+ * Note: This is just a convenience function, and parameters
+ * may be initialized manually if needed.
+ *
+ * We use the heuristic by He et al., which limits the magnification
+ * of inputs/gradients during forward/backward passes by scaling
+ * unit-Gaussian weights by a factor of sqrt(2/n), under the
+ * assumption of relu neurons.
+ * - http://arxiv.org/abs/1502.01852
+ *
+ * Inputs:
+ * - F: Number of filters.
+ * - C: Number of input channels (dimensionality of depth).
+ * - Hf: Filter height.
+ * - Wf: Filter width.
+ *
+ * Outputs:
+ * - W: Weights, of shape (F, C*Hf*Wf).
+ * - b: Biases, of shape (F, 1).
+ */
+ W = rand(rows=F, cols=C*Hf*Wf, pdf="normal") * sqrt(2.0/(C*Hf*Wf))
+ b = matrix(0, rows=F, cols=1)
+}
+
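To make the `im2col` flow in the file above concrete: for one example, padding turns the (C, Hin*Win) image into (C, (Hin+2*padh)*(Win+2*padw)), `im2col` lays each receptive field out as one column of a (C*Hf*Wf, Hout*Wout) matrix, and the whole convolution collapses into a single matrix multiply against the (F, C*Hf*Wf) filter matrix. A hedged standalone sketch of that multiply, which fakes the patch matrix with random data instead of calling `util::im2col`:

C = 3
Hf = 3
Wf = 3
F = 8
Hout = 5
Wout = 5
W = rand(rows=F, cols=C*Hf*Wf, pdf="normal") * sqrt(2.0/(C*Hf*Wf))  # He-style init, as in init()
b = matrix(0, rows=F, cols=1)
Xn_padded_cols = rand(rows=C*Hf*Wf, cols=Hout*Wout)  # stand-in for util::im2col output
outn = W %*% Xn_padded_cols + b                      # (F, Hout*Wout), bias broadcast per filter row
out_row = matrix(outn, rows=1, cols=F*Hout*Wout)     # reshape into one row of `out`
print("columns in one output row: " + ncol(out_row)) # 8*5*5 = 200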
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/scripts/staging/SystemML-NN/nn/layers/conv2d_builtin.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/conv2d_builtin.dml b/scripts/staging/SystemML-NN/nn/layers/conv2d_builtin.dml
new file mode 100644
index 0000000..29021cf
--- /dev/null
+++ b/scripts/staging/SystemML-NN/nn/layers/conv2d_builtin.dml
@@ -0,0 +1,160 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+/*
+ * 2D Convolutional layer.
+ *
+ * This implementation uses a built-in operator for higher performance.
+ */
+
+forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
+ int C, int Hin, int Win, int Hf, int Wf,
+ int strideh, int stridew, int padh, int padw)
+ return (matrix[double] out, int Hout, int Wout) {
+ /*
+ * Computes the forward pass for a 2D spatial convolutional layer with
+ * F filters. The input data has N examples, each represented as a 3D
+ * volume unrolled into a single vector.
+ *
+ * This implementation uses a built-in operator for higher
+ * performance.
+ *
+ * Inputs:
+ * - X: Inputs, of shape (N, C*Hin*Win).
+ * - W: Weights, of shape (F, C*Hf*Wf).
+ * - b: Biases, of shape (F, 1).
+ * - C: Number of input channels (dimensionality of depth).
+ * - Hin: Input height.
+ * - Win: Input width.
+ * - Hf: Filter height.
+ * - Wf: Filter width.
+ * - strideh: Stride over height.
+ * - stridew: Stride over width.
+ * - padh: Padding for top and bottom sides.
+ * For same output height as input, set `padh = (Hf - 1) / 2`,
+ * assuming `strideh = 1`.
+ * More generally, `padh = (Hin*(strideh-1) + Hf - strideh) / 2`
+ * preserves the spatial dimensions of the input.
+ * - padw: Padding for left and right sides.
+ * For same output width as input, set `padw = (Wf - 1) / 2`,
+ * assuming `stridew = 1`.
+ * More generally, `padw = (Win*(stridew-1) + Wf - stridew) / 2`
+ * preserves the spatial dimensions of the input.
+ *
+ * Outputs:
+ * - out: Outputs, of shape (N, F*Hout*Wout).
+ * - Hout: Output height.
+ * - Wout: Output width.
+ */
+ N = nrow(X)
+ F = nrow(W)
+ Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1)
+ Wout = as.integer((Win + 2*padw - Wf)/stridew + 1)
+
+ # Convolution - built-in implementation
+ out = conv2d(X, W, input_shape=[N,C,Hin,Win], filter_shape=[F,C,Hf,Wf],
+ stride=[strideh,stridew], padding=[padh,padw])
+
+ # Add bias term to each output filter
+ out = bias_add(out, b)
+}
+
+backward = function(matrix[double] dout, int Hout, int Wout,
+ matrix[double] X, matrix[double] W, matrix[double] b,
+ int C, int Hin, int Win, int Hf, int Wf,
+ int strideh, int stridew, int padh, int padw)
+ return (matrix[double] dX, matrix[double] dW, matrix[double] db) {
+ /*
+ * Computes the backward pass for a 2D spatial convolutional layer
+ * with F filters.
+ *
+ * Inputs:
+ * - dout: Gradient wrt `out` from upstream, of
+ * shape (N, F*Hout*Wout).
+ * - Hout: Output height.
+ * - Wout: Output width.
+ * - X: Inputs, of shape (N, C*Hin*Win).
+ * - W: Weights, of shape (F, C*Hf*Wf).
+ * - b: Biases, of shape (F, 1).
+ * - C: Number of input channels (dimensionality of depth).
+ * - Hin: Input height.
+ * - Win: Input width.
+ * - Hf: Filter height.
+ * - Wf: Filter width.
+ * - strideh: Stride over height.
+ * - stridew: Stride over width.
+ * - padh: Padding for top and bottom sides.
+ * For same output height as input, set `padh = (Hf - 1) / 2`,
+ * assuming `strideh = 1`.
+ * More generally, `padh = (Hin*(strideh-1) + Hf - strideh) / 2`
+ * preserves the spatial dimensions of the input.
+ * - padw: Padding for left and right sides.
+ * For same output width as input, set `padw = (Wf - 1) / 2`,
+ * assuming `stridew = 1`.
+ * More generally, `padw = (Win*(stridew-1) + Wf - stridew) / 2`
+ * preserves the spatial dimensions of the input.
+ *
+ * Outputs:
+ * - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
+ * - dW: Gradient wrt `W`, of shape (F, C*Hf*Wf).
+ * - db: Gradient wrt `b`, of shape (F, 1).
+ */
+ N = nrow(X)
+ F = nrow(W)
+
+ # Partial derivatives for convolution - built-in implementation
+ dW = conv2d_backward_filter(X, dout, stride=[strideh,stridew], padding=[padh,padw],
+ input_shape=[N,C,Hin,Win], filter_shape=[F,C,Hf,Wf])
+ dX = conv2d_backward_data(W, dout, stride=[strideh, stridew], padding=[padh,padw],
+ input_shape=[N,C,Hin,Win], filter_shape=[F,C,Hf,Wf])
+
+ # Partial derivatives for bias vector
+ db = rowSums(matrix(colSums(dout), rows=F, cols=Hout*Wout))
+}
+
+init = function(int F, int C, int Hf, int Wf)
+ return (matrix[double] W, matrix[double] b) {
+ /*
+ * Initialize the parameters of this layer.
+ *
+ * Note: This is just a convenience function, and parameters
+ * may be initialized manually if needed.
+ *
+ * We use the heuristic by He et al., which limits the magnification
+ * of inputs/gradients during forward/backward passes by scaling
+ * unit-Gaussian weights by a factor of sqrt(2/n), under the
+ * assumption of relu neurons.
+ * - http://arxiv.org/abs/1502.01852
+ *
+ * Inputs:
+ * - F: Number of filters.
+ * - C: Number of input channels (dimensionality of depth).
+ * - Hf: Filter height.
+ * - Wf: Filter width.
+ *
+ * Outputs:
+ * - W: Weights, of shape (F, C*Hf*Wf).
+ * - b: Biases, of shape (F, 1).
+ */
+ W = rand(rows=F, cols=C*Hf*Wf, pdf="normal") * sqrt(2.0/(C*Hf*Wf))
+ b = matrix(0, rows=F, cols=1)
+}
+
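The one line in `backward` above that is easy to misread is the bias gradient: `colSums(dout)` produces a (1, F*Hout*Wout) row, the reshape regroups it as (F, Hout*Wout) so each row holds one filter's spatial positions, and `rowSums` collapses those to the (F, 1) bias gradient. A toy shape check with hypothetical sizes:

N = 4
F = 2
Hout = 3
Wout = 3
dout = rand(rows=N, cols=F*Hout*Wout)                        # stand-in upstream gradient
db = rowSums(matrix(colSums(dout), rows=F, cols=Hout*Wout))
print("db is " + nrow(db) + " x " + ncol(db))                # 2 x 1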
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/scripts/staging/SystemML-NN/nn/layers/conv_builtin.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/conv_builtin.dml b/scripts/staging/SystemML-NN/nn/layers/conv_builtin.dml
deleted file mode 100644
index c2b809e..0000000
--- a/scripts/staging/SystemML-NN/nn/layers/conv_builtin.dml
+++ /dev/null
@@ -1,155 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-/*
- * 2D Convolutional layer.
- */
-
-forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
- int C, int Hin, int Win, int Hf, int Wf,
- int strideh, int stridew, int padh, int padw)
- return (matrix[double] out, int Hout, int Wout) {
- /*
- * Computes the forward pass for a 2D spatial convolutional layer with
- * F filters. The input data has N examples, each represented as a 3D
- * volume unrolled into a single vector.
- *
- * Inputs:
- * - X: Inputs, of shape (N, C*Hin*Win).
- * - W: Weights, of shape (F, C*Hf*Wf).
- * - b: Biases, of shape (F, 1).
- * - C: Number of input channels (dimensionality of depth).
- * - Hin: Input height.
- * - Win: Input width.
- * - Hf: Filter height.
- * - Wf: Filter width.
- * - strideh: Stride over height.
- * - stridew: Stride over width.
- * - padh: Padding for top and bottom sides.
- * For same output height as input, set `padh = (Hf - 1) / 2`,
- * assuming `strideh = 1`.
- * More generally, `padh = (Hin*(strideh-1) + Hf - strideh) / 2`
- * preserves the spatial dimensions of the input.
- * - padw: Padding for left and right sides.
- * For same output width as input, set `padw = (Wf - 1) / 2`,
- * assuming `stridew = 1`.
- * More generally, `padw = (Win*(stridew-1) + Wf - stridew) / 2`
- * preserves the spatial dimensions of the input.
- *
- * Outputs:
- * - out: Outputs, of shape (N, F*Hout*Wout).
- * - Hout: Output height.
- * - Wout: Output width.
- */
- N = nrow(X)
- F = nrow(W)
- Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1)
- Wout = as.integer((Win + 2*padw - Wf)/stridew + 1)
-
- # Convolution - built-in implementation
- out = conv2d(X, W, input_shape=[N,C,Hin,Win], filter_shape=[F,C,Hf,Wf],
- stride=[strideh,stridew], padding=[padh,padw])
-
- # Add bias term to each output filter
- out = bias_add(out, b)
-}
-
-backward = function(matrix[double] dout, int Hout, int Wout,
- matrix[double] X, matrix[double] W, matrix[double] b,
- int C, int Hin, int Win, int Hf, int Wf,
- int strideh, int stridew, int padh, int padw)
- return (matrix[double] dX, matrix[double] dW, matrix[double] db) {
- /*
- * Computes the backward pass for a 2D spatial convolutional layer
- * with F filters.
- *
- * Inputs:
- * - dout: Gradient wrt `out` from upstream, of
- * shape (N, F*Hout*Wout).
- * - Hout: Output height.
- * - Wout: Output width.
- * - X: Inputs, of shape (N, C*Hin*Win).
- * - W: Weights, of shape (F, C*Hf*Wf).
- * - b: Biases, of shape (F, 1).
- * - C: Number of input channels (dimensionality of depth).
- * - Hin: Input height.
- * - Win: Input width.
- * - Hf: Filter height.
- * - Wf: Filter width.
- * - strideh: Stride over height.
- * - stridew: Stride over width.
- * - padh: Padding for top and bottom sides.
- * For same output height as input, set `padh = (Hf - 1) / 2`,
- * assuming `strideh = 1`.
- * More generally, `padh = (Hin*(strideh-1) + Hf - strideh) / 2`
- * preserves the spatial dimensions of the input.
- * - padw: Padding for left and right sides.
- * For same output width as input, set `padw = (Wf - 1) / 2`,
- * assuming `stridew = 1`.
- * More generally, `padw = (Win*(stridew-1) + Wf - stridew) / 2`
- * preserves the spatial dimensions of the input.
- *
- * Outputs:
- * - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
- * - dW: Gradient wrt `W`, of shape (F, C*Hf*Wf).
- * - db: Gradient wrt `b`, of shape (F, 1).
- */
- N = nrow(X)
- F = nrow(W)
-
- # Partial derivatives for convolution - built-in implementation
- dW = conv2d_backward_filter(X, dout, stride=[strideh,stridew], padding=[padh,padw],
- input_shape=[N,C,Hin,Win], filter_shape=[F,C,Hf,Wf])
- dX = conv2d_backward_data(W, dout, stride=[strideh, stridew], padding=[padh,padw],
- input_shape=[N,C,Hin,Win], filter_shape=[F,C,Hf,Wf])
-
- # Partial derivatives for bias vector
- db = rowSums(matrix(colSums(dout), rows=F, cols=Hout*Wout))
-}
-
-init = function(int F, int C, int Hf, int Wf)
- return (matrix[double] W, matrix[double] b) {
- /*
- * Initialize the parameters of this layer.
- *
- * Note: This is just a convenience function, and parameters
- * may be initialized manually if needed.
- *
- * We use the heuristic by He et al., which limits the magnification
- * of inputs/gradients during forward/backward passes by scaling
- * unit-Gaussian weights by a factor of sqrt(2/n), under the
- * assumption of relu neurons.
- * - http://arxiv.org/abs/1502.01852
- *
- * Inputs:
- * - F: Number of filters.
- * - C: Number of input channels (dimensionality of depth).
- * - Hf: Filter height.
- * - Wf: Filter width.
- *
- * Outputs:
- * - W: Weights, of shape (F, C*Hf*Wf).
- * - b: Biases, of shape (F, 1).
- */
- W = rand(rows=F, cols=C*Hf*Wf, pdf="normal") * sqrt(2.0/(C*Hf*Wf))
- b = matrix(0, rows=F, cols=1)
-}
-
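A quick worked check of the "same"-padding formula documented above (toy numbers in a standalone DML sketch, not part of the layer itself): for a 5x5 input, a 3x3 filter, and stride 1, the general formula reduces to padh = 1 and recovers Hout = Hin.

  Hin = 5
  Hf = 3
  strideh = 1
  padh = (Hin*(strideh-1) + Hf - strideh) / 2         # = (0 + 3 - 1)/2 = 1
  Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1)  # = (5 + 2 - 3)/1 + 1 = 5
  print("padh=" + padh + ", Hout=" + Hout)            # Hout equals Hin, as intended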
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/scripts/staging/SystemML-NN/nn/layers/max_pool.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/max_pool.dml b/scripts/staging/SystemML-NN/nn/layers/max_pool.dml
deleted file mode 100644
index a12877f..0000000
--- a/scripts/staging/SystemML-NN/nn/layers/max_pool.dml
+++ /dev/null
@@ -1,159 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-/*
- * Max pooling layer.
- */
-source("nn/util.dml") as util
-
-forward = function(matrix[double] X, int C, int Hin, int Win, int Hf, int Wf,
- int strideh, int stridew, int padh, int padw)
- return (matrix[double] out, int Hout, int Wout) {
- /*
- * Computes the forward pass for a 2D spatial max pooling layer.
- * The input data has N examples, each represented as a 3D volume
- * unrolled into a single vector.
- *
- * This implementation uses `im2col` internally for each image to
- * extract local image regions (patches) of each channel slice into
- * columns, and then performs max pooling over the patches to compute
- * the output maps.
- *
- * Inputs:
- * - X: Inputs, of shape (N, C*Hin*Win).
- * - C: Number of input channels (dimensionality of input depth).
- * - Hin: Input height.
- * - Win: Input width.
- * - Hf: Filter height.
- * - Wf: Filter width.
- * - strideh: Stride over height.
- * - stridew: Stride over width.
- * - padh: Padding for top and bottom sides.
- * A typical value is 0.
- * - padw: Padding for left and right sides.
- * A typical value is 0.
- *
- * Outputs:
- * - out: Outputs, of shape (N, C*Hout*Wout).
- * - Hout: Output height.
- * - Wout: Output width.
- */
- N = nrow(X)
- Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1)
- Wout = as.integer((Win + 2*padw - Wf)/stridew + 1)
- pad_value = -1/0 # in max pooling we pad with -infinity
-
- # Create output volume
- out = matrix(0, rows=N, cols=C*Hout*Wout)
-
- # Max pooling - im2col implementation
- parfor (n in 1:N) { # all examples
- img = matrix(X[n,], rows=C, cols=Hin*Win) # reshape
-
- if (padh > 0 | padw > 0) {
- # Pad image to shape (C, (Hin+2*padh)*(Win+2*padw))
- img = util::pad_image(img, Hin, Win, padh, padw, pad_value)
- }
-
- img_maxes = matrix(0, rows=C, cols=Hout*Wout) # zeros
- parfor (c in 1:C) { # all channels
- # Extract local image slice patches into columns with im2col, of shape (Hf*Wf, Hout*Wout)
- img_slice_cols = util::im2col(img[c,], Hin+2*padh, Win+2*padw, Hf, Wf, strideh, stridew)
-
- # Max pooling on patches
- img_maxes[c,] = colMaxs(img_slice_cols)
- }
-
- out[n,] = matrix(img_maxes, rows=1, cols=C*Hout*Wout)
- }
-}
-
-backward = function(matrix[double] dout, int Hout, int Wout, matrix[double] X,
- int C, int Hin, int Win, int Hf, int Wf,
- int strideh, int stridew, int padh, int padw)
- return (matrix[double] dX) {
- /*
- * Computes the backward pass for a 2D spatial max pooling layer.
- * The input data has N examples, each represented as a 3D volume
- * unrolled into a single vector.
- *
- * Inputs:
- * - dout: Gradient wrt `out` from upstream, of
- * shape (N, C*Hout*Wout).
- * - Hout: Output height.
- * - Wout: Output width.
- * - X: Input data matrix, of shape (N, C*Hin*Win).
- * - C: Number of input channels (dimensionality of input depth).
- * - Hin: Input height.
- * - Win: Input width.
- * - Hf: Filter height.
- * - Wf: Filter width.
- * - strideh: Stride over height.
- * - stridew: Stride over width.
- * - padh: Padding for top and bottom sides.
- * A typical value is 0.
- * - padw: Padding for left and right sides.
- * A typical value is 0.
- *
- * Outputs:
- * - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
- */
- N = nrow(X)
- pad_value = -1/0 # in max pooling we pad with -infinity
-
- # Create gradient volume
- dX = matrix(0, rows=N, cols=C*Hin*Win)
-
- # Gradient of max pooling
- parfor (n in 1:N, check=0) { # all examples
- img = matrix(X[n,], rows=C, cols=Hin*Win)
- if (padh > 0 | padw > 0) {
- # Pad image to shape (C, (Hin+2*padh)*(Win+2*padw))
- img = util::pad_image(img, Hin, Win, padh, padw, pad_value)
- }
-
- dimg = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw))
- parfor (c in 1:C, check=0) { # all channels
- img_slice = matrix(img[c,], rows=Hin+2*padh, cols=Win+2*padw)
- dimg_slice = matrix(0, rows=Hin+2*padh, cols=Win+2*padw)
- for (hout in 1:Hout, check=0) { # all output rows
- hin = (hout-1)*strideh + 1
- for (wout in 1:Wout) { # all output columns
- win = (wout-1)*stridew + 1
- img_slice_patch = img_slice[hin:hin+Hf-1, win:win+Wf-1]
- max_val_ind = img_slice_patch == max(img_slice_patch) # max value indicator matrix
- # gradient passes through only for the max value(s) in this patch
- dimg_slice_patch = max_val_ind * dout[n, (c-1)*Hout*Wout + (hout-1)*Wout + wout]
- dimg_slice[hin:hin+Hf-1, win:win+Wf-1] = dimg_slice[hin:hin+Hf-1, win:win+Wf-1]
- + dimg_slice_patch
- }
- }
- dimg[c,] = matrix(dimg_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw))
- }
-
- if (padh > 0 | padw > 0) {
- # Unpad image gradient
- dimg = util::unpad_image(dimg, Hin, Win, padh, padw) # shape (C, (Hin+2*padh)*(Win+2*padw))
- }
- dX[n,] = matrix(dimg, rows=1, cols=C*Hin*Win)
- }
-}
-
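One subtlety worth noting in the backward pass above: `max_val_ind` marks every entry equal to the patch maximum, so when a patch contains ties the upstream gradient is routed to all tied entries, each receiving the full gradient via the elementwise multiply. A tiny standalone DML sketch with toy values:

  patch = matrix("1 3 3 2", rows=2, cols=2)
  max_val_ind = patch == max(patch)  # indicator: 0 1 / 1 0
  dpatch = max_val_ind * 0.5         # upstream gradient 0.5 flows to both tied maxima
  print(toString(dpatch))            # 0.0 0.5 / 0.5 0.0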
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/scripts/staging/SystemML-NN/nn/layers/max_pool2d.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/max_pool2d.dml b/scripts/staging/SystemML-NN/nn/layers/max_pool2d.dml
new file mode 100644
index 0000000..229b7b9
--- /dev/null
+++ b/scripts/staging/SystemML-NN/nn/layers/max_pool2d.dml
@@ -0,0 +1,159 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+/*
+ * 2D Max Pooling layer.
+ */
+source("nn/util.dml") as util
+
+forward = function(matrix[double] X, int C, int Hin, int Win, int Hf, int Wf,
+ int strideh, int stridew, int padh, int padw)
+ return (matrix[double] out, int Hout, int Wout) {
+ /*
+ * Computes the forward pass for a 2D spatial max pooling layer.
+ * The input data has N examples, each represented as a 3D volume
+ * unrolled into a single vector.
+ *
+ * This implementation uses `im2col` internally for each image to
+ * extract local image regions (patches) of each channel slice into
+ * columns, and then performs max pooling over the patches to compute
+ * the output maps.
+ *
+ * Inputs:
+ * - X: Inputs, of shape (N, C*Hin*Win).
+ * - C: Number of input channels (dimensionality of input depth).
+ * - Hin: Input height.
+ * - Win: Input width.
+ * - Hf: Filter height.
+ * - Wf: Filter width.
+ * - strideh: Stride over height.
+ * - stridew: Stride over width.
+ * - padh: Padding for top and bottom sides.
+ * A typical value is 0.
+ * - padw: Padding for left and right sides.
+ * A typical value is 0.
+ *
+ * Outputs:
+ * - out: Outputs, of shape (N, C*Hout*Wout).
+ * - Hout: Output height.
+ * - Wout: Output width.
+ */
+ N = nrow(X)
+ Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1)
+ Wout = as.integer((Win + 2*padw - Wf)/stridew + 1)
+ pad_value = -1/0 # in max pooling we pad with -infinity
+
+ # Create output volume
+ out = matrix(0, rows=N, cols=C*Hout*Wout)
+
+ # Max pooling - im2col implementation
+ parfor (n in 1:N) { # all examples
+ img = matrix(X[n,], rows=C, cols=Hin*Win) # reshape
+
+ if (padh > 0 | padw > 0) {
+ # Pad image to shape (C, (Hin+2*padh)*(Win+2*padw))
+ img = util::pad_image(img, Hin, Win, padh, padw, pad_value)
+ }
+
+ img_maxes = matrix(0, rows=C, cols=Hout*Wout) # zeros
+ parfor (c in 1:C) { # all channels
+ # Extract local image slice patches into columns with im2col, of shape (Hf*Wf, Hout*Wout)
+ img_slice_cols = util::im2col(img[c,], Hin+2*padh, Win+2*padw, Hf, Wf, strideh, stridew)
+
+ # Max pooling on patches
+ img_maxes[c,] = colMaxs(img_slice_cols)
+ }
+
+ out[n,] = matrix(img_maxes, rows=1, cols=C*Hout*Wout)
+ }
+}
+
+backward = function(matrix[double] dout, int Hout, int Wout, matrix[double] X,
+ int C, int Hin, int Win, int Hf, int Wf,
+ int strideh, int stridew, int padh, int padw)
+ return (matrix[double] dX) {
+ /*
+ * Computes the backward pass for a 2D spatial max pooling layer.
+ * The input data has N examples, each represented as a 3D volume
+ * unrolled into a single vector.
+ *
+ * Inputs:
+ * - dout: Gradient wrt `out` from upstream, of
+ * shape (N, C*Hout*Wout).
+ * - Hout: Output height.
+ * - Wout: Output width.
+ * - X: Inputs, of shape (N, C*Hin*Win).
+ * - C: Number of input channels (dimensionality of input depth).
+ * - Hin: Input height.
+ * - Win: Input width.
+ * - Hf: Filter height.
+ * - Wf: Filter width.
+ * - strideh: Stride over height.
+ * - stridew: Stride over width.
+ * - padh: Padding for top and bottom sides.
+ * A typical value is 0.
+ * - padw: Padding for left and right sides.
+ * A typical value is 0.
+ *
+ * Outputs:
+ * - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
+ */
+ N = nrow(X)
+ pad_value = -1/0 # in max pooling we pad with -infinity
+
+ # Create gradient volume
+ dX = matrix(0, rows=N, cols=C*Hin*Win)
+
+ # Gradient of max pooling
+ parfor (n in 1:N, check=0) { # all examples
+ img = matrix(X[n,], rows=C, cols=Hin*Win)
+ if (padh > 0 | padw > 0) {
+ # Pad image to shape (C, (Hin+2*padh)*(Win+2*padw))
+ img = util::pad_image(img, Hin, Win, padh, padw, pad_value)
+ }
+
+ dimg = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw))
+ parfor (c in 1:C, check=0) { # all channels
+ img_slice = matrix(img[c,], rows=Hin+2*padh, cols=Win+2*padw)
+ dimg_slice = matrix(0, rows=Hin+2*padh, cols=Win+2*padw)
+ for (hout in 1:Hout, check=0) { # all output rows
+ hin = (hout-1)*strideh + 1
+ for (wout in 1:Wout) { # all output columns
+ win = (wout-1)*stridew + 1
+ img_slice_patch = img_slice[hin:hin+Hf-1, win:win+Wf-1]
+ max_val_ind = img_slice_patch == max(img_slice_patch) # max value indicator matrix
+ # gradient passes through only for the max value(s) in this patch
+ dimg_slice_patch = max_val_ind * dout[n, (c-1)*Hout*Wout + (hout-1)*Wout + wout]
+ dimg_slice[hin:hin+Hf-1, win:win+Wf-1] = dimg_slice[hin:hin+Hf-1, win:win+Wf-1]
+ + dimg_slice_patch
+ }
+ }
+ dimg[c,] = matrix(dimg_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw))
+ }
+
+ if (padh > 0 | padw > 0) {
+ # Unpad image gradient
+ dimg = util::unpad_image(dimg, Hin, Win, padh, padw) # shape (C, (Hin+2*padh)*(Win+2*padw))
+ }
+ dX[n,] = matrix(dimg, rows=1, cols=C*Hin*Win)
+ }
+}
+
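To see why `colMaxs` over the im2col matrix yields the pooled output map, consider 2x2 pooling with stride 2 on a 4x4 slice holding the values 1..16 in row-major order. Below is a standalone DML sketch with the four patch columns built by hand (hand-built here, intended to match the (Hf*Wf, Hout*Wout) layout that `util::im2col` produces):

  # columns are the four 2x2 patches: {1,2,5,6}, {3,4,7,8}, {9,10,13,14}, {11,12,15,16}
  img_slice_cols = matrix("1 3 9 11 2 4 10 12 5 7 13 15 6 8 14 16", rows=4, cols=4)
  pooled = colMaxs(img_slice_cols)          # 1x4 row of per-patch maxima: 6 8 14 16
  out_map = matrix(pooled, rows=2, cols=2)  # reshape to the 2x2 output map
  print(toString(out_map))                  # 6 8 / 14 16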
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/scripts/staging/SystemML-NN/nn/layers/max_pool2d_builtin.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/max_pool2d_builtin.dml b/scripts/staging/SystemML-NN/nn/layers/max_pool2d_builtin.dml
new file mode 100644
index 0000000..be4e195
--- /dev/null
+++ b/scripts/staging/SystemML-NN/nn/layers/max_pool2d_builtin.dml
@@ -0,0 +1,103 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+/*
+ * 2D Max Pooling layer.
+ *
+ * This implementation uses a built-in operator for higher performance.
+ */
+
+forward = function(matrix[double] X, int C, int Hin, int Win, int Hf, int Wf,
+ int strideh, int stridew, int padh, int padw)
+ return (matrix[double] out, int Hout, int Wout) {
+ /*
+ * Computes the forward pass for a 2D spatial max pooling layer.
+ * The input data has N examples, each represented as a 3D volume
+ * unrolled into a single vector.
+ *
+ * This implementation uses a built-in operator for higher
+ * performance.
+ *
+ * Inputs:
+ * - X: Inputs, of shape (N, C*Hin*Win).
+ * - C: Number of input channels (dimensionality of input depth).
+ * - Hin: Input height.
+ * - Win: Input width.
+ * - Hf: Filter height.
+ * - Wf: Filter width.
+ * - strideh: Stride over height.
+ * - stridew: Stride over width.
+ * - padh: Padding for top and bottom sides.
+ * A typical value is 0.
+ * - padw: Padding for left and right sides.
+ * A typical value is 0.
+ *
+ * Outputs:
+ * - out: Outputs, of shape (N, C*Hout*Wout).
+ * - Hout: Output height.
+ * - Wout: Output width.
+ */
+ N = nrow(X)
+ Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1)
+ Wout = as.integer((Win + 2*padw - Wf)/stridew + 1)
+
+ # Max pooling - built-in implementation
+ out = max_pool(X, input_shape=[N,C,Hin,Win], pool_size=[Hf,Wf],
+ stride=[strideh,stridew], padding=[padh,padw])
+}
+
+backward = function(matrix[double] dout, int Hout, int Wout, matrix[double] X,
+ int C, int Hin, int Win, int Hf, int Wf,
+ int strideh, int stridew, int padh, int padw)
+ return (matrix[double] dX) {
+ /*
+ * Computes the backward pass for a 2D spatial max pooling layer.
+ * The input data has N examples, each represented as a 3D volume
+ * unrolled into a single vector.
+ *
+ * Inputs:
+ * - dout: Gradient wrt `out` from upstream, of
+ * shape (N, C*Hout*Wout).
+ * - Hout: Output height.
+ * - Wout: Output width.
+ * - X: Inputs, of shape (N, C*Hin*Win).
+ * - C: Number of input channels (dimensionality of input depth).
+ * - Hin: Input height.
+ * - Win: Input width.
+ * - Hf: Filter height.
+ * - Wf: Filter width.
+ * - strideh: Stride over height.
+ * - stridew: Stride over width.
+ * - padh: Padding for top and bottom sides.
+ * A typical value is 0.
+ * - padw: Padding for left and right sides.
+ * A typical value is 0.
+ *
+ * Outputs:
+ * - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
+ */
+ N = nrow(X)
+
+ # Gradient of max pooling
+ dX = max_pool_backward(X, dout, input_shape=[N,C,Hin,Win], pool_size=[Hf,Wf],
+ stride=[strideh,stridew], padding=[padh,padw])
+}
+
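For reference, a minimal usage sketch of this layer (toy shapes; the source path mirrors the test files below, and the random `dout` merely stands in for an upstream gradient):

  source("nn/layers/max_pool2d_builtin.dml") as max_pool2d_builtin

  N = 2
  C = 3
  Hin = 8
  Win = 8
  X = rand(rows=N, cols=C*Hin*Win)
  # 2x2 pooling, stride 2, no padding: Hout = Wout = (8-2)/2 + 1 = 4
  [out, Hout, Wout] = max_pool2d_builtin::forward(X, C, Hin, Win, 2, 2, 2, 2, 0, 0)
  dout = rand(rows=N, cols=C*Hout*Wout)  # placeholder upstream gradient
  dX = max_pool2d_builtin::backward(dout, Hout, Wout, X, C, Hin, Win, 2, 2, 2, 2, 0, 0)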
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/scripts/staging/SystemML-NN/nn/layers/max_pool_builtin.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/max_pool_builtin.dml b/scripts/staging/SystemML-NN/nn/layers/max_pool_builtin.dml
deleted file mode 100644
index f1cb863..0000000
--- a/scripts/staging/SystemML-NN/nn/layers/max_pool_builtin.dml
+++ /dev/null
@@ -1,103 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-/*
- * Max pooling layer.
- */
-
-forward = function(matrix[double] X, int C, int Hin, int Win, int Hf, int Wf,
- int strideh, int stridew, int padh, int padw)
- return (matrix[double] out, int Hout, int Wout) {
- /*
- * Computes the forward pass for a 2D spatial max pooling layer.
- * The input data has N examples, each represented as a 3D volume
- * unrolled into a single vector.
- *
- * This implementation uses `im2col` internally for each image to
- * extract local image regions (patches) of each channel slice into
- * columns, and then performs max pooling over the patches to compute
- * the output maps.
- *
- * Inputs:
- * - X: Inputs, of shape (N, C*Hin*Win).
- * - C: Number of input channels (dimensionality of input depth).
- * - Hin: Input height.
- * - Win: Input width.
- * - Hf: Filter height.
- * - Wf: Filter width.
- * - strideh: Stride over height.
- * - stridew: Stride over width.
- * - padh: Padding for top and bottom sides.
- * A typical value is 0.
- * - padw: Padding for left and right sides.
- * A typical value is 0.
- *
- * Outputs:
- * - out: Outputs, of shape (N, C*Hout*Wout).
- * - Hout: Output height.
- * - Wout: Output width.
- */
- N = nrow(X)
- Hout = as.integer((Hin-Hf)/strideh + 1)
- Wout = as.integer((Win-Wf)/stridew + 1)
-
- # Max pooling - built-in implementation
- out = max_pool(X, input_shape=[N,C,Hin,Win], pool_size=[Hf,Wf],
- stride=[strideh,stridew], padding=[padh,padw])
-}
-
-backward = function(matrix[double] dout, int Hout, int Wout, matrix[double] X,
- int C, int Hin, int Win, int Hf, int Wf,
- int strideh, int stridew, int padh, int padw)
- return (matrix[double] dX) {
- /*
- * Computes the backward pass for a 2D spatial max pooling layer.
- * The input data has N examples, each represented as a 3D volume
- * unrolled into a single vector.
- *
- * Inputs:
- * - dout: Gradient wrt `out` from upstream, of
- * shape (N, C*Hout*Wout).
- * - Hout: Output height.
- * - Wout: Output width.
- * - X: Inputs, of shape (N, C*Hin*Win).
- * - C: Number of input channels (dimensionality of input depth).
- * - Hin: Input height.
- * - Win: Input width.
- * - Hf: Filter height.
- * - Wf: Filter width.
- * - strideh: Stride over height.
- * - stridew: Stride over width.
- * - padh: Padding for top and bottom sides.
- * A typical value is 0.
- * - padw: Padding for left and right sides.
- * A typical value is 0.
- *
- * Outputs:
- * - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
- */
- N = nrow(X)
-
- # Gradient of max pooling
- dX = max_pool_backward(X, dout, input_shape=[N,C,Hin,Win], pool_size=[Hf,Wf],
- stride=[strideh,stridew], padding=[padh,padw])
-}
-
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/scripts/staging/SystemML-NN/nn/test/conv2d_simple.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/conv2d_simple.dml b/scripts/staging/SystemML-NN/nn/test/conv2d_simple.dml
new file mode 100644
index 0000000..efd99c3
--- /dev/null
+++ b/scripts/staging/SystemML-NN/nn/test/conv2d_simple.dml
@@ -0,0 +1,215 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+/*
+ * 2D Convolutional layer.
+ *
+ * This implementation is intended to be a simple, reference version.
+ */
+
+forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
+ int C, int Hin, int Win, int Hf, int Wf,
+ int strideh, int stridew, int padh, int padw)
+ return (matrix[double] out, int Hout, int Wout) {
+ /*
+ * Computes the forward pass for a 2D spatial convolutional layer with
+ * F filters. The input data has N examples, each represented as a 3D
+ * volume unrolled into a single vector.
+ *
+ * This implementation is intended to be a simple, reference version.
+ *
+ * Inputs:
+ * - X: Inputs, of shape (N, C*Hin*Win).
+ * - W: Weights, of shape (F, C*Hf*Wf).
+ * - b: Biases, of shape (F, 1).
+ * - C: Number of input channels (dimensionality of input depth).
+ * - Hin: Input height.
+ * - Win: Input width.
+ * - Hf: Filter height.
+ * - Wf: Filter width.
+ * - strideh: Stride over height.
+ * - stridew: Stride over width.
+ * - padh: Padding for top and bottom sides.
+ * - padw: Padding for left and right sides.
+ *
+ * Outputs:
+ * - out: Outputs, of shape (N, F*Hout*Wout).
+ * - Hout: Output height.
+ * - Wout: Output width.
+ */
+ N = nrow(X)
+ F = nrow(W)
+ Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1)
+ Wout = as.integer((Win + 2*padw - Wf)/stridew + 1)
+
+ # Create output volume
+ out = matrix(0, rows=N, cols=F*Hout*Wout)
+
+ # Convolution - Simple reference implementation
+ parfor (n in 1:N) { # all examples
+ Xn = matrix(X[n,], rows=C, cols=Hin*Win)
+ # Pad image
+ Xn_padded = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw)) # zeros
+ parfor (c in 1:C) {
+ Xn_slice = matrix(Xn[c,], rows=Hin, cols=Win) # depth slice C reshaped
+ Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw)
+ Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice
+ Xn_padded[c,] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape
+ }
+ # Convolve image with filters
+ parfor (f in 1:F, check=0) { # all filters
+ parfor (hout in 1:Hout, check=0) { # all output rows
+ h0 = (hout-1)*strideh + 1
+ parfor (wout in 1:Wout, check=0) { # all output columns
+ w0 = (wout-1)*stridew + 1
+ # Create a patch of the input example corresponding spatially to the filter sizes
+ Xn_padded_patch = matrix(0, rows=C, cols=Hf*Wf) # zeros
+ parfor (c in 1:C, check=0) {
+ Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw) # reshape
+ Xn_padded_patch[c,] = matrix(Xn_padded_slice[h0:h0-1+Hf, w0:w0-1+Wf], rows=1,
+ cols=Hf*Wf) # reshape
+ }
+ out[n, (f-1)*Hout*Wout + (hout-1)*Wout + wout] =
+ W[f,] %*% matrix(Xn_padded_patch, rows=C*Hf*Wf, cols=1) + b[f,]
+ }
+ }
+ }
+ }
+}
+
+backward = function(matrix[double] dout, int Hout, int Wout,
+ matrix[double] X, matrix[double] W, matrix[double] b,
+ int C, int Hin, int Win, int Hf, int Wf,
+ int strideh, int stridew, int padh, int padw)
+ return (matrix[double] dX, matrix[double] dW, matrix[double] db) {
+ /*
+ * Computes the backward pass for a 2D spatial convolutional layer
+ * with F filters.
+ *
+ * This implementation is intended to be a simple, reference version.
+ *
+ * Inputs:
+ * - dout: Gradient wrt `out` from upstream, of
+ * shape (N, F*Hout*Wout).
+ * - Hout: Output height.
+ * - Wout: Output width.
+ * - X: Inputs, of shape (N, C*Hin*Win).
+ * - W: Weights, of shape (F, C*Hf*Wf).
+ * - b: Biases, of shape (F, 1).
+ * - C: Number of input channels (dimensionality of input depth).
+ * - Hin: Input height.
+ * - Win: Input width.
+ * - Hf: Filter height.
+ * - Wf: Filter width.
+ * - strideh: Stride over height.
+ * - stridew: Stride over width.
+ * - padh: Padding for top and bottom sides.
+ * - padw: Padding for left and right sides.
+ *
+ * Outputs:
+ * - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
+ * - dW: Gradient wrt `W`, of shape (F, C*Hf*Wf).
+ * - db: Gradient wrt `b`, of shape (F, 1).
+ */
+ N = nrow(X)
+ F = nrow(W)
+ Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1)
+ Wout = as.integer((Win + 2*padw - Wf)/stridew + 1)
+
+ # Create gradient volumes
+ dX = matrix(0, rows=N, cols=C*Hin*Win)
+ dW = matrix(0, rows=F, cols=C*Hf*Wf)
+ db = matrix(0, rows=F, cols=1)
+
+ # Partial derivatives for convolution - Simple reference implementation
+ for (n in 1:N) { # all examples
+ Xn = matrix(X[n,], rows=C, cols=Hin*Win)
+ # Pad image
+ Xn_padded = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw)) # zeros
+ parfor (c in 1:C) {
+ Xn_slice = matrix(Xn[c,], rows=Hin, cols=Win) # depth slice C reshaped
+ Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw)
+ Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice
+ Xn_padded[c,] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape
+ }
+ dXn_padded = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw))
+ for (f in 1:F) { # all filters
+ for (hout in 1:Hout) { # all output rows
+ h0 = (hout-1) * strideh + 1
+ for (wout in 1:Wout) { # all output columns
+ w0 = (wout-1) * stridew + 1
+ # Create a patch of the input example corresponding spatially to the filter sizes
+ Xn_padded_patch = matrix(0, rows=C, cols=Hf*Wf) # zeros
+ dXn_padded_patch = matrix(W[f,] * dout[n, (f-1)*Hout*Wout + (hout-1)*Wout + wout],
+ rows=C, cols=Hf*Wf) # reshape
+ for (c in 1:C) {
+ Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw) # reshape
+ Xn_padded_patch[c,] = matrix(Xn_padded_slice[h0:h0-1+Hf, w0:w0-1+Wf],
+ rows=1, cols=Hf*Wf) # reshape
+ dXn_padded_slice = matrix(0, rows=Hin+2*padh, cols=Win+2*padw)
+ dXn_padded_slice[h0:h0-1+Hf, w0:w0-1+Wf] = matrix(dXn_padded_patch[c,],
+ rows=Hf, cols=Wf) # reshape
+ dXn_padded[c,] = dXn_padded[c,] + matrix(dXn_padded_slice,
+ rows=1, cols=(Hin+2*padh)*(Win+2*padw))
+ }
+ dW[f,] = dW[f,]
+ + matrix(Xn_padded_patch, rows=1, cols=C*Hf*Wf)
+ * dout[n, (f-1)*Hout*Wout + (hout-1)*Wout + wout]
+ db[f,] = db[f,] + dout[n, (f-1)*Hout*Wout + (hout-1)*Wout + wout]
+ }
+ }
+ }
+ # Unpad derivatives wrt the input
+ dXn = matrix(0, rows=C, cols=Hin*Win)
+ parfor (c in 1:C, check=0) {
+ dXn_padded_slice = matrix(dXn_padded[c,], rows=(Hin+2*padh), cols=(Win+2*padw))
+ dXn_slice = dXn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win]
+ dXn[c,] = matrix(dXn_slice, rows=1, cols=Hin*Win)
+ }
+ dX[n,] = matrix(dXn, rows=1, cols=C*Hin*Win)
+ }
+}
+
+init = function(int F, int C, int Hf, int Wf)
+ return (matrix[double] W, matrix[double] b) {
+ /*
+ * Initialize the parameters of this layer.
+ *
+ * We use the heuristic by He et al., which limits the magnification
+ * of inputs/gradients during forward/backward passes by scaling
+ * unit-Gaussian weights by a factor of sqrt(2/n), under the
+ * assumption of relu neurons.
+ * - http://arxiv.org/abs/1502.01852
+ *
+ * Inputs:
+ * - F: Number of filters.
+ * - C: Number of input channels (dimensionality of depth).
+ * - Hf: Filter height.
+ * - Wf: Filter width.
+ *
+ * Outputs:
+ * - W: Weights, of shape (F, C*Hf*Wf).
+ * - b: Biases, of shape (F, 1).
+ */
+ W = rand(rows=F, cols=C*Hf*Wf, pdf="normal") * sqrt(2.0/(C*Hf*Wf))
+ b = matrix(0, rows=F, cols=1)
+}
+
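As a concrete instance of the per-position computation in `forward` above (the dot product `W[f,] %*% vec(patch) + b[f,]`), here is a standalone DML sketch with one channel and a single 2x2 filter applied at one spatial position (all values toy):

  W = matrix("1 0 -1 2", rows=1, cols=4)               # one filter, C*Hf*Wf = 4
  b = matrix(0.5, rows=1, cols=1)
  Xn_padded_patch = matrix("3 1 2 4", rows=1, cols=4)  # one flattened patch
  val = W %*% matrix(Xn_padded_patch, rows=4, cols=1) + b
  print(as.scalar(val))  # 3*1 + 1*0 + 2*(-1) + 4*2 + 0.5 = 9.5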
[5/7] incubator-systemml git commit: [SYSTEMML-1453] Update Conv &
Max Pooling layer names to include "2D"
Posted by du...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/scripts/staging/SystemML-NN/nn/test/conv_simple.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/conv_simple.dml b/scripts/staging/SystemML-NN/nn/test/conv_simple.dml
deleted file mode 100644
index efd99c3..0000000
--- a/scripts/staging/SystemML-NN/nn/test/conv_simple.dml
+++ /dev/null
@@ -1,215 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-/*
- * 2D Convolutional layer.
- *
- * This implementation is intended to be a simple, reference version.
- */
-
-forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
- int C, int Hin, int Win, int Hf, int Wf,
- int strideh, int stridew, int padh, int padw)
- return (matrix[double] out, int Hout, int Wout) {
- /*
- * Computes the forward pass for a 2D spatial convolutional layer with
- * F filters. The input data has N examples, each represented as a 3D
- * volume unrolled into a single vector.
- *
- * This implementation is intended to be a simple, reference version.
- *
- * Inputs:
- * - X: Inputs, of shape (N, C*Hin*Win).
- * - W: Weights, of shape (F, C*Hf*Wf).
- * - b: Biases, of shape (F, 1).
- * - C: Number of input channels (dimensionality of input depth).
- * - Hin: Input height.
- * - Win: Input width.
- * - Hf: Filter height.
- * - Wf: Filter width.
- * - strideh: Stride over height.
- * - stridew: Stride over width.
- * - padh: Padding for top and bottom sides.
- * - padw: Padding for left and right sides.
- *
- * Outputs:
- * - out: Outputs, of shape (N, F*Hout*Wout).
- * - Hout: Output height.
- * - Wout: Output width.
- */
- N = nrow(X)
- F = nrow(W)
- Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1)
- Wout = as.integer((Win + 2*padw - Wf)/stridew + 1)
-
- # Create output volume
- out = matrix(0, rows=N, cols=F*Hout*Wout)
-
- # Convolution - Simple reference implementation
- parfor (n in 1:N) { # all examples
- Xn = matrix(X[n,], rows=C, cols=Hin*Win)
- # Pad image
- Xn_padded = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw)) # zeros
- parfor (c in 1:C) {
- Xn_slice = matrix(Xn[c,], rows=Hin, cols=Win) # depth slice C reshaped
- Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw)
- Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice
- Xn_padded[c,] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape
- }
- # Convolve image with filters
- parfor (f in 1:F, check=0) { # all filters
- parfor (hout in 1:Hout, check=0) { # all output rows
- h0 = (hout-1)*strideh + 1
- parfor (wout in 1:Wout, check=0) { # all output columns
- w0 = (wout-1)*stridew + 1
- # Create a patch of the input example corresponding spatially to the filter sizes
- Xn_padded_patch = matrix(0, rows=C, cols=Hf*Wf) # zeros
- parfor (c in 1:C, check=0) {
- Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw) # reshape
- Xn_padded_patch[c,] = matrix(Xn_padded_slice[h0:h0-1+Hf, w0:w0-1+Wf], rows=1,
- cols=Hf*Wf) # reshape
- }
- out[n, (f-1)*Hout*Wout + (hout-1)*Wout + wout] =
- W[f,] %*% matrix(Xn_padded_patch, rows=C*Hf*Wf, cols=1) + b[f,]
- }
- }
- }
- }
-}
-
-backward = function(matrix[double] dout, int Hout, int Wout,
- matrix[double] X, matrix[double] W, matrix[double] b,
- int C, int Hin, int Win, int Hf, int Wf,
- int strideh, int stridew, int padh, int padw)
- return (matrix[double] dX, matrix[double] dW, matrix[double] db) {
- /*
- * Computes the backward pass for a 2D spatial convolutional layer
- * with F filters.
- *
- * This implementation is intended to be a simple, reference version.
- *
- * Inputs:
- * - dout: Gradient wrt `out` from upstream, of
- * shape (N, F*Hout*Wout).
- * - Hout: Output height.
- * - Wout: Output width.
- * - X: Inputs, of shape (N, C*Hin*Win).
- * - W: Weights, of shape (F, C*Hf*Wf).
- * - b: Biases, of shape (F, 1).
- * - C: Number of input channels (dimensionality of input depth).
- * - Hin: Input height.
- * - Win: Input width.
- * - Hf: Filter height.
- * - Wf: Filter width.
- * - strideh: Stride over height.
- * - stridew: Stride over width.
- * - padh: Padding for top and bottom sides.
- * - padw: Padding for left and right sides.
- *
- * Outputs:
- * - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
- * - dW: Gradient wrt `W`, of shape (F, C*Hf*Wf).
- * - db: Gradient wrt `b`, of shape (F, 1).
- */
- N = nrow(X)
- F = nrow(W)
- Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1)
- Wout = as.integer((Win + 2*padw - Wf)/stridew + 1)
-
- # Create gradient volumes
- dX = matrix(0, rows=N, cols=C*Hin*Win)
- dW = matrix(0, rows=F, cols=C*Hf*Wf)
- db = matrix(0, rows=F, cols=1)
-
- # Partial derivatives for convolution - Simple reference implementation
- for (n in 1:N) { # all examples
- Xn = matrix(X[n,], rows=C, cols=Hin*Win)
- # Pad image
- Xn_padded = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw)) # zeros
- parfor (c in 1:C) {
- Xn_slice = matrix(Xn[c,], rows=Hin, cols=Win) # depth slice C reshaped
- Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw)
- Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice
- Xn_padded[c,] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape
- }
- dXn_padded = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw))
- for (f in 1:F) { # all filters
- for (hout in 1:Hout) { # all output rows
- h0 = (hout-1) * strideh + 1
- for (wout in 1:Wout) { # all output columns
- w0 = (wout-1) * stridew + 1
- # Create a patch of the input example corresponding spatially to the filter sizes
- Xn_padded_patch = matrix(0, rows=C, cols=Hf*Wf) # zeros
- dXn_padded_patch = matrix(W[f,] * dout[n, (f-1)*Hout*Wout + (hout-1)*Wout + wout],
- rows=C, cols=Hf*Wf) # reshape
- for (c in 1:C) {
- Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw) # reshape
- Xn_padded_patch[c,] = matrix(Xn_padded_slice[h0:h0-1+Hf, w0:w0-1+Wf],
- rows=1, cols=Hf*Wf) # reshape
- dXn_padded_slice = matrix(0, rows=Hin+2*padh, cols=Win+2*padw)
- dXn_padded_slice[h0:h0-1+Hf, w0:w0-1+Wf] = matrix(dXn_padded_patch[c,],
- rows=Hf, cols=Wf) # reshape
- dXn_padded[c,] = dXn_padded[c,] + matrix(dXn_padded_slice,
- rows=1, cols=(Hin+2*padh)*(Win+2*padw))
- }
- dW[f,] = dW[f,]
- + matrix(Xn_padded_patch, rows=1, cols=C*Hf*Wf)
- * dout[n, (f-1)*Hout*Wout + (hout-1)*Wout + wout]
- db[f,] = db[f,] + dout[n, (f-1)*Hout*Wout + (hout-1)*Wout + wout]
- }
- }
- }
- # Unpad derivs on input
- dXn = matrix(0, rows=C, cols=Hin*Win)
- parfor (c in 1:C, check=0) {
- dXn_padded_slice = matrix(dXn_padded[c,], rows=(Hin+2*padh), cols=(Win+2*padw))
- dXn_slice = dXn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win]
- dXn[c,] = matrix(dXn_slice, rows=1, cols=Hin*Win)
- }
- dX[n,] = matrix(dXn, rows=1, cols=C*Hin*Win)
- }
-}
-
-init = function(int F, int C, int Hf, int Wf)
- return (matrix[double] W, matrix[double] b) {
- /*
- * Initialize the parameters of this layer.
- *
- * We use the heuristic by He et al., which limits the magnification
- * of inputs/gradients during forward/backward passes by scaling
- * unit-Gaussian weights by a factor of sqrt(2/n), under the
- * assumption of relu neurons.
- * - http://arxiv.org/abs/1502.01852
- *
- * Inputs:
- * - F: Number of filters.
- * - C: Number of input channels (dimensionality of depth).
- * - Hf: Filter height.
- * - Wf: Filter width.
- *
- * Outputs:
- * - W: Weights, of shape (F, C*Hf*Wf).
- * - b: Biases, of shape (F, 1).
- */
- W = rand(rows=F, cols=C*Hf*Wf, pdf="normal") * sqrt(2.0/(C*Hf*Wf))
- b = matrix(0, rows=F, cols=1)
-}
-
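A quick numeric check of the sqrt(2/n) scale used in `init` above (toy shape; the F = 4 and the print are illustrative only): for C = 3 and 3x3 filters, the fan-in is n = 27 and the scale is sqrt(2/27), about 0.272.

  C = 3
  Hf = 3
  Wf = 3
  n = C*Hf*Wf          # fan-in = 27
  scale = sqrt(2.0/n)  # ~0.272
  W = rand(rows=4, cols=n, pdf="normal") * scale  # F = 4 filters
  print("fan-in=" + n + ", scale=" + scale)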
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/scripts/staging/SystemML-NN/nn/test/grad_check.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/grad_check.dml b/scripts/staging/SystemML-NN/nn/test/grad_check.dml
index ba9a317..27f4420 100644
--- a/scripts/staging/SystemML-NN/nn/test/grad_check.dml
+++ b/scripts/staging/SystemML-NN/nn/test/grad_check.dml
@@ -24,8 +24,8 @@
*/
source("nn/layers/affine.dml") as affine
source("nn/layers/batch_norm.dml") as batch_norm
-source("nn/layers/conv.dml") as conv
-source("nn/layers/conv_builtin.dml") as conv_builtin
+source("nn/layers/conv2d.dml") as conv2d
+source("nn/layers/conv2d_builtin.dml") as conv2d_builtin
source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss
source("nn/layers/dropout.dml") as dropout
source("nn/layers/l1_loss.dml") as l1_loss
@@ -34,16 +34,16 @@ source("nn/layers/l2_loss.dml") as l2_loss
source("nn/layers/l2_reg.dml") as l2_reg
source("nn/layers/log_loss.dml") as log_loss
source("nn/layers/lstm.dml") as lstm
-source("nn/layers/max_pool.dml") as max_pool
-source("nn/layers/max_pool_builtin.dml") as max_pool_builtin
+source("nn/layers/max_pool2d.dml") as max_pool2d
+source("nn/layers/max_pool2d_builtin.dml") as max_pool2d_builtin
source("nn/layers/relu.dml") as relu
source("nn/layers/rnn.dml") as rnn
source("nn/layers/sigmoid.dml") as sigmoid
source("nn/layers/softmax.dml") as softmax
source("nn/layers/spatial_batch_norm.dml") as spatial_batch_norm
source("nn/layers/tanh.dml") as tanh
-source("nn/test/conv_simple.dml") as conv_simple
-source("nn/test/max_pool_simple.dml") as max_pool_simple
+source("nn/test/conv2d_simple.dml") as conv2d_simple
+source("nn/test/max_pool2d_simple.dml") as max_pool2d_simple
source("nn/test/util.dml") as test_util
affine = function() {
@@ -229,11 +229,11 @@ batch_norm = function() {
}
}
-conv = function() {
+conv2d = function() {
/*
- * Gradient check for the convolutional layer using `im2col`.
+ * Gradient check for the 2D convolutional layer using `im2col`.
*/
- print("Grad checking the `im2col` convolutional layer with L2 loss.")
+ print("Grad checking the `im2col` 2D convolutional layer with L2 loss.")
# Generate data
N = 2 # num examples
@@ -249,13 +249,13 @@ conv = function() {
y = rand(rows=N, cols=F*Hin*Win)
# Create layers
- [W, b] = conv::init(F, C, Hf, Wf)
+ [W, b] = conv2d::init(F, C, Hf, Wf)
# Compute analytical gradients of loss wrt parameters
- [out, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [out, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
dout = l2_loss::backward(out, y)
- [dX, dW, db] = conv::backward(dout, Hout, Wout, X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ [dX, dW, db] = conv2d::backward(dout, Hout, Wout, X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
# Grad check
h = 1e-5
@@ -265,10 +265,10 @@ conv = function() {
# Compute numerical derivative
old = as.scalar(X[i,j])
X[i,j] = old - h
- [outmh, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [outmh, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
lossmh = l2_loss::forward(outmh, y)
X[i,j] = old + h
- [outph, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [outph, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
lossph = l2_loss::forward(outph, y)
X[i,j] = old # reset
dX_num = (lossph-lossmh) / (2*h) # numerical derivative
@@ -284,10 +284,10 @@ conv = function() {
# Compute numerical derivative
old = as.scalar(W[i,j])
W[i,j] = old - h
- [outmh, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [outmh, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
lossmh = l2_loss::forward(outmh, y)
W[i,j] = old + h
- [outph, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [outph, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
lossph = l2_loss::forward(outph, y)
W[i,j] = old # reset
dW_num = (lossph-lossmh) / (2*h) # numerical derivative
@@ -303,10 +303,10 @@ conv = function() {
# Compute numerical derivative
old = as.scalar(b[i,j])
b[i,j] = old - h
- [outmh, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [outmh, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
lossmh = l2_loss::forward(outmh, y)
b[i,j] = old + h
- [outph, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [outph, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
lossph = l2_loss::forward(outph, y)
b[i,j] = old # reset
db_num = (lossph-lossmh) / (2*h) # numerical derivative
@@ -317,12 +317,12 @@ conv = function() {
}
}
-conv_builtin = function() {
+conv2d_builtin = function() {
/*
- * Gradient check for the convolutional layer using built-in
+ * Gradient check for the 2D convolutional layer using built-in
* functions.
*/
- print("Grad checking the built-in convolutional layer with L2 loss.")
+ print("Grad checking the built-in 2D convolutional layer with L2 loss.")
# Generate data
N = 2 # num examples
@@ -338,13 +338,14 @@ conv_builtin = function() {
y = rand(rows=N, cols=F*Hin*Win)
# Create layers
- [W, b] = conv_builtin::init(F, C, Hf, Wf)
+ [W, b] = conv2d_builtin::init(F, C, Hf, Wf)
# Compute analytical gradients of loss wrt parameters
- [out, Hout, Wout] = conv_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [out, Hout, Wout] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
dout = l2_loss::backward(out, y)
- [dX, dW, db] = conv_builtin::backward(dout, Hout, Wout, X, W, b, C, Hin, Win, Hf, Wf,
- stride, stride, pad, pad)
+ [dX, dW, db] = conv2d_builtin::backward(dout, Hout, Wout, X, W, b, C, Hin, Win, Hf, Wf,
+ stride, stride, pad, pad)
# Grad check
h = 1e-5
@@ -354,12 +355,12 @@ conv_builtin = function() {
# Compute numerical derivative
old = as.scalar(X[i,j])
X[i,j] = old - h
- [outmh, Hout, Wout] = conv_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ [outmh, Hout, Wout] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
lossmh = l2_loss::forward(outmh, y)
X[i,j] = old + h
- [outph, Hout, Wout] = conv_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ [outph, Hout, Wout] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
lossph = l2_loss::forward(outph, y)
X[i,j] = old # reset
dX_num = (lossph-lossmh) / (2*h) # numerical derivative
@@ -375,12 +376,12 @@ conv_builtin = function() {
# Compute numerical derivative
old = as.scalar(W[i,j])
W[i,j] = old - h
- [outmh, Hout, Wout] = conv_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ [outmh, Hout, Wout] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
lossmh = l2_loss::forward(outmh, y)
W[i,j] = old + h
- [outph, Hout, Wout] = conv_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ [outph, Hout, Wout] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
lossph = l2_loss::forward(outph, y)
W[i,j] = old # reset
dW_num = (lossph-lossmh) / (2*h) # numerical derivative
@@ -396,12 +397,12 @@ conv_builtin = function() {
# Compute numerical derivative
old = as.scalar(b[i,j])
b[i,j] = old - h
- [outmh, Hout, Wout] = conv_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ [outmh, Hout, Wout] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
lossmh = l2_loss::forward(outmh, y)
b[i,j] = old + h
- [outph, Hout, Wout] = conv_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ [outph, Hout, Wout] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
lossph = l2_loss::forward(outph, y)
b[i,j] = old # reset
db_num = (lossph-lossmh) / (2*h) # numerical derivative
@@ -412,11 +413,11 @@ conv_builtin = function() {
}
}
-conv_simple = function() {
+conv2d_simple = function() {
/*
- * Gradient check for the simple reference convolutional layer.
+ * Gradient check for the simple reference 2D convolutional layer.
*/
- print("Grad checking the simple reference convolutional layer with L2 loss.")
+ print("Grad checking the simple reference 2D convolutional layer with L2 loss.")
# Generate data
N = 2 # num examples
@@ -432,13 +433,13 @@ conv_simple = function() {
y = rand(rows=N, cols=F*Hin*Win)
# Create layers
- [W, b] = conv_simple::init(F, C, Hf, Wf)
+ [W, b] = conv2d_simple::init(F, C, Hf, Wf)
# Compute analytical gradients of loss wrt parameters
- [out, Hout, Wout] = conv_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [out, Hout, Wout] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
dout = l2_loss::backward(out, y)
- [dX, dW, db] = conv_simple::backward(dout, Hout, Wout, X, W, b, C, Hin, Win, Hf, Wf,
- stride, stride, pad, pad)
+ [dX, dW, db] = conv2d_simple::backward(dout, Hout, Wout, X, W, b, C, Hin, Win, Hf, Wf,
+ stride, stride, pad, pad)
# Grad check
h = 1e-5
@@ -448,12 +449,12 @@ conv_simple = function() {
# Compute numerical derivative
old = as.scalar(X[i,j])
X[i,j] = old - h
- [outmh, Hout, Wout] = conv_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ [outmh, Hout, Wout] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
lossmh = l2_loss::forward(outmh, y)
X[i,j] = old + h
- [outph, Hout, Wout] = conv_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ [outph, Hout, Wout] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
lossph = l2_loss::forward(outph, y)
X[i,j] = old # reset
dX_num = (lossph-lossmh) / (2*h) # numerical derivative
@@ -469,12 +470,12 @@ conv_simple = function() {
# Compute numerical derivative
old = as.scalar(W[i,j])
W[i,j] = old - h
- [outmh, Hout, Wout] = conv_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ [outmh, Hout, Wout] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
lossmh = l2_loss::forward(outmh, y)
W[i,j] = old + h
- [outph, Hout, Wout] = conv_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ [outph, Hout, Wout] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
lossph = l2_loss::forward(outph, y)
W[i,j] = old # reset
dW_num = (lossph-lossmh) / (2*h) # numerical derivative
@@ -490,12 +491,12 @@ conv_simple = function() {
# Compute numerical derivative
old = as.scalar(b[i,j])
b[i,j] = old - h
- [outmh, Hout, Wout] = conv_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ [outmh, Hout, Wout] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
lossmh = l2_loss::forward(outmh, y)
b[i,j] = old + h
- [outph, Hout, Wout] = conv_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ [outph, Hout, Wout] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
lossph = l2_loss::forward(outph, y)
b[i,j] = old # reset
db_num = (lossph-lossmh) / (2*h) # numerical derivative
@@ -898,11 +899,11 @@ lstm = function() {
}
}
-max_pool = function() {
+max_pool2d = function() {
/*
- * Gradient check for the max pooling layer.
+ * Gradient check for the 2D max pooling layer.
*/
- print("Grad checking the max pooling layer with L2 loss.")
+ print("Grad checking the 2D max pooling layer with L2 loss.")
# Generate data
N = 2 # num examples
@@ -921,9 +922,9 @@ max_pool = function() {
y = rand(rows=N, cols=C*Hout*Wout)
# Compute analytical gradients of loss wrt parameters
- [out, Hout, Wout] = max_pool::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [out, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
dout = l2_loss::backward(out, y)
- dX = max_pool::backward(dout, Hout, Wout, X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ dX = max_pool2d::backward(dout, Hout, Wout, X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
# Grad check
h = 1e-5
@@ -932,10 +933,10 @@ max_pool = function() {
# Compute numerical derivative
old = as.scalar(X[i,j])
X[i,j] = old - h
- [outmh, Hout, Wout] = max_pool::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [outmh, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
lossmh = l2_loss::forward(outmh, y)
X[i,j] = old + h
- [outph, Hout, Wout] = max_pool::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [outph, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
lossph = l2_loss::forward(outph, y)
X[i,j] = old # reset
dX_num = (lossph-lossmh) / (2*h) # numerical derivative
@@ -947,11 +948,11 @@ max_pool = function() {
}
}
-max_pool_builtin = function() {
+max_pool2d_builtin = function() {
/*
- * Gradient check for the max pooling layer.
+ * Gradient check for the 2D max pooling layer.
*/
- print("Grad checking the built-in max pooling layer with L2 loss.")
+ print("Grad checking the built-in 2D max pooling layer with L2 loss.")
# Generate data
N = 2 # num examples
@@ -970,10 +971,11 @@ max_pool_builtin = function() {
y = rand(rows=N, cols=C*Hout*Wout)
# Compute analytical gradients of loss wrt parameters
- [out, Hout, Wout] = max_pool_builtin::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [out, Hout, Wout] = max_pool2d_builtin::forward(X, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
dout = l2_loss::backward(out, y)
- dX = max_pool_builtin::backward(dout, Hout, Wout, X, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ dX = max_pool2d_builtin::backward(dout, Hout, Wout, X, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
# Grad check
h = 1e-5
@@ -982,12 +984,12 @@ max_pool_builtin = function() {
# Compute numerical derivative
old = as.scalar(X[i,j])
X[i,j] = old - h
- [outmh, Hout, Wout] = max_pool_builtin::forward(X, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ [outmh, Hout, Wout] = max_pool2d_builtin::forward(X, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
lossmh = l2_loss::forward(outmh, y)
X[i,j] = old + h
- [outph, Hout, Wout] = max_pool_builtin::forward(X, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ [outph, Hout, Wout] = max_pool2d_builtin::forward(X, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
lossph = l2_loss::forward(outph, y)
X[i,j] = old # reset
dX_num = (lossph-lossmh) / (2*h) # numerical derivative
@@ -999,11 +1001,11 @@ max_pool_builtin = function() {
}
}
-max_pool_simple = function() {
+max_pool2d_simple = function() {
/*
- * Gradient check for the simple reference max pooling layer.
+ * Gradient check for the simple reference 2D max pooling layer.
*/
- print("Grad checking the simple reference max pooling layer with L2 loss.")
+ print("Grad checking the simple reference 2D max pooling layer with L2 loss.")
# Generate data
N = 2 # num examples
@@ -1022,10 +1024,10 @@ max_pool_simple = function() {
y = rand(rows=N, cols=C*Hout*Wout)
# Compute analytical gradients of loss wrt parameters
- [out, Hout, Wout] = max_pool_simple::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [out, Hout, Wout] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
dout = l2_loss::backward(out, y)
- dX = max_pool_simple::backward(dout, Hout, Wout, X, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ dX = max_pool2d_simple::backward(dout, Hout, Wout, X, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
# Grad check
h = 1e-5
@@ -1034,12 +1036,12 @@ max_pool_simple = function() {
# Compute numerical derivative
old = as.scalar(X[i,j])
X[i,j] = old - h
- [outmh, Hout, Wout] = max_pool_simple::forward(X, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ [outmh, Hout, Wout] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
lossmh = l2_loss::forward(outmh, y)
X[i,j] = old + h
- [outph, Hout, Wout] = max_pool_simple::forward(X, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
+ [outph, Hout, Wout] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf, stride, stride,
+ pad, pad)
lossph = l2_loss::forward(outph, y)
X[i,j] = old # reset
dX_num = (lossph-lossmh) / (2*h) # numerical derivative
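All of the checks in grad_check.dml use the same centered finite difference, dX_num = (loss(x+h) - loss(x-h)) / (2h), whose error is O(h^2). A standalone DML sketch on a scalar function, outside the test harness and assuming nothing beyond core DML:

  # f(x) = x^2, so the analytical derivative at x = 3 is 2*x = 6
  h = 1e-5
  x = 3.0
  lossph = (x+h)^2
  lossmh = (x-h)^2
  dx_num = (lossph - lossmh) / (2*h)  # ~6.0, matching the analytical value
  print(dx_num)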
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/scripts/staging/SystemML-NN/nn/test/max_pool2d_simple.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/max_pool2d_simple.dml b/scripts/staging/SystemML-NN/nn/test/max_pool2d_simple.dml
new file mode 100644
index 0000000..47dab3a
--- /dev/null
+++ b/scripts/staging/SystemML-NN/nn/test/max_pool2d_simple.dml
@@ -0,0 +1,172 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+/*
+ * 2D Max Pooling layer.
+ *
+ * This implementation is intended to be a simple, reference version.
+ */
+
+forward = function(matrix[double] X, int C, int Hin, int Win, int Hf, int Wf,
+ int strideh, int stridew, int padh, int padw)
+ return (matrix[double] out, int Hout, int Wout) {
+ /*
+ * Computes the forward pass for a 2D spatial max pooling layer.
+ * The input data has N examples, each represented as a 3D volume
+ * unrolled into a single vector.
+ *
+ * This implementation is intended to be a simple, reference version.
+ *
+ * Inputs:
+ * - X: Inputs, of shape (N, C*Hin*Win).
+ * - C: Number of input channels (dimensionality of input depth).
+ * - Hin: Input height.
+ * - Win: Input width.
+ * - Hf: Filter height.
+ * - Wf: Filter width.
+ * - strideh: Stride over height.
+ * - stridew: Stride over width.
+ * - padh: Padding for top and bottom sides.
+ * A typical value is 0.
+ * - padw: Padding for left and right sides.
+ * A typical value is 0.
+ *
+ * Outputs:
+ * - out: Outputs, of shape (N, C*Hout*Wout).
+ * - Hout: Output height.
+ * - Wout: Output width.
+ */
+ N = nrow(X)
+ Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1)
+ Wout = as.integer((Win + 2*padw - Wf)/stridew + 1)
+
+ # Create output volume
+ out = matrix(0, rows=N, cols=C*Hout*Wout)
+
+ # Max pooling
+ parfor (n in 1:N, check=0) { # all examples
+ Xn = matrix(X[n,], rows=C, cols=Hin*Win)
+
+ # Pad image
+ pad_value = -1/0  # -1/0 evaluates to -infinity
+ Xn_padded = matrix(pad_value, rows=C, cols=(Hin+2*padh)*(Win+2*padw))  # filled with -inf
+ parfor (c in 1:C) {
+ Xn_slice = matrix(Xn[c,], rows=Hin, cols=Win) # depth slice c reshaped
+ Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw)
+ Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice
+ Xn_padded[c,] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape
+ }
+ img = Xn_padded # shape (C, (Hin+2*padh)*(Win+2*padw))
+
+ parfor (c in 1:C, check=0) { # all channels
+ img_slice = matrix(img[c,], rows=Hin+2*padh, cols=Win+2*padw)
+ parfor (hout in 1:Hout, check=0) { # all output rows
+ hin = (hout-1) * strideh + 1
+ parfor (wout in 1:Wout, check=0) { # all output columns
+ win = (wout-1) * stridew + 1
+ out[n, (c-1)*Hout*Wout + (hout-1)*Wout + wout] = max(img_slice[hin:hin+Hf-1,
+ win:win+Wf-1])
+ }
+ }
+ }
+ }
+}
+
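A minimal usage sketch for the forward function above (the sizes below are
illustrative assumptions, not values taken from the tests):

  source("nn/test/max_pool2d_simple.dml") as max_pool2d_simple
  N = 2    # num examples
  C = 3    # num channels
  Hin = 4  # input height
  Win = 4  # input width
  X = rand(rows=N, cols=C*Hin*Win)
  # 2x2 pooling with stride 2 and no padding halves each spatial dimension:
  # Hout = (Hin + 2*padh - Hf)/strideh + 1 = (4 + 0 - 2)/2 + 1 = 2
  [out, Hout, Wout] = max_pool2d_simple::forward(X, C, Hin, Win, 2, 2, 2, 2, 0, 0)
  # out has shape (N, C*Hout*Wout) = (2, 12)
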
+backward = function(matrix[double] dout, int Hout, int Wout, matrix[double] X,
+ int C, int Hin, int Win, int Hf, int Wf,
+ int strideh, int stridew, int padh, int padw)
+ return (matrix[double] dX) {
+ /*
+ * Computes the backward pass for a 2D spatial max pooling layer.
+ * The input data has N examples, each represented as a 3D volume
+ * unrolled into a single vector.
+ *
+ * Inputs:
+ * - dout: Gradient wrt `out` from upstream, of
+ * shape (N, C*Hout*Wout).
+ * - Hout: Output height.
+ * - Wout: Output width.
+ * - X: Inputs, of shape (N, C*Hin*Win).
+ * - C: Number of input channels (dimensionality of input depth).
+ * - Hin: Input height.
+ * - Win: Input width.
+ * - Hf: Filter height.
+ * - Wf: Filter width.
+ * - strideh: Stride over height.
+ * - stridew: Stride over width.
+ * - padh: Padding for top and bottom sides.
+ * A typical value is 0.
+ * - padw: Padding for left and right sides.
+ * A typical value is 0.
+ *
+ * Outputs:
+ * - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
+ */
+ N = nrow(X)
+
+ # Create gradient volume
+ dX = matrix(0, rows=N, cols=C*Hin*Win)
+
+ # Gradient of max pooling
+ for (n in 1:N) { # all examples
+ Xn = matrix(X[n,], rows=C, cols=Hin*Win)
+
+ # Pad image
+ pad_value = -1/0  # -1/0 evaluates to -infinity
+ Xn_padded = matrix(pad_value, rows=C, cols=(Hin+2*padh)*(Win+2*padw))  # filled with -inf
+ parfor (c in 1:C) {
+ Xn_slice = matrix(Xn[c,], rows=Hin, cols=Win) # depth slice c reshaped
+ Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw)
+ Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice
+ Xn_padded[c,] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape
+ }
+ img = Xn_padded
+
+ dimg = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw))
+ for (c in 1:C) { # all channels
+ img_slice = matrix(img[c,], rows=Hin+2*padh, cols=Win+2*padw)
+ dimg_slice = matrix(0, rows=Hin+2*padh, cols=Win+2*padw)
+ for (hout in 1:Hout) { # all output rows
+ hin = (hout-1) * strideh + 1
+ for (wout in 1:Wout) { # all output columns
+ win = (wout-1) * stridew + 1
+ img_slice_patch = img_slice[hin:hin+Hf-1, win:win+Wf-1]
+ max_val_ind = img_slice_patch == max(img_slice_patch) # max value indicator matrix
+ # gradient passes through only for the max value(s) in this patch
+ dimg_slice_patch = max_val_ind * dout[n, (c-1)*Hout*Wout + (hout-1)*Wout + wout]
+ dimg_slice[hin:hin+Hf-1, win:win+Wf-1] = dimg_slice[hin:hin+Hf-1, win:win+Wf-1]
+ + dimg_slice_patch
+ }
+ }
+ dimg[c,] = matrix(dimg_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw))
+ }
+
+ # Unpad the gradient wrt the input
+ dXn = matrix(0, rows=C, cols=Hin*Win)
+ parfor (c in 1:C, check=0) {
+ dXn_padded_slice = matrix(dimg[c,], rows=(Hin+2*padh), cols=(Win+2*padw))
+ dXn_slice = dXn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win]
+ dXn[c,] = matrix(dXn_slice, rows=1, cols=Hin*Win)
+ }
+ dX[n,] = matrix(dXn, rows=1, cols=C*Hin*Win)
+ }
+}
+
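The core of the backward pass above is the `max_val_ind` indicator matrix:
the upstream gradient for an output cell is routed only to the input entries
that attained the patch maximum, and to all of them when the maximum is tied.
A minimal DML illustration (the values are assumptions for demonstration):

  patch = matrix("1 3 2 3", rows=2, cols=2)  # max value 3 appears twice
  max_val_ind = patch == max(patch)          # [[0, 1], [0, 1]]
  dpatch = max_val_ind * 5                   # upstream gradient 5 flows to
                                             # every tied maximum: [[0, 5], [0, 5]]

Note also that because the image is padded with -inf rather than zeros, a
padded cell can never win the max against a real pixel, so the border of
`dimg` stays zero and is simply discarded by the unpadding step at the end.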
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/scripts/staging/SystemML-NN/nn/test/max_pool_simple.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/max_pool_simple.dml b/scripts/staging/SystemML-NN/nn/test/max_pool_simple.dml
deleted file mode 100644
index 786b0a1..0000000
--- a/scripts/staging/SystemML-NN/nn/test/max_pool_simple.dml
+++ /dev/null
@@ -1,172 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-/*
- * Max pooling layer.
- *
- * This implementation is intended to be a simple, reference version.
- */
-
-forward = function(matrix[double] X, int C, int Hin, int Win, int Hf, int Wf,
- int strideh, int stridew, int padh, int padw)
- return (matrix[double] out, int Hout, int Wout) {
- /*
- * Computes the forward pass for a 2D spatial max pooling layer.
- * The input data has N examples, each represented as a 3D volume
- * unrolled into a single vector.
- *
- * This implementation is intended to be a simple, reference version.
- *
- * Inputs:
- * - X: Inputs, of shape (N, C*Hin*Win).
- * - C: Number of input channels (dimensionality of input depth).
- * - Hin: Input height.
- * - Win: Input width.
- * - Hf: Filter height.
- * - Wf: Filter width.
- * - strideh: Stride over height.
- * - stridew: Stride over width.
- * - padh: Padding for top and bottom sides.
- * A typical value is 0.
- * - padw: Padding for left and right sides.
- * A typical value is 0.
- *
- * Outputs:
- * - out: Outputs, of shape (N, C*Hout*Wout).
- * - Hout: Output height.
- * - Wout: Output width.
- */
- N = nrow(X)
- Hout = as.integer((Hin + 2*padh - Hf)/strideh + 1)
- Wout = as.integer((Win + 2*padw - Wf)/stridew + 1)
-
- # Create output volume
- out = matrix(0, rows=N, cols=C*Hout*Wout)
-
- # Max pooling
- parfor (n in 1:N, check=0) { # all examples
- Xn = matrix(X[n,], rows=C, cols=Hin*Win)
-
- # Pad image
- pad_value = -1/0
- Xn_padded = matrix(pad_value, rows=C, cols=(Hin+2*padh)*(Win+2*padw)) # zeros
- parfor (c in 1:C) {
- Xn_slice = matrix(Xn[c,], rows=Hin, cols=Win) # depth slice C reshaped
- Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw)
- Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice
- Xn_padded[c,] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape
- }
- img = Xn_padded # shape (C, (Hin+2*padh)*(Win+2*padw))
-
- parfor (c in 1:C, check=0) { # all channels
- img_slice = matrix(img[c,], rows=Hin+2*padh, cols=Win+2*padw)
- parfor (hout in 1:Hout, check=0) { # all output rows
- hin = (hout-1) * strideh + 1
- parfor (wout in 1:Wout, check=0) { # all output columns
- win = (wout-1) * stridew + 1
- out[n, (c-1)*Hout*Wout + (hout-1)*Wout + wout] = max(img_slice[hin:hin+Hf-1,
- win:win+Wf-1])
- }
- }
- }
- }
-}
-
-backward = function(matrix[double] dout, int Hout, int Wout, matrix[double] X,
- int C, int Hin, int Win, int Hf, int Wf,
- int strideh, int stridew, int padh, int padw)
- return (matrix[double] dX) {
- /*
- * Computes the backward pass for a 2D spatial max pooling layer.
- * The input data has N examples, each represented as a 3D volume
- * unrolled into a single vector.
- *
- * Inputs:
- * - dout: Gradient wrt `out` from upstream, of
- * shape (N, C*Hout*Wout).
- * - Hout: Output height.
- * - Wout: Output width.
- * - X: Inputs, of shape (N, C*Hin*Win).
- * - C: Number of input channels (dimensionality of input depth).
- * - Hin: Input height.
- * - Win: Input width.
- * - Hf: Filter height.
- * - Wf: Filter width.
- * - strideh: Stride over height.
- * - stridew: Stride over width.
- * - padh: Padding for top and bottom sides.
- * A typical value is 0.
- * - padw: Padding for left and right sides.
- * A typical value is 0.
- *
- * Outputs:
- * - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
- */
- N = nrow(X)
-
- # Create gradient volume
- dX = matrix(0, rows=N, cols=C*Hin*Win)
-
- # Gradient of max pooling
- for (n in 1:N) { # all examples
- Xn = matrix(X[n,], rows=C, cols=Hin*Win)
-
- # Pad image
- pad_value = -1/0
- Xn_padded = matrix(pad_value, rows=C, cols=(Hin+2*padh)*(Win+2*padw)) # zeros
- parfor (c in 1:C) {
- Xn_slice = matrix(Xn[c,], rows=Hin, cols=Win) # depth slice C reshaped
- Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw)
- Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice
- Xn_padded[c,] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape
- }
- img = Xn_padded
-
- dimg = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw))
- for (c in 1:C) { # all channels
- img_slice = matrix(img[c,], rows=Hin+2*padh, cols=Win+2*padw)
- dimg_slice = matrix(0, rows=Hin+2*padh, cols=Win+2*padw)
- for (hout in 1:Hout, check=0) { # all output rows
- hin = (hout-1) * strideh + 1
- for (wout in 1:Wout) { # all output columns
- win = (wout-1) * stridew + 1
- img_slice_patch = img_slice[hin:hin+Hf-1, win:win+Wf-1]
- max_val_ind = img_slice_patch == max(img_slice_patch) # max value indicator matrix
- # gradient passes through only for the max value(s) in this patch
- dimg_slice_patch = max_val_ind * dout[n, (c-1)*Hout*Wout + (hout-1)*Wout + wout]
- dimg_slice[hin:hin+Hf-1, win:win+Wf-1] = dimg_slice[hin:hin+Hf-1, win:win+Wf-1]
- + dimg_slice_patch
- }
- }
- dimg[c,] = matrix(dimg_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw))
- }
-
- # Unpad derivs on input
- dXn = matrix(0, rows=C, cols=Hin*Win)
- parfor (c in 1:C, check=0) {
- dXn_padded_slice = matrix(dimg[c,], rows=(Hin+2*padh), cols=(Win+2*padw))
- dXn_slice = dXn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win]
- dXn[c,] = matrix(dXn_slice, rows=1, cols=Hin*Win)
- }
- dX[n,] = matrix(dXn, rows=1, cols=C*Hin*Win)
- }
-}
-
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/scripts/staging/SystemML-NN/nn/test/run_tests.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/run_tests.dml b/scripts/staging/SystemML-NN/nn/test/run_tests.dml
index 86bb77b..644662c 100644
--- a/scripts/staging/SystemML-NN/nn/test/run_tests.dml
+++ b/scripts/staging/SystemML-NN/nn/test/run_tests.dml
@@ -38,16 +38,16 @@ tmp = grad_check::log_loss()
# Other layers
tmp = grad_check::affine()
tmp = grad_check::batch_norm()
-tmp = grad_check::conv_simple()
-tmp = grad_check::conv()
-tmp = grad_check::conv_builtin()
+tmp = grad_check::conv2d_simple()
+tmp = grad_check::conv2d()
+tmp = grad_check::conv2d_builtin()
tmp = grad_check::dropout()
tmp = grad_check::l1_reg()
tmp = grad_check::l2_reg()
tmp = grad_check::lstm()
-tmp = grad_check::max_pool_simple()
-tmp = grad_check::max_pool()
-tmp = grad_check::max_pool_builtin()
+tmp = grad_check::max_pool2d_simple()
+tmp = grad_check::max_pool2d()
+tmp = grad_check::max_pool2d_builtin()
tmp = grad_check::relu()
tmp = grad_check::rnn()
tmp = grad_check::sigmoid()
@@ -72,9 +72,9 @@ print("---")
tmp = test::batch_norm()
tmp = test::im2col()
tmp = test::padding()
-tmp = test::conv()
+tmp = test::conv2d()
tmp = test::cross_entropy_loss()
-tmp = test::max_pool()
+tmp = test::max_pool2d()
tmp = test::spatial_batch_norm()
tmp = test::tanh()
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/07039caa/scripts/staging/SystemML-NN/nn/test/test.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/test.dml b/scripts/staging/SystemML-NN/nn/test/test.dml
index 8fb0d04..64fc519 100644
--- a/scripts/staging/SystemML-NN/nn/test/test.dml
+++ b/scripts/staging/SystemML-NN/nn/test/test.dml
@@ -23,23 +23,23 @@
* Various tests, not including gradient checks.
*/
source("nn/layers/batch_norm.dml") as batch_norm
-source("nn/layers/conv.dml") as conv
-source("nn/layers/conv_builtin.dml") as conv_builtin
+source("nn/layers/conv2d.dml") as conv2d
+source("nn/layers/conv2d_builtin.dml") as conv2d_builtin
source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss
-source("nn/layers/max_pool.dml") as max_pool
-source("nn/layers/max_pool_builtin.dml") as max_pool_builtin
+source("nn/layers/max_pool2d.dml") as max_pool2d
+source("nn/layers/max_pool2d_builtin.dml") as max_pool2d_builtin
source("nn/layers/spatial_batch_norm.dml") as spatial_batch_norm
source("nn/layers/tanh.dml") as tanh
-source("nn/test/conv_simple.dml") as conv_simple
-source("nn/test/max_pool_simple.dml") as max_pool_simple
+source("nn/test/conv2d_simple.dml") as conv2d_simple
+source("nn/test/max_pool2d_simple.dml") as max_pool2d_simple
source("nn/test/util.dml") as test_util
source("nn/util.dml") as util
batch_norm = function() {
/*
- * Test for the `batch_norm` function.
+ * Test for the batch normalization function.
*/
- print("Testing the batch_norm function.")
+ print("Testing the batch normalization function.")
# Generate data
N = 4 # Number of examples
@@ -68,11 +68,11 @@ batch_norm = function() {
}
}
-conv = function() {
+conv2d = function() {
/*
- * Test for the `conv` functions.
+ * Test for the 2D convolution functions.
*/
- print("Testing the conv functions.")
+ print("Testing the 2D convolution functions.")
# Generate data
N = 2 # num examples
@@ -87,14 +87,14 @@ conv = function() {
X = rand(rows=N, cols=C*Hin*Win, pdf="normal")
# Create layer
- [W, b] = conv::init(F, C, Hf, Wf)
+ [W, b] = conv2d::init(F, C, Hf, Wf)
# Forward
- [out, Hout, Wout] = conv::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
- [out_simple, Hout_simple, Wout_simple] = conv_simple::forward(X, W, b, C, Hin, Win, Hf, Wf,
- stride, stride, pad, pad)
- [out_builtin, Hout_builtin, Wout_builtin] = conv_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf,
- stride, stride, pad, pad)
+ [out, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [out_simple, Hout_simple, Wout_simple] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf,
+ stride, stride, pad, pad)
+ [out_builtin, Hout_builtin, Wout_builtin] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf,
+ stride, stride, pad, pad)
# Equivalency check
out = matrix(out, rows=1, cols=N*F*Hout*Wout)
@@ -110,7 +110,7 @@ conv = function() {
cross_entropy_loss = function() {
/*
- * Test for the `cross-entropy` loss function.
+ * Test for the cross-entropy loss function.
*
* Here we make sure that the cross-entropy loss function does
* not propagate `infinity` values in the case that a prediction is
@@ -206,11 +206,11 @@ padding = function() {
}
}
-max_pool = function() {
+max_pool2d = function() {
/*
- * Test for the `max_pool` functions.
+ * Test for the 2D max pooling functions.
*/
- print("Testing the max pool functions.")
+ print("Testing the 2D max pooling functions.")
# Generate data
N = 2 # num examples
@@ -227,12 +227,14 @@ max_pool = function() {
print(" - Testing w/ padh="+padh+" & padw="+padw+".")
#if (1==1) {} # force correct printing
#print(" - Testing forward")
- [out, Hout, Wout] = max_pool::forward(X, C, Hin, Win, Hf, Wf, stride, stride, padh, padw)
- [out_simple, Hout_simple, Wout_simple] = max_pool_simple::forward(X, C, Hin, Win, Hf, Wf,
- stride, stride, padh, padw)
- [out_builtin, Hout_builtin, Wout_builtin] = max_pool_builtin::forward(X, C, Hin, Win, Hf, Wf,
- stride, stride,
- padh, padw)
+ [out, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, padh, padw)
+ [out_simple, Hout_simple, Wout_simple] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf,
+ stride, stride,
+ padh, padw)
+ [out_builtin, Hout_builtin, Wout_builtin] = max_pool2d_builtin::forward(X, C, Hin, Win,
+ Hf, Wf,
+ stride, stride,
+ padh, padw)
# Equivalency check
out = matrix(out, rows=1, cols=N*C*Hout*Wout)
@@ -247,11 +249,12 @@ max_pool = function() {
#print(" - Testing backward")
dout = rand(rows=N, cols=C*Hout*Wout, pdf="normal")
- dX = max_pool::backward(dout, Hout, Wout, X, C, Hin, Win, Hf, Wf, stride, stride, padh, padw)
- dX_simple = max_pool_simple::backward(dout, Hout_simple, Wout_simple, X, C, Hin, Win, Hf, Wf,
- stride, stride, padh, padw)
- dX_builtin = max_pool_builtin::backward(dout, Hout_builtin, Wout_builtin, X, C, Hin, Win,
+ dX = max_pool2d::backward(dout, Hout, Wout, X, C, Hin, Win, Hf, Wf, stride, stride,
+ padh, padw)
+ dX_simple = max_pool2d_simple::backward(dout, Hout_simple, Wout_simple, X, C, Hin, Win,
Hf, Wf, stride, stride, padh, padw)
+ dX_builtin = max_pool2d_builtin::backward(dout, Hout_builtin, Wout_builtin, X, C, Hin, Win,
+ Hf, Wf, stride, stride, padh, padw)
# Equivalency check
dX = matrix(dX, rows=1, cols=N*C*Hin*Win)
@@ -288,11 +291,11 @@ max_pool = function() {
pad = 0
# forward
- [out, Hout, Wout] = max_pool::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
- [out_simple, Hout_simple, Wout_simple] = max_pool_simple::forward(X, C, Hin, Win, Hf, Wf,
- stride, stride, pad, pad)
- [out_builtin, Hout_builtin, Wout_builtin] = max_pool_builtin::forward(X, C, Hin, Win, Hf, Wf,
- stride, stride, pad, pad)
+ [out, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [out_simple, Hout_simple, Wout_simple] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf,
+ stride, stride, pad, pad)
+ [out_builtin, Hout_builtin, Wout_builtin] = max_pool2d_builtin::forward(X, C, Hin, Win, Hf, Wf,
+ stride, stride, pad, pad)
# equivalency check
# -- channel 1
@@ -326,11 +329,11 @@ max_pool = function() {
pad = 1
# forward
- [out, Hout, Wout] = max_pool::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
- [out_simple, Hout_simple, Wout_simple] = max_pool_simple::forward(X, C, Hin, Win, Hf, Wf,
- stride, stride, pad, pad)
- [out_builtin, Hout_builtin, Wout_builtin] = max_pool_builtin::forward(X, C, Hin, Win, Hf, Wf,
- stride, stride, pad, pad)
+ [out, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [out_simple, Hout_simple, Wout_simple] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf,
+ stride, stride, pad, pad)
+ [out_builtin, Hout_builtin, Wout_builtin] = max_pool2d_builtin::forward(X, C, Hin, Win, Hf, Wf,
+ stride, stride, pad, pad)
# equivalency check
# -- channel 1
@@ -363,11 +366,11 @@ max_pool = function() {
pad = 0
# forward
- [out, Hout, Wout] = max_pool::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
- [out_simple, Hout_simple, Wout_simple] = max_pool_simple::forward(X, C, Hin, Win, Hf, Wf,
- stride, stride, pad, pad)
- [out_builtin, Hout_builtin, Wout_builtin] = max_pool_builtin::forward(X, C, Hin, Win, Hf, Wf,
- stride, stride, pad, pad)
+ [out, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [out_simple, Hout_simple, Wout_simple] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf,
+ stride, stride, pad, pad)
+ [out_builtin, Hout_builtin, Wout_builtin] = max_pool2d_builtin::forward(X, C, Hin, Win, Hf, Wf,
+ stride, stride, pad, pad)
# equivalency check
# -- channel 1
@@ -402,11 +405,11 @@ max_pool = function() {
pad = 1
# forward
- [out, Hout, Wout] = max_pool::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
- [out_simple, Hout_simple, Wout_simple] = max_pool_simple::forward(X, C, Hin, Win, Hf, Wf,
- stride, stride, pad, pad)
- [out_builtin, Hout_builtin, Wout_builtin] = max_pool_builtin::forward(X, C, Hin, Win, Hf, Wf,
- stride, stride, pad, pad)
+ [out, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+ [out_simple, Hout_simple, Wout_simple] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf,
+ stride, stride, pad, pad)
+ [out_builtin, Hout_builtin, Wout_builtin] = max_pool2d_builtin::forward(X, C, Hin, Win, Hf, Wf,
+ stride, stride, pad, pad)
# equivalency check
# -- channel 1
@@ -417,7 +420,8 @@ max_pool = function() {
# 0 0 0
# 0 -6 0
# 0 0 0
- target = matrix("-1 -2 -4 -5 -6 -8 -13 -14 -16 -1 -5 -13 -2 -6 -14 -4 -8 -16", rows=1, cols=C*Hout*Wout)
+ target = matrix("-1 -2 -4 -5 -6 -8 -13 -14 -16 -1 -5 -13 -2 -6 -14 -4 -8 -16",
+ rows=1, cols=C*Hout*Wout)
target = rbind(target, target) # n=2
tmp = test_util::check_all_equal(out, target)
tmp = test_util::check_all_equal(out_simple, target)
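The expected values above are worth noting: the input image is all-negative,
yet with pad=1 every target entry is still negative (-1, -2, -4, ...). This is
exactly the behavior the -inf padding in the implementations guarantees: a
padded border cell can never be the maximum, so pooling an all-negative image
returns true (negative) maxima instead of spurious zeros leaking in from the
padding.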
@@ -426,9 +430,9 @@ max_pool = function() {
spatial_batch_norm = function() {
/*
- * Test for the `spatial_batch_norm` function.
+ * Test for the spatial batch normalization function.
*/
- print("Testing the spatial_batch_norm function.")
+ print("Testing the spatial batch normalization function.")
# Generate data
N = 2 # Number of examples
@@ -532,7 +536,8 @@ tanh = function() {
# Equivalency check
for (i in 1:nrow(out)) {
for (j in 1:ncol(out)) {
- rel_error = test_util::check_rel_error(as.scalar(out[i,j]), as.scalar(out_ref[i,j]), 1e-10, 1e-12)
+ rel_error = test_util::check_rel_error(as.scalar(out[i,j]), as.scalar(out_ref[i,j]),
+ 1e-10, 1e-12)
}
}
}
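The equivalency check above compares each element with
`test_util::check_rel_error` at thresholds of 1e-10 and 1e-12. Assuming the
usual definition of relative error for such checks (the helper itself is not
shown in this diff):

  \mathrm{rel\_error} = \frac{|x_1 - x_2|}{\max(|x_1|, |x_2|)}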
[3/7] incubator-systemml git commit: [SYSTEMML-1412] Rename
`nn/test/tests.dml` to `nn/test/run_tests.dml`
Posted by du...@apache.org.
[SYSTEMML-1412] Rename `nn/test/tests.dml` to `nn/test/run_tests.dml`
Closes #447.
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/7744924e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/7744924e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/7744924e
Branch: refs/heads/master
Commit: 7744924e96701e79573f5f839cac0c7bbe97554b
Parents: 16b1cbd
Author: Mike Dusenberry <mw...@us.ibm.com>
Authored: Fri Mar 31 18:38:56 2017 -0700
Committer: Mike Dusenberry <mw...@us.ibm.com>
Committed: Fri Mar 31 18:38:56 2017 -0700
----------------------------------------------------------------------
.../staging/SystemML-NN/nn/test/run_tests.dml | 85 ++++++++++++++++++++
scripts/staging/SystemML-NN/nn/test/tests.dml | 85 --------------------
2 files changed, 85 insertions(+), 85 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7744924e/scripts/staging/SystemML-NN/nn/test/run_tests.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/run_tests.dml b/scripts/staging/SystemML-NN/nn/test/run_tests.dml
new file mode 100644
index 0000000..86bb77b
--- /dev/null
+++ b/scripts/staging/SystemML-NN/nn/test/run_tests.dml
@@ -0,0 +1,85 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+/*
+ * Script to run tests.
+ */
+source("nn/test/grad_check.dml") as grad_check
+source("nn/test/test.dml") as test
+
+print("")
+print("Starting grad checks.")
+print("---")
+
+# Loss functions
+tmp = grad_check::cross_entropy_loss()
+tmp = grad_check::l1_loss()
+tmp = grad_check::l2_loss()
+tmp = grad_check::log_loss()
+
+# Other layers
+tmp = grad_check::affine()
+tmp = grad_check::batch_norm()
+tmp = grad_check::conv_simple()
+tmp = grad_check::conv()
+tmp = grad_check::conv_builtin()
+tmp = grad_check::dropout()
+tmp = grad_check::l1_reg()
+tmp = grad_check::l2_reg()
+tmp = grad_check::lstm()
+tmp = grad_check::max_pool_simple()
+tmp = grad_check::max_pool()
+tmp = grad_check::max_pool_builtin()
+tmp = grad_check::relu()
+tmp = grad_check::rnn()
+tmp = grad_check::sigmoid()
+tmp = grad_check::softmax()
+tmp = grad_check::spatial_batch_norm()
+tmp = grad_check::tanh()
+
+# Example model
+tmp = grad_check::two_layer_affine_l2_net()
+
+print("---")
+print("Grad checks complete -- look for any ERRORs or WARNINGs.")
+print("If any tests involving ReLUs failed, try a few times " +
+ "to ensure that they were not false negatives due to " +
+ "kinks being crossed.")
+print("")
+
+print("")
+print("Starting other tests.")
+print("---")
+
+tmp = test::batch_norm()
+tmp = test::im2col()
+tmp = test::padding()
+tmp = test::conv()
+tmp = test::cross_entropy_loss()
+tmp = test::max_pool()
+tmp = test::spatial_batch_norm()
+tmp = test::tanh()
+
+print("---")
+print("Other tests complete -- look for any ERRORs or WARNINGs.")
+print("")
+print("")
+
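To execute this script, SystemML is typically invoked from the directory that
contains the `nn` package, so that the `source(...)` paths above resolve. The
exact invocation is an assumption here (it is documented in the package
README, not in this commit); with a local SystemML build it would look like:

  spark-submit SystemML.jar -f nn/test/run_tests.dml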
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7744924e/scripts/staging/SystemML-NN/nn/test/tests.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/tests.dml b/scripts/staging/SystemML-NN/nn/test/tests.dml
deleted file mode 100644
index 86bb77b..0000000
--- a/scripts/staging/SystemML-NN/nn/test/tests.dml
+++ /dev/null
@@ -1,85 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-/*
- * Script to run tests.
- */
-source("nn/test/grad_check.dml") as grad_check
-source("nn/test/test.dml") as test
-
-print("")
-print("Starting grad checks.")
-print("---")
-
-# Loss functions
-tmp = grad_check::cross_entropy_loss()
-tmp = grad_check::l1_loss()
-tmp = grad_check::l2_loss()
-tmp = grad_check::log_loss()
-
-# Other layers
-tmp = grad_check::affine()
-tmp = grad_check::batch_norm()
-tmp = grad_check::conv_simple()
-tmp = grad_check::conv()
-tmp = grad_check::conv_builtin()
-tmp = grad_check::dropout()
-tmp = grad_check::l1_reg()
-tmp = grad_check::l2_reg()
-tmp = grad_check::lstm()
-tmp = grad_check::max_pool_simple()
-tmp = grad_check::max_pool()
-tmp = grad_check::max_pool_builtin()
-tmp = grad_check::relu()
-tmp = grad_check::rnn()
-tmp = grad_check::sigmoid()
-tmp = grad_check::softmax()
-tmp = grad_check::spatial_batch_norm()
-tmp = grad_check::tanh()
-
-# Example model
-tmp = grad_check::two_layer_affine_l2_net()
-
-print("---")
-print("Grad checks complete -- look for any ERRORs or WARNINGs.")
-print("If any tests involving ReLUs failed, try a few times " +
- "to ensure that they were not false negatives due to " +
- "kinks being crossed.")
-print("")
-
-print("")
-print("Starting other tests.")
-print("---")
-
-tmp = test::batch_norm()
-tmp = test::im2col()
-tmp = test::padding()
-tmp = test::conv()
-tmp = test::cross_entropy_loss()
-tmp = test::max_pool()
-tmp = test::spatial_batch_norm()
-tmp = test::tanh()
-
-print("---")
-print("Other tests complete -- look for any ERRORs or WARNINGs.")
-print("")
-print("")
-