Posted to commits@systemml.apache.org by du...@apache.org on 2017/04/26 21:42:30 UTC
[04/11] incubator-systemml git commit: [SYSTEMML-1524] Graduate `nn` library to `scripts/nn`
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/nn/test/grad_check.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/grad_check.dml b/scripts/staging/SystemML-NN/nn/test/grad_check.dml
deleted file mode 100644
index f3bc9a7..0000000
--- a/scripts/staging/SystemML-NN/nn/test/grad_check.dml
+++ /dev/null
@@ -1,1769 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-/*
- * Gradient checks for various architectures.
- */
-source("nn/layers/affine.dml") as affine
-source("nn/layers/batch_norm1d.dml") as batch_norm1d
-source("nn/layers/batch_norm2d.dml") as batch_norm2d
-source("nn/layers/conv2d.dml") as conv2d
-source("nn/layers/conv2d_builtin.dml") as conv2d_builtin
-source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss
-source("nn/layers/dropout.dml") as dropout
-source("nn/layers/l1_loss.dml") as l1_loss
-source("nn/layers/l1_reg.dml") as l1_reg
-source("nn/layers/l2_loss.dml") as l2_loss
-source("nn/layers/l2_reg.dml") as l2_reg
-source("nn/layers/log_loss.dml") as log_loss
-source("nn/layers/lstm.dml") as lstm
-source("nn/layers/max_pool2d.dml") as max_pool2d
-source("nn/layers/max_pool2d_builtin.dml") as max_pool2d_builtin
-source("nn/layers/relu.dml") as relu
-source("nn/layers/rnn.dml") as rnn
-source("nn/layers/scale_shift1d.dml") as scale_shift1d
-source("nn/layers/scale_shift2d.dml") as scale_shift2d
-source("nn/layers/sigmoid.dml") as sigmoid
-source("nn/layers/softmax.dml") as softmax
-source("nn/layers/tanh.dml") as tanh
-source("nn/test/conv2d_simple.dml") as conv2d_simple
-source("nn/test/max_pool2d_simple.dml") as max_pool2d_simple
-source("nn/test/util.dml") as test_util
-
-affine = function() {
- /*
- * Gradient check for the affine layer.
- */
- print("Grad checking the affine layer with L2 loss.")
-
- # Generate data
- N = 3 # num examples
- D = 100 # num features
- M = 10 # num neurons
- X = rand(rows=N, cols=D)
- y = rand(rows=N, cols=M)
- [W, b] = affine::init(D, M)
-
- # Compute analytical gradients of loss wrt parameters
- out = affine::forward(X, W, b)
- dout = l2_loss::backward(out, y)
- [dX, dW, db] = affine::backward(dout, X, W, b)
-
- # Grad check
- h = 1e-5
- print(" - Grad checking X.")
- for (i in 1:nrow(X)) {
- for (j in 1:ncol(X)) {
- # Compute numerical derivative
- old = as.scalar(X[i,j])
- X[i,j] = old - h
- outmh = affine::forward(X, W, b)
- lossmh = l2_loss::forward(outmh, y)
- X[i,j] = old + h
- outph = affine::forward(X, W, b)
- lossph = l2_loss::forward(outph, y)
- X[i,j] = old # reset
- dX_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
- }
- }
-
- print(" - Grad checking W.")
- for (i in 1:nrow(W)) {
- for (j in 1:ncol(W)) {
- # Compute numerical derivative
- old = as.scalar(W[i,j])
- W[i,j] = old - h
- outmh = affine::forward(X, W, b)
- lossmh = l2_loss::forward(outmh, y)
- W[i,j] = old + h
- outph = affine::forward(X, W, b)
- lossph = l2_loss::forward(outph, y)
- W[i,j] = old # reset
- dW_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
- }
- }
-
- print(" - Grad checking b.")
- for (i in 1:nrow(b)) {
- for (j in 1:ncol(b)) {
- # Compute numerical derivative
- old = as.scalar(b[i,j])
- b[i,j] = old - h
- outmh = affine::forward(X, W, b)
- lossmh = l2_loss::forward(outmh, y)
- b[i,j] = old + h
- outph = affine::forward(X, W, b)
- lossph = l2_loss::forward(outph, y)
- b[i,j] = old # reset
- db_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
- }
- }
-}
-
-batch_norm1d = function() {
- /*
- * Gradient check for the 1D batch normalization layer.
- */
- print("Grad checking the 1D batch normalization layer with L2 loss.")
-
- # Generate data
- N = 3 # num examples
- D = 100 # num features
- mu = 0.9 # momentum
- eps = 1e-5 # epsilon
- X = rand(rows=N, cols=D)
- y = rand(rows=N, cols=D)
- gamma = rand(rows=1, cols=D)
- beta = rand(rows=1, cols=D)
- ema_mean = rand(rows=1, cols=D)
- ema_var = rand(rows=1, cols=D)
- #[dummy, dummy, ema_mean, ema_var] = batch_norm1d::init(D)
-
- # Check training & testing modes
- for (i in 1:2) {
- if (i == 1)
- mode = 'train'
- else
- mode = 'test'
- print(" - Grad checking the '"+mode+"' mode.")
-
- # Compute analytical gradients of loss wrt parameters
- [out, ema_mean_upd, ema_var_upd, cache_mean, cache_var, cache_norm] =
- batch_norm1d::forward(X, gamma, beta, mode, ema_mean, ema_var, mu, eps)
- dout = l2_loss::backward(out, y)
- [dX, dgamma, dbeta] = batch_norm1d::backward(dout, out, ema_mean_upd, ema_var_upd,
- cache_mean, cache_var, cache_norm,
- X, gamma, beta, mode, ema_mean, ema_var, mu, eps)
-
- # Grad check
- h = 1e-5
- print(" - Grad checking X.")
- for (i in 1:nrow(X)) {
- for (j in 1:ncol(X)) {
- # Compute numerical derivative
- old = as.scalar(X[i,j])
- X[i,j] = old - h
- [outmh, ema_mean_upd, ema_var_upd, cache_mean, cache_var, cache_norm] =
- batch_norm1d::forward(X, gamma, beta, mode, ema_mean, ema_var, mu, eps)
- lossmh = l2_loss::forward(outmh, y)
- X[i,j] = old + h
- [outph, ema_mean_upd, ema_var_upd, cache_mean, cache_var, cache_norm] =
- batch_norm1d::forward(X, gamma, beta, mode, ema_mean, ema_var, mu, eps)
- lossph = l2_loss::forward(outph, y)
- X[i,j] = old # reset
- dX_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
- }
- }
-
- print(" - Grad checking gamma.")
- for (i in 1:nrow(gamma)) {
- for (j in 1:ncol(gamma)) {
- # Compute numerical derivative
- old = as.scalar(gamma[i,j])
- gamma[i,j] = old - h
- [outmh, ema_mean_upd, ema_var_upd, cache_mean, cache_var, cache_norm] =
- batch_norm1d::forward(X, gamma, beta, mode, ema_mean, ema_var, mu, eps)
- lossmh = l2_loss::forward(outmh, y)
- gamma[i,j] = old + h
- [outph, ema_mean_upd, ema_var_upd, cache_mean, cache_var, cache_norm] =
- batch_norm1d::forward(X, gamma, beta, mode, ema_mean, ema_var, mu, eps)
- lossph = l2_loss::forward(outph, y)
- gamma[i,j] = old # reset
- dgamma_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dgamma[i,j]), dgamma_num,
- lossph, lossmh)
- }
- }
-
- print(" - Grad checking beta.")
- for (i in 1:nrow(beta)) {
- for (j in 1:ncol(beta)) {
- # Compute numerical derivative
- old = as.scalar(beta[i,j])
- beta[i,j] = old - h
- [outmh, ema_mean_upd, ema_var_upd, cache_mean, cache_var, cache_norm] =
- batch_norm1d::forward(X, gamma, beta, mode, ema_mean, ema_var, mu, eps)
- lossmh = l2_loss::forward(outmh, y)
- beta[i,j] = old + h
- [outph, ema_mean_upd, ema_var_upd, cache_mean, cache_var, cache_norm] =
- batch_norm1d::forward(X, gamma, beta, mode, ema_mean, ema_var, mu, eps)
- lossph = l2_loss::forward(outph, y)
- beta[i,j] = old # reset
- dbeta_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dbeta[i,j]), dbeta_num,
- lossph, lossmh)
- }
- }
- }
-}
-
-batch_norm2d = function() {
- /*
- * Gradient check for the 2D (spatial) batch normalization layer.
- */
- print("Grad checking the 2D (spatial) batch normalization layer with L2 loss.")
-
- # Generate data
- N = 3 # num examples
- C = 2 # num channels
- Hin = 5 # input height
- Win = 5 # input width
- mu = 0.9 # momentum
- eps = 1e-5 # epsilon
- X = rand(rows=N, cols=C*Hin*Win)
- y = rand(rows=N, cols=C*Hin*Win)
- gamma = rand(rows=C, cols=1)
- beta = rand(rows=C, cols=1)
- ema_mean = rand(rows=C, cols=1)
- ema_var = rand(rows=C, cols=1)
- #[dummy, dummy, ema_mean, ema_var] = batch_norm2d::init(C)
-
- # Check training & testing modes
- for (i in 1:2) {
- if (i == 1)
- mode = 'train'
- else
- mode = 'test'
- print(" - Grad checking the '"+mode+"' mode.")
-
- # Compute analytical gradients of loss wrt parameters
- [out, ema_mean_upd, ema_var_upd, cache_mean, cache_var, cache_norm] =
- batch_norm2d::forward(X, gamma, beta, C, Hin, Win, mode, ema_mean, ema_var, mu, eps)
- dout = l2_loss::backward(out, y)
- [dX, dgamma, dbeta] = batch_norm2d::backward(dout, out, ema_mean_upd, ema_var_upd,
- cache_mean, cache_var, cache_norm,
- X, gamma, beta, C, Hin, Win, mode,
- ema_mean, ema_var, mu, eps)
-
- # Grad check
- h = 1e-5
- print(" - Grad checking X.")
- for (i in 1:nrow(X)) {
- for (j in 1:ncol(X)) {
- # Compute numerical derivative
- old = as.scalar(X[i,j])
- X[i,j] = old - h
- [outmh, ema_mean_upd, ema_var_upd, cache_mean, cache_var, cache_norm] =
- batch_norm2d::forward(X, gamma, beta, C, Hin, Win, mode, ema_mean, ema_var, mu, eps)
- lossmh = l2_loss::forward(outmh, y)
- X[i,j] = old + h
- [outph, ema_mean_upd, ema_var_upd, cache_mean, cache_var, cache_norm] =
- batch_norm2d::forward(X, gamma, beta, C, Hin, Win, mode, ema_mean, ema_var, mu, eps)
- lossph = l2_loss::forward(outph, y)
- X[i,j] = old # reset
- dX_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
- }
- }
-
- print(" - Grad checking gamma.")
- for (i in 1:nrow(gamma)) {
- for (j in 1:ncol(gamma)) {
- # Compute numerical derivative
- old = as.scalar(gamma[i,j])
- gamma[i,j] = old - h
- [outmh, ema_mean_upd, ema_var_upd, cache_mean, cache_var, cache_norm] =
- batch_norm2d::forward(X, gamma, beta, C, Hin, Win, mode, ema_mean, ema_var, mu, eps)
- lossmh = l2_loss::forward(outmh, y)
- gamma[i,j] = old + h
- [outph, ema_mean_upd, ema_var_upd, cache_mean, cache_var, cache_norm] =
- batch_norm2d::forward(X, gamma, beta, C, Hin, Win, mode, ema_mean, ema_var, mu, eps)
- lossph = l2_loss::forward(outph, y)
- gamma[i,j] = old # reset
- dgamma_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dgamma[i,j]), dgamma_num,
- lossph, lossmh)
- }
- }
-
- print(" - Grad checking beta.")
- for (i in 1:nrow(beta)) {
- for (j in 1:ncol(beta)) {
- # Compute numerical derivative
- old = as.scalar(beta[i,j])
- beta[i,j] = old - h
- [outmh, ema_mean_upd, ema_var_upd, cache_mean, cache_var, cache_norm] =
- batch_norm2d::forward(X, gamma, beta, C, Hin, Win, mode, ema_mean, ema_var, mu, eps)
- lossmh = l2_loss::forward(outmh, y)
- beta[i,j] = old + h
- [outph, ema_mean_upd, ema_var_upd, cache_mean, cache_var, cache_norm] =
- batch_norm2d::forward(X, gamma, beta, C, Hin, Win, mode, ema_mean, ema_var, mu, eps)
- lossph = l2_loss::forward(outph, y)
- beta[i,j] = old # reset
- dbeta_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dbeta[i,j]), dbeta_num,
- lossph, lossmh)
- }
- }
- }
-}
-
-conv2d = function() {
- /*
- * Gradient check for the 2D convolutional layer using `im2col`.
- */
- print("Grad checking the `im2col` 2D convolutional layer with L2 loss.")
-
- # Generate data
- N = 2 # num examples
- C = 2 # num channels
- Hin = 5 # input height
- Win = 5 # input width
- F = 2 # num filters
- Hf = 3 # filter height
- Wf = 3 # filter width
- stride = 1
- pad = 1
- X = rand(rows=N, cols=C*Hin*Win)
- y = rand(rows=N, cols=F*Hin*Win)
-
- # Create layers
- [W, b] = conv2d::init(F, C, Hf, Wf)
-
- # Compute analytical gradients of loss wrt parameters
- [out, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
- dout = l2_loss::backward(out, y)
- [dX, dW, db] = conv2d::backward(dout, Hout, Wout, X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
-
- # Grad check
- h = 1e-5
- print(" - Grad checking X.")
- for (i in 1:nrow(X)) {
- for (j in 1:ncol(X)) {
- # Compute numerical derivative
- old = as.scalar(X[i,j])
- X[i,j] = old - h
- [outmh, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
- lossmh = l2_loss::forward(outmh, y)
- X[i,j] = old + h
- [outph, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
- lossph = l2_loss::forward(outph, y)
- X[i,j] = old # reset
- dX_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
- }
- }
-
- print(" - Grad checking W.")
- for (i in 1:nrow(W)) {
- for (j in 1:ncol(W)) {
- # Compute numerical derivative
- old = as.scalar(W[i,j])
- W[i,j] = old - h
- [outmh, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
- lossmh = l2_loss::forward(outmh, y)
- W[i,j] = old + h
- [outph, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
- lossph = l2_loss::forward(outph, y)
- W[i,j] = old # reset
- dW_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
- }
- }
-
- print(" - Grad checking b.")
- for (i in 1:nrow(b)) {
- for (j in 1:ncol(b)) {
- # Compute numerical derivative
- old = as.scalar(b[i,j])
- b[i,j] = old - h
- [outmh, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
- lossmh = l2_loss::forward(outmh, y)
- b[i,j] = old + h
- [outph, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
- lossph = l2_loss::forward(outph, y)
- b[i,j] = old # reset
- db_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
- }
- }
-}
-
-conv2d_builtin = function() {
- /*
- * Gradient check for the 2D convolutional layer using built-in
- * functions.
- */
- print("Grad checking the built-in 2D convolutional layer with L2 loss.")
-
- # Generate data
- N = 2 # num examples
- C = 2 # num channels
- Hin = 5 # input height
- Win = 5 # input width
- F = 2 # num filters
- Hf = 3 # filter height
- Wf = 3 # filter width
- stride = 1
- pad = 1
- X = rand(rows=N, cols=C*Hin*Win)
- y = rand(rows=N, cols=F*Hin*Win)
-
- # Create layers
- [W, b] = conv2d_builtin::init(F, C, Hf, Wf)
-
- # Compute analytical gradients of loss wrt parameters
- [out, Hout, Wout] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
- dout = l2_loss::backward(out, y)
- [dX, dW, db] = conv2d_builtin::backward(dout, Hout, Wout, X, W, b, C, Hin, Win, Hf, Wf,
- stride, stride, pad, pad)
-
- # Grad check
- h = 1e-5
- print(" - Grad checking X.")
- for (i in 1:nrow(X)) {
- for (j in 1:ncol(X)) {
- # Compute numerical derivative
- old = as.scalar(X[i,j])
- X[i,j] = old - h
- [outmh, Hout, Wout] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
- lossmh = l2_loss::forward(outmh, y)
- X[i,j] = old + h
- [outph, Hout, Wout] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
- lossph = l2_loss::forward(outph, y)
- X[i,j] = old # reset
- dX_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
- }
- }
-
- print(" - Grad checking W.")
- for (i in 1:nrow(W)) {
- for (j in 1:ncol(W)) {
- # Compute numerical derivative
- old = as.scalar(W[i,j])
- W[i,j] = old - h
- [outmh, Hout, Wout] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
- lossmh = l2_loss::forward(outmh, y)
- W[i,j] = old + h
- [outph, Hout, Wout] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
- lossph = l2_loss::forward(outph, y)
- W[i,j] = old # reset
- dW_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
- }
- }
-
- print(" - Grad checking b.")
- for (i in 1:nrow(b)) {
- for (j in 1:ncol(b)) {
- # Compute numerical derivative
- old = as.scalar(b[i,j])
- b[i,j] = old - h
- [outmh, Hout, Wout] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
- lossmh = l2_loss::forward(outmh, y)
- b[i,j] = old + h
- [outph, Hout, Wout] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
- lossph = l2_loss::forward(outph, y)
- b[i,j] = old # reset
- db_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
- }
- }
-}
-
-conv2d_simple = function() {
- /*
- * Gradient check for the simple reference 2D convolutional layer.
- */
- print("Grad checking the simple reference 2D convolutional layer with L2 loss.")
-
- # Generate data
- N = 2 # num examples
- C = 2 # num channels
- Hin = 5 # input height
- Win = 5 # input width
- F = 2 # num filters
- Hf = 3 # filter height
- Wf = 3 # filter width
- stride = 1
- pad = 1
- X = rand(rows=N, cols=C*Hin*Win)
- y = rand(rows=N, cols=F*Hin*Win)
-
- # Create layers
- [W, b] = conv2d_simple::init(F, C, Hf, Wf)
-
- # Compute analytical gradients of loss wrt parameters
- [out, Hout, Wout] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
- dout = l2_loss::backward(out, y)
- [dX, dW, db] = conv2d_simple::backward(dout, Hout, Wout, X, W, b, C, Hin, Win, Hf, Wf,
- stride, stride, pad, pad)
-
- # Grad check
- h = 1e-5
- print(" - Grad checking X.")
- for (i in 1:nrow(X)) {
- for (j in 1:ncol(X)) {
- # Compute numerical derivative
- old = as.scalar(X[i,j])
- X[i,j] = old - h
- [outmh, Hout, Wout] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
- lossmh = l2_loss::forward(outmh, y)
- X[i,j] = old + h
- [outph, Hout, Wout] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
- lossph = l2_loss::forward(outph, y)
- X[i,j] = old # reset
- dX_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
- }
- }
-
- print(" - Grad checking W.")
- for (i in 1:nrow(W)) {
- for (j in 1:ncol(W)) {
- # Compute numerical derivative
- old = as.scalar(W[i,j])
- W[i,j] = old - h
- [outmh, Hout, Wout] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
- lossmh = l2_loss::forward(outmh, y)
- W[i,j] = old + h
- [outph, Hout, Wout] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
- lossph = l2_loss::forward(outph, y)
- W[i,j] = old # reset
- dW_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
- }
- }
-
- print(" - Grad checking b.")
- for (i in 1:nrow(b)) {
- for (j in 1:ncol(b)) {
- # Compute numerical derivative
- old = as.scalar(b[i,j])
- b[i,j] = old - h
- [outmh, Hout, Wout] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
- lossmh = l2_loss::forward(outmh, y)
- b[i,j] = old + h
- [outph, Hout, Wout] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
- lossph = l2_loss::forward(outph, y)
- b[i,j] = old # reset
- db_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
- }
- }
-}
-
-cross_entropy_loss = function() {
- /*
- * Gradient check for the cross-entropy loss function.
- */
- print("Grad checking the cross-entropy loss function.")
-
- # Generate data
- N = 3 # num examples
- K = 10 # num targets
- pred = rand(rows=N, cols=K, min=0, max=1, pdf="uniform")
- pred = pred / rowSums(pred) # normalized probs
- y = rand(rows=N, cols=K, min=0, max=1, pdf="uniform")
- y = y / rowSums(y) # normalized probs
-
- # Compute analytical gradient
- dpred = cross_entropy_loss::backward(pred, y)
-
- # Grad check
- h = 1e-5
- for (i in 1:nrow(pred)) {
- for (j in 1:ncol(pred)) {
- # Compute numerical derivative
- old = as.scalar(pred[i,j])
- pred[i,j] = old - h
- lossmh = cross_entropy_loss::forward(pred, y)
- pred[i,j] = old + h
- lossph = cross_entropy_loss::forward(pred, y)
- pred[i,j] = old # reset pred[i,j]
- dpred_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh)
- }
- }
-}
-
-dropout = function() {
- /*
- * Gradient check for the (inverted) dropout layer.
- */
- print("Grad checking the (inverted) dropout layer with L2 loss.")
-
- # Generate data
- N = 3 # num examples
- M = 100 # num neurons
- p = 0.5 # probability of dropping neuron output
- seed = as.integer(floor(as.scalar(rand(rows=1, cols=1, min=1, max=100000)))) # random seed
- X = rand(rows=N, cols=M)
- y = rand(rows=N, cols=M)
-
- # Compute analytical gradients of loss wrt parameters
- [out, mask] = dropout::forward(X, p, seed)
- dout = l2_loss::backward(out, y)
- dX = dropout::backward(dout, X, p, mask)
-
- # Grad check
- h = 1e-5
- for (i in 1:nrow(X)) {
- for (j in 1:ncol(X)) {
- # Compute numerical derivative
- old = as.scalar(X[i,j])
- X[i,j] = old - h
- [outmh, mask] = dropout::forward(X, p, seed)
- lossmh = l2_loss::forward(outmh, y)
- X[i,j] = old + h
- [outph, mask] = dropout::forward(X, p, seed)
- lossph = l2_loss::forward(outph, y)
- X[i,j] = old # reset
- dX_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
- }
- }
-}
-
-l1_loss = function() {
- /*
- * Gradient check for the L1 loss function.
- */
- print("Grad checking the L1 loss function.")
-
- # Generate data
- N = 3 # num examples
- D = 2 # num targets
- pred = rand(rows=N, cols=D)
- y = rand(rows=N, cols=D)
-
- # Compute analytical gradient
- dpred = l1_loss::backward(pred, y)
-
- # Grad check
- h = 1e-5
- for (i in 1:nrow(pred)) {
- for (j in 1:ncol(pred)) {
- # Compute numerical derivative
- old = as.scalar(pred[i,j])
- pred[i,j] = old - h
- lossmh = l1_loss::forward(pred, y)
- pred[i,j] = old + h
- lossph = l1_loss::forward(pred, y)
- pred[i,j] = old # reset pred[i,j]
- dpred_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh)
- }
- }
-}
-
-l1_reg = function() {
- /*
- * Gradient check for the L1 regularization function.
- */
- print("Grad checking the L1 regularization function.")
-
- # Generate data
- D = 5 # num features
- M = 3 # num neurons
- lambda = 0.01
- W = rand(rows=D, cols=M)
-
- # Compute analytical gradient
- dW = l1_reg::backward(W, lambda)
-
- # Grad check
- h = 1e-5
- for (i in 1:nrow(W)) {
- for (j in 1:ncol(W)) {
- # Compute numerical derivative
- old = as.scalar(W[i,j])
- W[i,j] = old - h
- reg_lossmh = l1_reg::forward(W, lambda)
- W[i,j] = old + h
- reg_lossph = l1_reg::forward(W, lambda)
- W[i,j] = old # reset W[i,j]
- dW_num = (reg_lossph-reg_lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dW[i,j]), dW_num,
- reg_lossph, reg_lossmh)
- }
- }
-}
-
-l2_loss = function() {
- /*
- * Gradient check for the L2 loss function.
- */
- print("Grad checking the L2 loss function.")
-
- # Generate data
- N = 3 # num examples
- D = 2 # num targets
- pred = rand(rows=N, cols=D)
- y = rand(rows=N, cols=D)
-
- # Compute analytical gradient
- dpred = l2_loss::backward(pred, y)
-
- # Grad check
- h = 1e-5
- for (i in 1:nrow(pred)) {
- for (j in 1:ncol(pred)) {
- # Compute numerical derivative
- old = as.scalar(pred[i,j])
- pred[i,j] = old - h
- lossmh = l2_loss::forward(pred, y)
- pred[i,j] = old + h
- lossph = l2_loss::forward(pred, y)
- pred[i,j] = old # reset pred[i,j]
- dpred_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh)
- }
- }
-}
-
-l2_reg = function() {
- /*
- * Gradient check for the L2 regularization function.
- */
- print("Grad checking the L2 regularization function.")
-
- # Generate data
- D = 5 # num features
- M = 3 # num neurons
- lambda = 0.01
- W = rand(rows=D, cols=M)
-
- # Compute analytical gradient
- dW = l2_reg::backward(W, lambda)
-
- # Grad check
- h = 1e-5
- for (i in 1:nrow(W)) {
- for (j in 1:ncol(W)) {
- # Compute numerical derivative
- old = as.scalar(W[i,j])
- W[i,j] = old - h
- reg_lossmh = l2_reg::forward(W, lambda)
- W[i,j] = old + h
- reg_lossph = l2_reg::forward(W, lambda)
- W[i,j] = old # reset W[i,j]
- dW_num = (reg_lossph-reg_lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dW[i,j]), dW_num,
- reg_lossph, reg_lossmh)
- }
- }
-}
-
-log_loss = function() {
- /*
- * Gradient check for the log loss function.
- */
- print("Grad checking the log loss function.")
-
- # Generate data
- N = 20 # num examples
- D = 1 # num targets
- pred = rand(rows=N, cols=D, min=0, max=1, pdf="uniform")
- y = round(rand(rows=N, cols=D, min=0, max=1, pdf="uniform"))
-
- # Compute analytical gradient
- dpred = log_loss::backward(pred, y)
-
- # Grad check
- h = 1e-5
- for (i in 1:nrow(pred)) {
- for (j in 1:ncol(pred)) {
- # Compute numerical derivative
- old = as.scalar(pred[i,j])
- pred[i,j] = old - h
- lossmh = log_loss::forward(pred, y)
- pred[i,j] = old + h
- lossph = log_loss::forward(pred, y)
- pred[i,j] = old # reset pred[i,j]
- dpred_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh)
- }
- }
-}
-
-lstm = function() {
- /*
- * Gradient check for the LSTM layer.
- */
- print("Grad checking the LSTM layer with L2 loss.")
-
- # Generate data
- N = 3 # num examples
- D = 10 # num features
- T = 15 # num timesteps (sequence length)
- M = 5 # num neurons
- return_seq = TRUE
- X = rand(rows=N, cols=T*D)
- y = rand(rows=N, cols=T*M)
- yc = rand(rows=N, cols=M)
- out0 = rand(rows=N, cols=M)
- c0 = rand(rows=N, cols=M)
- [W, b, dummy, dummy2] = lstm::init(N, D, M)
-
- # Compute analytical gradients of loss wrt parameters
- [out, c, cache_out, cache_c, cache_ifog] = lstm::forward(X, W, b, T, D, return_seq, out0, c0)
- dout = l2_loss::backward(out, y)
- dc = l2_loss::backward(c, yc)
- [dX, dW, db, dout0, dc0] = lstm::backward(dout, dc, X, W, b, T, D, return_seq, out0, c0,
- cache_out, cache_c, cache_ifog)
-
- # Grad check
- h = 1e-5
- print(" - Grad checking X.")
- for (i in 1:nrow(X)) {
- for (j in 1:ncol(X)) {
- # Compute numerical derivative
- old = as.scalar(X[i,j])
- X[i,j] = old - h
- [outmh, cmh, cache, cache, cache] = lstm::forward(X, W, b, T, D, return_seq, out0, c0)
- loss_outmh = l2_loss::forward(outmh, y)
- loss_cmh = l2_loss::forward(cmh, yc)
- lossmh = loss_outmh + loss_cmh
- X[i,j] = old + h
- [outph, cph, cache, cache, cache] = lstm::forward(X, W, b, T, D, return_seq, out0, c0)
- loss_outph = l2_loss::forward(outph, y)
- loss_cph = l2_loss::forward(cph, yc)
- lossph = loss_outph + loss_cph
- X[i,j] = old # reset
- dX_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
- }
- }
-
- print(" - Grad checking W.")
- for (i in 1:nrow(W)) {
- for (j in 1:ncol(W)) {
- # Compute numerical derivative
- old = as.scalar(W[i,j])
- W[i,j] = old - h
- [outmh, cmh, cache, cache, cache] = lstm::forward(X, W, b, T, D, return_seq, out0, c0)
- loss_outmh = l2_loss::forward(outmh, y)
- loss_cmh = l2_loss::forward(cmh, yc)
- lossmh = loss_outmh + loss_cmh
- W[i,j] = old + h
- [outph, cph, cache, cache, cache] = lstm::forward(X, W, b, T, D, return_seq, out0, c0)
- loss_outph = l2_loss::forward(outph, y)
- loss_cph = l2_loss::forward(cph, yc)
- lossph = loss_outph + loss_cph
- W[i,j] = old # reset
- dW_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
- }
- }
-
- print(" - Grad checking b.")
- for (i in 1:nrow(b)) {
- for (j in 1:ncol(b)) {
- # Compute numerical derivative
- old = as.scalar(b[i,j])
- b[i,j] = old - h
- [outmh, cmh, cache, cache, cache] = lstm::forward(X, W, b, T, D, return_seq, out0, c0)
- loss_outmh = l2_loss::forward(outmh, y)
- loss_cmh = l2_loss::forward(cmh, yc)
- lossmh = loss_outmh + loss_cmh
- b[i,j] = old + h
- [outph, cph, cache, cache, cache] = lstm::forward(X, W, b, T, D, return_seq, out0, c0)
- loss_outph = l2_loss::forward(outph, y)
- loss_cph = l2_loss::forward(cph, yc)
- lossph = loss_outph + loss_cph
- b[i,j] = old # reset
- db_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
- }
- }
-
- print(" - Grad checking out0.")
- for (i in 1:nrow(out0)) {
- for (j in 1:ncol(out0)) {
- # Compute numerical derivative
- old = as.scalar(out0[i,j])
- out0[i,j] = old - h
- [outmh, cmh, cache, cache, cache] = lstm::forward(X, W, b, T, D, return_seq, out0, c0)
- loss_outmh = l2_loss::forward(outmh, y)
- loss_cmh = l2_loss::forward(cmh, yc)
- lossmh = loss_outmh + loss_cmh
- out0[i,j] = old + h
- [outph, cph, cache, cache, cache] = lstm::forward(X, W, b, T, D, return_seq, out0, c0)
- loss_outph = l2_loss::forward(outph, y)
- loss_cph = l2_loss::forward(cph, yc)
- lossph = loss_outph + loss_cph
- out0[i,j] = old # reset
- dout0_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dout0[i,j]), dout0_num, lossph, lossmh)
- }
- }
-
- print(" - Grad checking c0.")
- for (i in 1:nrow(c0)) {
- for (j in 1:ncol(c0)) {
- # Compute numerical derivative
- old = as.scalar(c0[i,j])
- c0[i,j] = old - h
- [outmh, cmh, cache, cache, cache] = lstm::forward(X, W, b, T, D, return_seq, out0, c0)
- loss_outmh = l2_loss::forward(outmh, y)
- loss_cmh = l2_loss::forward(cmh, yc)
- lossmh = loss_outmh + loss_cmh
- c0[i,j] = old + h
- [outph, cph, cache, cache, cache] = lstm::forward(X, W, b, T, D, return_seq, out0, c0)
- loss_outph = l2_loss::forward(outph, y)
- loss_cph = l2_loss::forward(cph, yc)
- lossph = loss_outph + loss_cph
- c0[i,j] = old # reset
- dc0_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dc0[i,j]), dc0_num, lossph, lossmh)
- }
- }
-}
-
-max_pool2d = function() {
- /*
- * Gradient check for the 2D max pooling layer.
- */
- print("Grad checking the 2D max pooling layer with L2 loss.")
-
- # Generate data
- N = 2 # num examples
- C = 2 # num channels
- Hin = 4 # input height
- Win = 4 # input width
- Hf = 2 # pool filter height
- Wf = 2 # pool filter width
- stride = 2
- X = rand(rows=N, cols=C*Hin*Win)
-
- for (pad in 0:1) {
- print(" - Grad checking w/ pad="+pad+".")
- Hout = as.integer(floor((Hin + 2*pad - Hf)/stride + 1))
- Wout = as.integer(floor((Win + 2*pad - Wf)/stride + 1))
- y = rand(rows=N, cols=C*Hout*Wout)
-
- # Compute analytical gradients of loss wrt parameters
- [out, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
- dout = l2_loss::backward(out, y)
- dX = max_pool2d::backward(dout, Hout, Wout, X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
-
- # Grad check
- h = 1e-5
- for (i in 1:nrow(X)) {
- for (j in 1:ncol(X)) {
- # Compute numerical derivative
- old = as.scalar(X[i,j])
- X[i,j] = old - h
- [outmh, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
- lossmh = l2_loss::forward(outmh, y)
- X[i,j] = old + h
- [outph, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
- lossph = l2_loss::forward(outph, y)
- X[i,j] = old # reset
- dX_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
- }
- }
- }
-}
-
-max_pool2d_builtin = function() {
- /*
- * Gradient check for the 2D max pooling layer.
- */
- print("Grad checking the built-in 2D max pooling layer with L2 loss.")
-
- # Generate data
- N = 2 # num examples
- C = 2 # num channels
- Hin = 4 # input height
- Win = 4 # input width
- Hf = 2 # pool filter height
- Wf = 2 # pool filter width
- stride = 2
- X = rand(rows=N, cols=C*Hin*Win)
-
- for (pad in 0:1) {
- print(" - Grad checking w/ pad="+pad+".")
- Hout = as.integer(floor((Hin + 2 * pad - Hf) / stride + 1))
- Wout = as.integer(floor((Win + 2 * pad - Wf) / stride + 1))
- y = rand(rows=N, cols=C*Hout*Wout)
-
- # Compute analytical gradients of loss wrt parameters
- [out, Hout, Wout] = max_pool2d_builtin::forward(X, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
- dout = l2_loss::backward(out, y)
- dX = max_pool2d_builtin::backward(dout, Hout, Wout, X, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
-
- # Grad check
- h = 1e-5
- for (i in 1:nrow(X)) {
- for (j in 1:ncol(X)) {
- # Compute numerical derivative
- old = as.scalar(X[i,j])
- X[i,j] = old - h
- [outmh, Hout, Wout] = max_pool2d_builtin::forward(X, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
- lossmh = l2_loss::forward(outmh, y)
- X[i,j] = old + h
- [outph, Hout, Wout] = max_pool2d_builtin::forward(X, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
- lossph = l2_loss::forward(outph, y)
- X[i,j] = old # reset
- dX_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
- }
- }
- }
-}
-
-max_pool2d_simple = function() {
- /*
- * Gradient check for the simple reference 2D max pooling layer.
- */
- print("Grad checking the simple reference 2D max pooling layer with L2 loss.")
-
- # Generate data
- N = 2 # num examples
- C = 2 # num channels
- Hin = 4 # input height
- Win = 4 # input width
- Hf = 2 # pool filter height
- Wf = 2 # pool filter width
- stride = 2
- X = rand(rows=N, cols=C*Hin*Win)
-
- for (pad in 0:1) {
- print(" - Grad checking w/ pad="+pad+".")
- Hout = as.integer(floor((Hin + 2*pad - Hf)/stride + 1))
- Wout = as.integer(floor((Win + 2*pad - Wf)/stride + 1))
- y = rand(rows=N, cols=C*Hout*Wout)
-
- # Compute analytical gradients of loss wrt parameters
- [out, Hout, Wout] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
- dout = l2_loss::backward(out, y)
- dX = max_pool2d_simple::backward(dout, Hout, Wout, X, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
-
- # Grad check
- h = 1e-5
- for (i in 1:nrow(X)) {
- for (j in 1:ncol(X)) {
- # Compute numerical derivative
- old = as.scalar(X[i,j])
- X[i,j] = old - h
- [outmh, Hout, Wout] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
- lossmh = l2_loss::forward(outmh, y)
- X[i,j] = old + h
- [outph, Hout, Wout] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf, stride, stride,
- pad, pad)
- lossph = l2_loss::forward(outph, y)
- X[i,j] = old # reset
- dX_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
- }
- }
- }
-}
-
-relu = function() {
- /*
- * Gradient check for the ReLU nonlinearity layer.
- *
- * NOTE: This could result in a false-negative in which the test
- * fails due to a kink being crossed in the nonlinearity. This
- * occurs when the tests, f(x-h) and f(x+h), end up on opposite
- * sides of the zero threshold of max(0, fx). For now, just run
- * the tests again. In the future, we can explicitly check for
- * this and rerun the test automatically.
- */
- print("Grad checking the ReLU nonlinearity layer with L2 loss.")
-
- # Generate data
- N = 3 # num examples
- M = 10 # num neurons
- X = rand(rows=N, cols=M, min=-5, max=5)
- y = rand(rows=N, cols=M)
-
- # Compute analytical gradients of loss wrt parameters
- out = relu::forward(X)
- dout = l2_loss::backward(out, y)
- dX = relu::backward(dout, X)
-
- # Grad check
- h = 1e-5
- for (i in 1:nrow(X)) {
- for (j in 1:ncol(X)) {
- # Compute numerical derivative
- old = as.scalar(X[i,j])
- X[i,j] = old - h
- outmh = relu::forward(X)
- lossmh = l2_loss::forward(outmh, y)
- X[i,j] = old + h
- outph = relu::forward(X)
- lossph = l2_loss::forward(outph, y)
- X[i,j] = old # reset
- dX_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
- }
- }
-}
-
-rnn = function() {
- /*
- * Gradient check for the simple RNN layer.
- */
- print("Grad checking the simple RNN layer with L2 loss.")
-
- # Generate data
- N = 3 # num examples
- D = 10 # num features
- T = 15 # num timesteps (sequence length)
- M = 5 # num neurons
- return_seq = TRUE
- X = rand(rows=N, cols=T*D)
- y = rand(rows=N, cols=T*M)
- out0 = rand(rows=N, cols=M)
- [W, b, dummy] = rnn::init(N, D, M)
-
- # Compute analytical gradients of loss wrt parameters
- [out, cache_out] = rnn::forward(X, W, b, T, D, return_seq, out0)
- dout = l2_loss::backward(out, y)
- [dX, dW, db, dout0] = rnn::backward(dout, X, W, b, T, D, return_seq, out0, cache_out)
-
- # Grad check
- h = 1e-5
- print(" - Grad checking X.")
- for (i in 1:nrow(X)) {
- for (j in 1:ncol(X)) {
- # Compute numerical derivative
- old = as.scalar(X[i,j])
- X[i,j] = old - h
- [outmh, cache_out] = rnn::forward(X, W, b, T, D, return_seq, out0)
- lossmh = l2_loss::forward(outmh, y)
- X[i,j] = old + h
- [outph, cache_out] = rnn::forward(X, W, b, T, D, return_seq, out0)
- lossph = l2_loss::forward(outph, y)
- X[i,j] = old # reset
- dX_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
- }
- }
-
- print(" - Grad checking W.")
- for (i in 1:nrow(W)) {
- for (j in 1:ncol(W)) {
- # Compute numerical derivative
- old = as.scalar(W[i,j])
- W[i,j] = old - h
- [outmh, cache_out] = rnn::forward(X, W, b, T, D, return_seq, out0)
- lossmh = l2_loss::forward(outmh, y)
- W[i,j] = old + h
- [outph, cache_out] = rnn::forward(X, W, b, T, D, return_seq, out0)
- lossph = l2_loss::forward(outph, y)
- W[i,j] = old # reset
- dW_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
- }
- }
-
- print(" - Grad checking b.")
- for (i in 1:nrow(b)) {
- for (j in 1:ncol(b)) {
- # Compute numerical derivative
- old = as.scalar(b[i,j])
- b[i,j] = old - h
- [outmh, cache_out] = rnn::forward(X, W, b, T, D, return_seq, out0)
- lossmh = l2_loss::forward(outmh, y)
- b[i,j] = old + h
- [outph, cache_out] = rnn::forward(X, W, b, T, D, return_seq, out0)
- lossph = l2_loss::forward(outph, y)
- b[i,j] = old # reset
- db_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
- }
- }
-
- print(" - Grad checking out0.")
- for (i in 1:nrow(out0)) {
- for (j in 1:ncol(out0)) {
- # Compute numerical derivative
- old = as.scalar(out0[i,j])
- out0[i,j] = old - h
- [outmh, cache_out] = rnn::forward(X, W, b, T, D, return_seq, out0)
- lossmh = l2_loss::forward(outmh, y)
- out0[i,j] = old + h
- [outph, cache_out] = rnn::forward(X, W, b, T, D, return_seq, out0)
- lossph = l2_loss::forward(outph, y)
- out0[i,j] = old # reset
- dout0_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dout0[i,j]), dout0_num, lossph, lossmh)
- }
- }
-}
-
-scale_shift1d = function() {
- /*
- * Gradient check for the 1D scale & shift layer.
- */
- print("Grad checking the 1D scale & shift layer with L2 loss.")
-
- # Generate data
- N = 3 # num examples
- D = 100 # num features
- X = rand(rows=N, cols=D)
- y = rand(rows=N, cols=D)
- [gamma, beta] = scale_shift1d::init(D)
-
- # Compute analytical gradients of loss wrt parameters
- out = scale_shift1d::forward(X, gamma, beta)
- dout = l2_loss::backward(out, y)
- [dX, dgamma, dbeta] = scale_shift1d::backward(dout, out, X, gamma, beta)
-
- # Grad check
- h = 1e-5
- print(" - Grad checking X.")
- for (i in 1:nrow(X)) {
- for (j in 1:ncol(X)) {
- # Compute numerical derivative
- old = as.scalar(X[i,j])
- X[i,j] = old - h
- outmh = scale_shift1d::forward(X, gamma, beta)
- lossmh = l2_loss::forward(outmh, y)
- X[i,j] = old + h
- outph = scale_shift1d::forward(X, gamma, beta)
- lossph = l2_loss::forward(outph, y)
- X[i,j] = old # reset
- dX_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
- }
- }
-
- print(" - Grad checking gamma.")
- for (i in 1:nrow(gamma)) {
- for (j in 1:ncol(gamma)) {
- # Compute numerical derivative
- old = as.scalar(gamma[i,j])
- gamma[i,j] = old - h
- outmh = scale_shift1d::forward(X, gamma, beta)
- lossmh = l2_loss::forward(outmh, y)
- gamma[i,j] = old + h
- outph = scale_shift1d::forward(X, gamma, beta)
- lossph = l2_loss::forward(outph, y)
- gamma[i,j] = old # reset
- dgamma_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dgamma[i,j]), dgamma_num,
- lossph, lossmh)
- }
- }
-
- print(" - Grad checking beta.")
- for (i in 1:nrow(beta)) {
- for (j in 1:ncol(beta)) {
- # Compute numerical derivative
- old = as.scalar(beta[i,j])
- beta[i,j] = old - h
- outmh = scale_shift1d::forward(X, gamma, beta)
- lossmh = l2_loss::forward(outmh, y)
- beta[i,j] = old + h
- outph = scale_shift1d::forward(X, gamma, beta)
- lossph = l2_loss::forward(outph, y)
- beta[i,j] = old # reset
- dbeta_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dbeta[i,j]), dbeta_num,
- lossph, lossmh)
- }
- }
-}
-
-scale_shift2d = function() {
- /*
- * Gradient check for the 2D scale & shift layer.
- */
- print("Grad checking the 2D scale & shift layer with L2 loss.")
-
- # Generate data
- N = 3 # num examples
- C = 2 # num channels
- Hin = 5 # input height
- Win = 5 # input width
- X = rand(rows=N, cols=C*Hin*Win)
- y = rand(rows=N, cols=C*Hin*Win)
- [gamma, beta] = scale_shift2d::init(C)
-
- # Compute analytical gradients of loss wrt parameters
- out = scale_shift2d::forward(X, gamma, beta, C, Hin, Win)
- dout = l2_loss::backward(out, y)
- [dX, dgamma, dbeta] = scale_shift2d::backward(dout, out, X, gamma, beta, C, Hin, Win)
-
- # Grad check
- h = 1e-5
- print(" - Grad checking X.")
- for (i in 1:nrow(X)) {
- for (j in 1:ncol(X)) {
- # Compute numerical derivative
- old = as.scalar(X[i,j])
- X[i,j] = old - h
- outmh = scale_shift2d::forward(X, gamma, beta, C, Hin, Win)
- lossmh = l2_loss::forward(outmh, y)
- X[i,j] = old + h
- outph = scale_shift2d::forward(X, gamma, beta, C, Hin, Win)
- lossph = l2_loss::forward(outph, y)
- X[i,j] = old # reset
- dX_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
- }
- }
-
- print(" - Grad checking gamma.")
- for (i in 1:nrow(gamma)) {
- for (j in 1:ncol(gamma)) {
- # Compute numerical derivative
- old = as.scalar(gamma[i,j])
- gamma[i,j] = old - h
- outmh = scale_shift2d::forward(X, gamma, beta, C, Hin, Win)
- lossmh = l2_loss::forward(outmh, y)
- gamma[i,j] = old + h
- outph = scale_shift2d::forward(X, gamma, beta, C, Hin, Win)
- lossph = l2_loss::forward(outph, y)
- gamma[i,j] = old # reset
- dgamma_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dgamma[i,j]), dgamma_num,
- lossph, lossmh)
- }
- }
-
- print(" - Grad checking beta.")
- for (i in 1:nrow(beta)) {
- for (j in 1:ncol(beta)) {
- # Compute numerical derivative
- old = as.scalar(beta[i,j])
- beta[i,j] = old - h
- outmh = scale_shift2d::forward(X, gamma, beta, C, Hin, Win)
- lossmh = l2_loss::forward(outmh, y)
- beta[i,j] = old + h
- outph = scale_shift2d::forward(X, gamma, beta, C, Hin, Win)
- lossph = l2_loss::forward(outph, y)
- beta[i,j] = old # reset
- dbeta_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dbeta[i,j]), dbeta_num,
- lossph, lossmh)
- }
- }
-}
-
-sigmoid = function() {
- /*
- * Gradient check for the sigmoid nonlinearity layer.
- */
- print("Grad checking the sigmoid nonlinearity layer with L2 loss.")
-
- # Generate data
- N = 3 # num examples
- M = 10 # num neurons
- X = rand(rows=N, cols=M)
- y = rand(rows=N, cols=M)
-
- # Compute analytical gradients of loss wrt parameters
- out = sigmoid::forward(X)
- dout = l2_loss::backward(out, y)
- dX = sigmoid::backward(dout, X)
-
- # Grad check
- h = 1e-5
- for (i in 1:nrow(X)) {
- for (j in 1:ncol(X)) {
- # Compute numerical derivative
- old = as.scalar(X[i,j])
- X[i,j] = old - h
- outmh = sigmoid::forward(X)
- lossmh = l2_loss::forward(outmh, y)
- X[i,j] = old + h
- outph = sigmoid::forward(X)
- lossph = l2_loss::forward(outph, y)
- X[i,j] = old # reset
- dX_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
- }
- }
-}
-
-softmax = function() {
- /*
- * Gradient check for the softmax layer.
- */
- print("Grad checking the softmax layer with L2 loss.")
-
- # Generate data
- N = 3 # num examples
- D = 10 # num classes
- X = rand(rows=N, cols=D)
- y = rand(rows=N, cols=D, min=0, max=1, pdf="uniform")
- y = y / rowSums(y)
-
- # Compute analytical gradients of loss wrt parameters
- out = softmax::forward(X)
- dout = l2_loss::backward(out, y)
- dX = softmax::backward(dout, X)
-
- # Grad check
- h = 1e-5
- for (i in 1:nrow(X)) {
- for (j in 1:ncol(X)) {
- # Compute numerical derivative
- old = as.scalar(X[i,j])
- X[i,j] = old - h
- outmh = softmax::forward(X)
- lossmh = l2_loss::forward(outmh, y)
- X[i,j] = old + h
- outph = softmax::forward(X)
- lossph = l2_loss::forward(outph, y)
- X[i,j] = old # reset
- dX_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
- }
- }
-}
-
-tanh = function() {
- /*
- * Gradient check for the hyperbolic tangent (tanh) nonlinearity
- * layer.
- */
- print("Grad checking the tanh nonlinearity layer with L2 loss.")
-
- # Generate data
- N = 3 # num examples
- M = 10 # num neurons
- X = rand(rows=N, cols=M)
- y = rand(rows=N, cols=M)
-
- # Compute analytical gradients of loss wrt parameters
- out = tanh::forward(X)
- dout = l2_loss::backward(out, y)
- dX = tanh::backward(dout, X)
-
- # Grad check
- h = 1e-5
- for (i in 1:nrow(X)) {
- for (j in 1:ncol(X)) {
- # Compute numerical derivative
- old = as.scalar(X[i,j])
- X[i,j] = old - h
- outmh = tanh::forward(X)
- lossmh = l2_loss::forward(outmh, y)
- X[i,j] = old + h
- outph = tanh::forward(X)
- lossph = l2_loss::forward(outph, y)
- X[i,j] = old # reset
- dX_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
- }
- }
-}
-
-two_layer_affine_l2_net = function() {
- /*
- * Gradient check for a two-layer, fully-connected, feed-forward
- * network with ReLU nonlinearity and L2 loss.
- *
- * NOTE: This could result in a false-negative in which the test
- * fails due to a kink being crossed in the ReLU nonlinearity. This
- * occurs when the tests, f(x-h) and f(x+h), end up on opposite
- * sides of the zero threshold of max(0, fx). For now, just run
- * the tests again. In the future, we can explicitly check for
- * this and rerun the test automatically.
- */
- print("Grad checking a two-layer, fully-connected, feed-forward network with a ReLU " +
- "nonlinearity, and an L2 loss function.")
-
- # Generate input data
- N = 1000 # num examples
- D = 100 # num features
- yD = 5 # num targets
- X = rand(rows=N, cols=D, pdf="normal")
- y = rand(rows=N, cols=yD)
-
- # Create 2-layer, fully-connected network
- M = 10 # number of hidden neurons
- [W1, b1] = affine::init(D, M)
- [W2, b2] = affine::init(M, yD)
-
- # Optimize for short "burn-in" time to move to characteristic
- # mode of operation and unmask any real issues.
- print(" - Burn-in:")
- lr = 0.0001
- decay = 0.99
- for(i in 1:5) {
- # Compute forward and backward passes of net
- [pred, loss, dX, dW1, db1, dW2, db2] = two_layer_affine_l2_net_run(X, y, W1, b1, W2, b2)
- print(" - L2 loss: " + loss)
-
- # Optimize with basic SGD
- W1 = W1 - lr * dW1
- b1 = b1 - lr * db1
- W2 = W2 - lr * dW2
- b2 = b2 - lr * db2
- lr = lr * decay
- }
-
- # Compute analytical gradients
- [pred, loss, dX, dW1, db1, dW2, db2] = two_layer_affine_l2_net_run(X, y, W1, b1, W2, b2)
-
- # Grad check
- h = 1e-5
- print(" - Grad checking X.")
- for (i in 1:2) {
- for (j in 1:ncol(X)) {
- # Compute numerical derivative
- old_x = as.scalar(X[i,j])
- X[i,j] = old_x - h
- [lossmh, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2)
- X[i,j] = old_x + h
- [lossph, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2)
- X[i,j] = old_x # reset X[i,j]
- dX_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
- }
- }
-
- print(" - Grad checking W1.")
- for (i in 1:nrow(W1)) {
- for (j in 1:ncol(W1)) {
- # Compute numerical derivative
- old_w = as.scalar(W1[i,j])
- W1[i,j] = old_w - h
- [lossmh, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2)
- W1[i,j] = old_w + h
- [lossph, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2)
- W1[i,j] = old_w # reset W1[i,j]
- dWij_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dW1[i,j]), dWij_num, lossph, lossmh)
- }
- }
-
- print(" - Grad checking W2.")
- for (i in 1:nrow(W2)) {
- for (j in 1:ncol(W2)) {
- # Compute numerical derivative
- old_w = as.scalar(W2[i,j])
- W2[i,j] = old_w - h
- [lossmh, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2)
- W2[i,j] = old_w + h
- [lossph, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2)
- W2[i,j] = old_w # reset W2[i,j]
- dWij_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(dW2[i,j]), dWij_num, lossph, lossmh)
- }
- }
-
- print(" - Grad checking b1.")
- for (i in 1:nrow(b1)) {
- for (j in 1:ncol(b1)) {
- # Compute numerical derivative
- old_b = as.scalar(b1[i,j])
- b1[i,j] = old_b - h
- [lossmh, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2)
- b1[i,j] = old_b + h
- [lossph, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2)
- b1[i,j] = old_b # reset b1[i,j]
- dbij_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(db1[i,j]), dbij_num, lossph, lossmh)
- }
- }
-
- print(" - Grad checking b2.")
- for (i in 1:nrow(b2)) {
- for (j in 1:ncol(b2)) {
- # Compute numerical derivative
- old_b = as.scalar(b2[i,j])
- b2[i,j] = old_b - h
- [lossmh, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2)
- b2[i,j] = old_b + h
- [lossph, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2)
- b2[i,j] = old_b # reset b2[i,j]
- dbij_num = (lossph-lossmh) / (2*h) # numerical derivative
-
- # Check error
- rel_error = test_util::check_rel_grad_error(as.scalar(db2[i,j]), dbij_num, lossph, lossmh)
- }
- }
-}
-
-/*
- * Test network with forward/backward functions.
- */
-two_layer_affine_l2_net_run = function(matrix[double] X, matrix[double] y,
- matrix[double] W1, matrix[double] b1,
- matrix[double] W2, matrix[double] b2)
- return (matrix[double] pred, double loss,
- matrix[double] dX,
- matrix[double] dW1, matrix[double] db1,
- matrix[double] dW2, matrix[double] db2) {
- # Compute forward pass
- [loss, pred, aout, hout] = two_layer_affine_l2_net_forward(X, y, W1, b1, W2, b2)
-
- # Compute backward pass
- [dX, dpred, daout, dhout, dW1, db1, dW2, db2] =
- two_layer_affine_l2_net_backward(X, y, pred, aout, hout, W1, b1, W2, b2)
-}
-
-two_layer_affine_l2_net_forward = function(matrix[double] X, matrix[double] y,
- matrix[double] W1, matrix[double] b1,
- matrix[double] W2, matrix[double] b2)
- return (double loss, matrix[double] pred, matrix[double] aout, matrix[double] hout) {
- # Compute forward pass
- hout = affine::forward(X, W1, b1)
- aout = relu::forward(hout)
- pred = affine::forward(aout, W2, b2)
-
- # Compute loss
- loss = l2_loss::forward(pred, y)
-}
-
-two_layer_affine_l2_net_backward = function(matrix[double] X, matrix[double] y, matrix[double] pred,
- matrix[double] aout, matrix[double] hout,
- matrix[double] W1, matrix[double] b1,
- matrix[double] W2, matrix[double] b2)
- return (matrix[double] dX, matrix[double] dpred,
- matrix[double] daout, matrix[double] dhout,
- matrix[double] dW1, matrix[double] db1, matrix[double] dW2, matrix[double] db2) {
- # Compute backward pass
- dpred = l2_loss::backward(pred, y)
- [daout, dW2, db2] = affine::backward(dpred, aout, W2, b2)
- dhout = relu::backward(daout, hout)
- [dX, dW1, db1] = affine::backward(dhout, X, W1, b1)
-}
-
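For readers skimming the deleted grad_check.dml above: every check in the file repeats the same central-difference pattern. Below is a minimal DML sketch of that pattern, based on the W-loop of affine(); nothing here is new API, since `affine`, `l2_loss`, and `test_util::check_rel_grad_error` are the modules sourced at the top of the file, and the exact error thresholds live in nn/test/util.dml.

  # Sketch: numerical gradient check for one parameter matrix W.
  # Perturb each entry by +/- h, re-run the forward pass and loss,
  # and compare the analytical gradient dW against the central difference.
  h = 1e-5
  for (i in 1:nrow(W)) {
    for (j in 1:ncol(W)) {
      old = as.scalar(W[i,j])
      W[i,j] = old - h
      outmh = affine::forward(X, W, b)
      lossmh = l2_loss::forward(outmh, y)   # f(w - h)
      W[i,j] = old + h
      outph = affine::forward(X, W, b)
      lossph = l2_loss::forward(outph, y)   # f(w + h)
      W[i,j] = old                          # restore the entry
      dW_num = (lossph-lossmh) / (2*h)      # central difference
      rel_error = test_util::check_rel_grad_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
    }
  }

The reported relative error is typically of the form |dW_analytical - dW_numerical| / max(|dW_analytical|, |dW_numerical|), relaxed when the surrounding losses are very small; see check_rel_grad_error in nn/test/util.dml for the exact behavior. The kink caveat noted in relu() and two_layer_affine_l2_net() applies to this pattern as well: if old-h and old+h land on opposite sides of the zero threshold of max(0, x), the central difference mixes two linear regimes and can disagree with a correct analytical gradient, so such a failure warrants a rerun before being treated as real.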
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/nn/test/max_pool2d_simple.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/max_pool2d_simple.dml b/scripts/staging/SystemML-NN/nn/test/max_pool2d_simple.dml
deleted file mode 100644
index 188bd6e..0000000
--- a/scripts/staging/SystemML-NN/nn/test/max_pool2d_simple.dml
+++ /dev/null
@@ -1,172 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-/*
- * Max Pooling layer.
- *
- * This implementation is intended to be a simple, reference version.
- */
-
-forward = function(matrix[double] X, int C, int Hin, int Win, int Hf, int Wf,
- int strideh, int stridew, int padh, int padw)
- return (matrix[double] out, int Hout, int Wout) {
- /*
- * Computes the forward pass for a 2D spatial max pooling layer.
- * The input data has N examples, each represented as a 3D volume
- * unrolled into a single vector.
- *
- * This implementation is intended to be a simple, reference version.
- *
- * Inputs:
- * - X: Inputs, of shape (N, C*Hin*Win).
- * - C: Number of input channels (dimensionality of input depth).
- * - Hin: Input height.
- * - Win: Input width.
- * - Hf: Filter height.
- * - Wf: Filter width.
- * - strideh: Stride over height.
- * - stridew: Stride over width.
- * - padh: Padding for top and bottom sides.
- * A typical value is 0.
- * - padw: Padding for left and right sides.
- * A typical value is 0.
- *
- * Outputs:
- * - out: Outputs, of shape (N, C*Hout*Wout).
- * - Hout: Output height.
- * - Wout: Output width.
- */
- N = nrow(X)
- Hout = as.integer(floor((Hin + 2*padh - Hf)/strideh + 1))
- Wout = as.integer(floor((Win + 2*padw - Wf)/stridew + 1))
-
- # Create output volume
- out = matrix(0, rows=N, cols=C*Hout*Wout)
-
- # Max pooling
- parfor (n in 1:N, check=0) { # all examples
- Xn = matrix(X[n,], rows=C, cols=Hin*Win)
-
- # Pad image
- pad_value = -1/0  # -infinity, so padded cells can never win the max
- Xn_padded = matrix(pad_value, rows=C, cols=(Hin+2*padh)*(Win+2*padw)) # filled with -inf
- parfor (c in 1:C) {
- Xn_slice = matrix(Xn[c,], rows=Hin, cols=Win) # depth slice c reshaped
- Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw)
- Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice
- Xn_padded[c,] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape
- }
- img = Xn_padded # shape (C, (Hin+2*padh)*(Win+2*padw))
-
- parfor (c in 1:C, check=0) { # all channels
- img_slice = matrix(img[c,], rows=Hin+2*padh, cols=Win+2*padw)
- parfor (hout in 1:Hout, check=0) { # all output rows
- hin = (hout-1) * strideh + 1
- parfor (wout in 1:Wout, check=0) { # all output columns
- win = (wout-1) * stridew + 1
- out[n, (c-1)*Hout*Wout + (hout-1)*Wout + wout] = max(img_slice[hin:hin+Hf-1, win:win+Wf-1])
- }
- }
- }
- }
-}
-
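As a concrete check of the output-shape formula in `forward` (the numbers are
illustrative, not from the file): with Hin = Win = 4, Hf = Wf = 2,
strideh = stridew = 2, and padh = padw = 0, we get
Hout = floor((4 + 0 - 2)/2 + 1) = 2 and likewise Wout = 2, so each channel's
4x4 slice is reduced to a 2x2 grid of patch maxima and `out` has C*2*2 columns
per example.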
-backward = function(matrix[double] dout, int Hout, int Wout, matrix[double] X,
- int C, int Hin, int Win, int Hf, int Wf,
- int strideh, int stridew, int padh, int padw)
- return (matrix[double] dX) {
- /*
- * Computes the backward pass for a 2D spatial max pooling layer.
- * The input data has N examples, each represented as a 3D volume
- * unrolled into a single vector.
- *
- * Inputs:
- * - dout: Gradient wrt `out` from upstream, of
- * shape (N, C*Hout*Wout).
- * - Hout: Output height.
- * - Wout: Output width.
- * - X: Inputs, of shape (N, C*Hin*Win).
- * - C: Number of input channels (dimensionality of input depth).
- * - Hin: Input height.
- * - Win: Input width.
- * - Hf: Filter height.
- * - Wf: Filter width.
- * - strideh: Stride over height.
- * - stridew: Stride over width.
- * - padh: Padding for top and bottom sides.
- * A typical value is 0.
- * - padw: Padding for left and right sides.
- * A typical value is 0.
- *
- * Outputs:
- * - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
- */
- N = nrow(X)
-
- # Create gradient volume
- dX = matrix(0, rows=N, cols=C*Hin*Win)
-
- # Gradient of max pooling
- for (n in 1:N) { # all examples
- Xn = matrix(X[n,], rows=C, cols=Hin*Win)
-
- # Pad image
- pad_value = -1/0  # -infinity, so padded cells can never win the max
- Xn_padded = matrix(pad_value, rows=C, cols=(Hin+2*padh)*(Win+2*padw)) # filled with -inf
- parfor (c in 1:C) {
- Xn_slice = matrix(Xn[c,], rows=Hin, cols=Win) # depth slice c reshaped
- Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw)
- Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice
- Xn_padded[c,] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape
- }
- img = Xn_padded
-
- dimg = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw))
- for (c in 1:C) { # all channels
- img_slice = matrix(img[c,], rows=Hin+2*padh, cols=Win+2*padw)
- dimg_slice = matrix(0, rows=Hin+2*padh, cols=Win+2*padw)
- for (hout in 1:Hout) { # all output rows; plain `for`, since patches may overlap and writes accumulate
- hin = (hout-1) * strideh + 1
- for (wout in 1:Wout) { # all output columns
- win = (wout-1) * stridew + 1
- img_slice_patch = img_slice[hin:hin+Hf-1, win:win+Wf-1]
- max_val_ind = img_slice_patch == max(img_slice_patch) # max value indicator matrix
- # gradient passes through only for the max value(s) in this patch
- dimg_slice_patch = max_val_ind * dout[n, (c-1)*Hout*Wout + (hout-1)*Wout + wout]
- dimg_slice[hin:hin+Hf-1, win:win+Wf-1] = dimg_slice[hin:hin+Hf-1, win:win+Wf-1] + dimg_slice_patch
- }
- }
- dimg[c,] = matrix(dimg_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw))
- }
-
- # Unpad derivs on input
- dXn = matrix(0, rows=C, cols=Hin*Win)
- parfor (c in 1:C, check=0) {
- dXn_padded_slice = matrix(dimg[c,], rows=(Hin+2*padh), cols=(Win+2*padw))
- dXn_slice = dXn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win]
- dXn[c,] = matrix(dXn_slice, rows=1, cols=Hin*Win)
- }
- dX[n,] = matrix(dXn, rows=1, cols=C*Hin*Win)
- }
-}
-
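The gradient routing in `backward` hinges on the indicator matrix
`max_val_ind`: within each pooled patch, only the cell(s) equal to the patch
maximum receive the upstream gradient. A minimal illustration with made-up
numbers (not part of the file):

    patch = matrix("1 3 2 4", rows=2, cols=2)  # one 2x2 pooling window
    max_val_ind = patch == max(patch)          # flags only the cell holding 4
    dpatch = max_val_ind * 0.5                 # an upstream gradient of 0.5 flows to that cell alone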
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/nn/test/run_tests.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/run_tests.dml b/scripts/staging/SystemML-NN/nn/test/run_tests.dml
deleted file mode 100644
index d8173a9..0000000
--- a/scripts/staging/SystemML-NN/nn/test/run_tests.dml
+++ /dev/null
@@ -1,90 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-/*
- * Script to run tests.
- */
-source("nn/test/grad_check.dml") as grad_check
-source("nn/test/test.dml") as test
-
-print("")
-print("Starting grad checks.")
-print("---")
-
-# Loss & loss-related functions
-grad_check::cross_entropy_loss()
-grad_check::l1_loss()
-grad_check::l1_reg()
-grad_check::l2_loss()
-grad_check::l2_reg()
-grad_check::log_loss()
-print("")
-
-# Core layers
-grad_check::affine()
-grad_check::batch_norm1d()
-grad_check::batch_norm2d()
-grad_check::conv2d()
-grad_check::conv2d_builtin()
-grad_check::conv2d_simple()
-grad_check::dropout()
-grad_check::lstm()
-grad_check::max_pool2d()
-grad_check::max_pool2d_builtin()
-grad_check::max_pool2d_simple()
-grad_check::relu()
-grad_check::rnn()
-grad_check::scale_shift1d()
-grad_check::scale_shift2d()
-grad_check::sigmoid()
-grad_check::softmax()
-grad_check::tanh()
-print("")
-
-# Example model
-grad_check::two_layer_affine_l2_net()
-print("")
-
-print("---")
-print("Grad checks complete -- look for any ERRORs or WARNINGs.")
-print("If any tests involving ReLUs failed, try a few times " +
- "to ensure that they were not false negatives due to " +
- "kinks being crossed.")
-print("")
-
-print("")
-print("Starting other tests.")
-print("---")
-
-test::batch_norm1d()
-test::batch_norm2d()
-test::conv2d()
-test::cross_entropy_loss()
-test::im2col()
-test::max_pool2d()
-test::padding()
-test::tanh()
-
-print("---")
-print("Other tests complete -- look for any ERRORs or WARNINGs.")
-print("")
-print("")
-
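When iterating on a single layer, it can be handy to run just one check rather
than the full suite above; a minimal sketch, relying only on the same source
statement this script already uses:

    source("nn/test/grad_check.dml") as grad_check
    grad_check::max_pool2d_simple()

The full script itself is normally handed to the SystemML command line (e.g.
via the `-f` option), though the exact launch command depends on the
deployment.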