You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by du...@apache.org on 2017/04/26 21:42:31 UTC
[05/11] incubator-systemml git commit: [SYSTEMML-1524] Graduate `nn`
library to `scripts/nn`
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/nn/layers/rnn.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/rnn.dml b/scripts/staging/SystemML-NN/nn/layers/rnn.dml
deleted file mode 100644
index 3c6faae..0000000
--- a/scripts/staging/SystemML-NN/nn/layers/rnn.dml
+++ /dev/null
@@ -1,183 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-/*
- * Simple (Vanilla) RNN layer.
- */
-source("nn/layers/tanh.dml") as tanh
-
-forward = function(matrix[double] X, matrix[double] W, matrix[double] b, int T, int D,
- boolean return_sequences, matrix[double] out0)
- return (matrix[double] out, matrix[double] cache_out) {
- /*
- * Computes the forward pass for a simple RNN layer with M neurons.
- * The input data has N sequences of T examples, each with D features.
- *
- * In a simple RNN, the previous timestep's output is fed back in as an
- * extra input: `out_t = tanh(cbind(X_t, out_{t-1}) %*% W + b)`.
- *
- * Inputs:
- * - X: Inputs, of shape (N, T*D); timestep t is columns (t-1)*D+1 to t*D.
- * - W: Weights, of shape (D+M, M).
- * - b: Biases, of shape (1, M).
- * - T: Length of example sequences (number of timesteps).
- * - D: Dimensionality of the input features (number of features).
- * - return_sequences: Whether to return `out` at all timesteps,
- * or just for the final timestep.
- * - out0: Output matrix from previous timestep, of shape (N, M).
- * Note: If there is no previous output, pass a matrix of zeros (as `init` returns).
- *
- * Outputs:
- * - out: If `return_sequences` is True, outputs for all timesteps,
- * of shape (N, T*M). Else, outputs for the final timestep, of
- * shape (N, M).
- * - cache_out: Cache of outputs, of shape (T, N*M); row t is `out_t` reshaped.
- * Note: Pass this to `backward` so outputs need not be recomputed.
- */
- N = nrow(X) # number of sequences
- M = ncol(W) # number of neurons
- out_prev = out0
- if (return_sequences) {
- out = matrix(0, rows=N, cols=T*M)
- }
- else {
- out = matrix(0, rows=N, cols=M)
- }
- # per-timestep outputs, saved so the backward pass does not recompute them
- cache_out = matrix(0, rows=T, cols=N*M)
-
- for (t in 1:T) { # each timestep
- X_t = X[,(t-1)*D+1:t*D] # columns of timestep t, shape (N, D)
- input = cbind(X_t, out_prev) # shape (N, D+M)
- out_t = tanh::forward(input %*% W + b) # shape (N, M)
- # store: either into this timestep's column slice, or overwrite with the latest output
- if (return_sequences) {
- out[,(t-1)*M+1:t*M] = out_t
- }
- else {
- out = out_t
- }
- out_prev = out_t
- cache_out[t,] = matrix(out_t, rows=1, cols=N*M) # reshape (N, M) into row t of the cache
- }
-}
-
-backward = function(matrix[double] dout, matrix[double] X, matrix[double] W, matrix[double] b,
- int T, int D, boolean given_sequences, matrix[double] out0,
- matrix[double] cache_out)
- return (matrix[double] dX, matrix[double] dW, matrix[double] db, matrix[double] dout0) {
- /*
- * Computes the backward pass for a simple RNN layer with M neurons.
- *
- * Inputs:
- * - dout: Gradient wrt `out` from upstream. If `given_sequences`
- * is True, contains gradients on outputs for all timesteps,
- * of shape (N, T*M). Else, contains gradient on output for
- * the final timestep, of shape (N, M).
- * - X: Inputs, of shape (N, T*D).
- * - W: Weights, of shape (D+M, M).
- * - b: Biases, of shape (1, M). Not referenced by this computation.
- * - T: Length of example sequences (number of timesteps).
- * - D: Dimensionality of the input features (number of features).
- * - given_sequences: Whether `dout` is for all timesteps,
- * or just for the final timestep. This is based on whether
- * `return_sequences` was true in the forward pass.
- * - out0: Output matrix from previous timestep, of shape (N, M).
- * Note: This must be the same matrix that was passed to `forward`.
- * - cache_out: Cache of outputs, of shape (T, N*M).
- * Note: This is the `cache_out` returned by `forward`.
- *
- * Outputs:
- * - dX: Gradient wrt `X`, of shape (N, T*D).
- * - dW: Gradient wrt `W`, of shape (D+M, M).
- * - db: Gradient wrt `b`, of shape (1, M).
- * - dout0: Gradient wrt `out0`, of shape (N, M).
- */
- N = nrow(X) # number of sequences
- M = ncol(W) # number of neurons
- dX = matrix(0, rows=N, cols=T*D)
- dW = matrix(0, rows=D+M, cols=M)
- db = matrix(0, rows=1, cols=M)
- dout0 = matrix(0, rows=N, cols=M)
- if (!given_sequences) {
- # only given dout for the final timestep: prepend zero gradients, M columns per earlier timestep
- dout = cbind(matrix(0, rows=N, cols=(T-1)*M), dout) # shape (N, T*M)
- }
-
- t = T
- for (iter in 1:T) { # each timestep in reverse order
- X_t = X[,(t-1)*D+1:t*D] # shape (N, D)
- dout_t = dout[,(t-1)*M+1:t*M] # shape (N, M)
- out_t = matrix(cache_out[t,], rows=N, cols=M) # shape (N, M)
- if (t == 1) {
- out_prev = out0 # shape (N, M)
- }
- else {
- out_prev = matrix(cache_out[t-1,], rows=N, cols=M) # shape (N, M)
- }
- input = cbind(X_t, out_prev) # shape (N, D+M)
- dout_t_raw = (1-out_t^2) * dout_t # into tanh (derivative is 1 - tanh^2), shape (N, M)
- dW = dW + t(input) %*% dout_t_raw # shape (D+M, M)
- db = db + colSums(dout_t_raw) # shape (1, M)
- dinput = dout_t_raw %*% t(W) # shape (N, D+M)
- dX[,(t-1)*D+1:t*D] = dinput[,1:D]
- dout_prev = dinput[,D+1:D+M] # recurrent part of the input gradient, shape (N, M)
- if (t == 1) {
- dout0 = dout_prev # shape (N, M)
- }
- else {
- dout[,(t-2)*M+1:(t-1)*M] = dout[,(t-2)*M+1:(t-1)*M] + dout_prev # add to timestep t-1, shape (N, M)
- }
- t = t - 1
- }
-}
-
-init = function(int N, int D, int M)
- return (matrix[double] W, matrix[double] b, matrix[double] out0) {
- /*
- * Initialize the parameters of this layer.
- *
- * Note: This is just a convenience function, and parameters
- * may be initialized manually if needed.
- *
- * We use the Glorot uniform heuristic which limits the magnification
- * of inputs/gradients during forward/backward passes by drawing
- * uniform weights from [-s, s] with s = sqrt(6/(fan_in + fan_out)).
- * - http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf
- *
- * Inputs:
- * - N: Number of examples in batch.
- * - D: Dimensionality of the input features (number of features).
- * - M: Number of neurons in this layer.
- *
- * Outputs:
- * - W: Weights, of shape (D+M, M).
- * - b: Biases, of shape (1, M), initialized to zeros.
- * - out0: All-zero previous timestep output matrix, of shape (N, M).
- */
- fan_in = D+M # rows of W: input features plus fed-back outputs
- fan_out = M # columns of W
- scale = sqrt(6/(fan_in+fan_out))
- W = rand(rows=D+M, cols=M, min=-scale, max=scale, pdf="uniform")
- b = matrix(0, rows=1, cols=M)
- out0 = matrix(0, rows=N, cols=M)
-}
-
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/nn/layers/scale_shift1d.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/scale_shift1d.dml b/scripts/staging/SystemML-NN/nn/layers/scale_shift1d.dml
deleted file mode 100644
index 7e162a3..0000000
--- a/scripts/staging/SystemML-NN/nn/layers/scale_shift1d.dml
+++ /dev/null
@@ -1,95 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-/*
- * 1D Scale & Shift layer.
- */
-
-forward = function(matrix[double] X, matrix[double] gamma, matrix[double] beta)
- return (matrix[double] out) {
- /*
- * Computes the forward pass for a 1D scale & shift layer. The input
- * data has N examples, each with D features.
- *
- * A 1D scale & shift layer introduces learnable parameters
- * (gamma, beta) to scale and shift the input on a per-feature basis.
- *
- * `y = x*gamma + beta`
- *
- * Inputs:
- * - X: Inputs, of shape (N, D).
- * - gamma: Scale parameters, of shape (1, D).
- * - beta: Shift parameters, of shape (1, D).
- *
- * Outputs:
- * - out: Outputs, of shape (N, D).
- */
- # Elementwise scale, then shift; the (1, D) parameter rows apply to every row of X
- out = X*gamma + beta # shape (N, D)
-}
-
-backward = function(matrix[double] dout, matrix[double] out,
- matrix[double] X, matrix[double] gamma, matrix[double] beta)
- return (matrix[double] dX, matrix[double] dgamma, matrix[double] dbeta) {
- /*
- * Computes the backward pass for a 1D scale & shift layer.
- *
- * Inputs:
- * - dout: Gradient wrt `out` from upstream, of shape (N, D).
- * - out: Outputs from the forward pass, of shape (N, D).
- * - X: Inputs, of shape (N, D).
- * - gamma: Scale parameters, of shape (1, D).
- * - beta: Shift parameters, of shape (1, D).
- *
- * Outputs:
- * - dX: Gradient wrt `X`, of shape (N, D).
- * - dgamma: Gradient wrt `gamma`, of shape (1, D).
- * - dbeta: Gradient wrt `beta`, of shape (1, D).
- * Note: `out` and `beta` are not referenced by this computation.
- */
- # Compute gradients during training
- dgamma = colSums(dout*X) # sum over examples of dout*x, shape (1, D)
- dbeta = colSums(dout) # sum over examples of dout, shape (1, D)
- dX = dout * gamma # shape (N, D)
-}
-
-init = function(int D)
- return (matrix[double] gamma, matrix[double] beta) {
- /*
- * Initialize the parameters of this layer.
- *
- * By default, we initialize to an identity function, with a scale
- * filler of `1`, and a shift filler of `0`.
- *
- * Note: This is just a convenience function, and parameters
- * may be initialized manually if needed.
- *
- * Inputs:
- * - D: Dimensionality of the input features (number of features).
- *
- * Outputs:
- * - gamma: Scale parameters, of shape (1, D), all ones.
- * - beta: Shift parameters, of shape (1, D), all zeros.
- */
- gamma = matrix(1, rows=1, cols=D) # scale of 1: leaves inputs unchanged
- beta = matrix(0, rows=1, cols=D) # shift of 0: leaves inputs unchanged
-}
-
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/nn/layers/scale_shift2d.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/scale_shift2d.dml b/scripts/staging/SystemML-NN/nn/layers/scale_shift2d.dml
deleted file mode 100644
index 79c884a..0000000
--- a/scripts/staging/SystemML-NN/nn/layers/scale_shift2d.dml
+++ /dev/null
@@ -1,107 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-/*
- * 2D Scale & Shift layer.
- */
-source("nn/util.dml") as util
-
-forward = function(matrix[double] X, matrix[double] gamma, matrix[double] beta,
- int C, int Hin, int Win)
- return (matrix[double] out) {
- /*
- * Computes the forward pass for a 2D scale & shift layer. The input
- * data has N examples, each represented as a 3D volume unrolled into
- * a single vector.
- *
- * A 2D scale & shift layer introduces learnable parameters
- * (gamma, beta) to scale and shift the input on a per-channel basis.
- *
- * `y = x*gamma + beta`
- *
- * Inputs:
- * - X: Inputs, of shape (N, C*Hin*Win).
- * - gamma: Scale parameters, of shape (C, 1).
- * - beta: Shift parameters, of shape (C, 1).
- * - C: Number of input channels (dimensionality of input depth).
- * - Hin: Input height.
- * - Win: Input width.
- * Note: `C`, `Hin`, and `Win` are not referenced by this function's body.
- * Outputs:
- * - out: Outputs, of shape (N, C*Hin*Win).
- */
- # Scale and shift, using the `bias_multiply` and `bias_add` built-ins
- scaled = bias_multiply(X, gamma) # shape (N, C*Hin*Win)
- out = bias_add(scaled, beta) # shape (N, C*Hin*Win)
-}
-
-backward = function(matrix[double] dout, matrix[double] out,
- matrix[double] X, matrix[double] gamma, matrix[double] beta,
- int C, int Hin, int Win)
- return (matrix[double] dX, matrix[double] dgamma, matrix[double] dbeta) {
- /*
- * Computes the backward pass for a 2D scale & shift layer.
- *
- * Inputs:
- * - dout: Gradient wrt `out` from upstream, of shape (N, C*Hin*Win).
- * - out: Outputs from the forward pass, of shape (N, C*Hin*Win).
- * - X: Input data matrix to the forward pass, of
- * shape (N, C*Hin*Win).
- * - gamma: Scale parameters, of shape (C, 1).
- * - beta: Shift parameters, of shape (C, 1).
- * - C: Number of input channels (dimensionality of input depth).
- * - Hin: Input height.
- * - Win: Input width.
- *
- * Outputs:
- * - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
- * - dgamma: Gradient wrt `gamma`, of shape (C, 1).
- * - dbeta: Gradient wrt `beta`, of shape (C, 1).
- * Note: `out` and `beta` are not referenced by this computation.
- */
- # Compute gradients during training
- dgamma = util::channel_sums(dout*X, C, Hin, Win) # shape (C, 1)
- dbeta = util::channel_sums(dout, C, Hin, Win) # shape (C, 1)
- dX = bias_multiply(dout, gamma) # shape (N, C*Hin*Win)
-}
-
-init = function(int C)
- return (matrix[double] gamma, matrix[double] beta) {
- /*
- * Initialize the parameters of this layer.
- *
- * By default, we initialize to an identity function, with a scale
- * filler of `1`, and a shift filler of `0`.
- *
- * Note: This is just a convenience function, and parameters
- * may be initialized manually if needed.
- *
- * Inputs:
- * - C: Number of input channels (dimensionality of input depth).
- *
- * Outputs:
- * - gamma: Scale parameters, of shape (C, 1), all ones.
- * - beta: Shift parameters, of shape (C, 1), all zeros.
- */
- gamma = matrix(1, rows=C, cols=1) # scale of 1: leaves inputs unchanged
- beta = matrix(0, rows=C, cols=1) # shift of 0: leaves inputs unchanged
-}
-
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/nn/layers/sigmoid.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/sigmoid.dml b/scripts/staging/SystemML-NN/nn/layers/sigmoid.dml
deleted file mode 100644
index 2d85adc..0000000
--- a/scripts/staging/SystemML-NN/nn/layers/sigmoid.dml
+++ /dev/null
@@ -1,62 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-/*
- * Sigmoid nonlinearity layer.
- */
-
-forward = function(matrix[double] X)
- return (matrix[double] out) {
- /*
- * Computes the forward pass for a sigmoid nonlinearity layer.
- *
- * `sigmoid(x) = 1 / (1 + e^-x)`
- *
- * If `X` contains a single feature column, the output of a sigmoid
- * layer can be interpreted as a predicted probability of a true
- * class when paired with a log loss function in a binary
- * classification problem.
- *
- * Inputs:
- * - X: Inputs, of shape (any, any).
- *
- * Outputs:
- * - out: Outputs, of same shape as `X`.
- */
- out = 1 / (1+exp(-X)) # applied elementwise
-}
-
-backward = function(matrix[double] dout, matrix[double] X)
- return (matrix[double] dX) {
- /*
- * Computes the backward pass for a sigmoid nonlinearity layer.
- *
- * Inputs:
- * - dout: Gradient wrt `out` from upstream, of same shape as `X`.
- * - X: Inputs, of shape (any, any).
- *
- * Outputs:
- * - dX: Gradient wrt `X`, of same shape as `X`.
- */
- out = 1 / (1+exp(-X)) # recompute the forward output from X
- dX = out * (1-out) * dout # sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x))
-}
-
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/nn/layers/softmax.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/softmax.dml b/scripts/staging/SystemML-NN/nn/layers/softmax.dml
deleted file mode 100644
index 68a7bc7..0000000
--- a/scripts/staging/SystemML-NN/nn/layers/softmax.dml
+++ /dev/null
@@ -1,87 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-/*
- * Softmax classifier layer.
- */
-
-forward = function(matrix[double] scores)
- return (matrix[double] probs) {
- /*
- * Computes the forward pass for a softmax classifier. The inputs
- * are interpreted as unnormalized log-probabilities for each of
- * N examples, and the softmax function transforms them to normalized
- * probabilities.
- *
- * This can be interpreted as a generalization of the sigmoid
- * function to multiple classes.
- *
- * `probs_ij = e^scores_ij / sum(e^scores_i)`
- *
- * Inputs:
- * - scores: Inputs, of shape (N, D).
- *
- * Outputs:
- * - probs: Outputs, of shape (N, D).
- */
- # For numerical stability, we subtract the max score of an example from all scores for that
- # example. This is equivalent to the original formulation:
- # e^scores_i / sum(e^scores_i) == (C*e^scores_i) / (C*sum(e^scores_i))
- # == e^(scores_i+log(C)) / sum(e^(scores_i+log(C)))
- # set log(C) = -max(scores_i):
- # == e^(scores_i-max(scores_i)) / sum(e^(scores_i-max(scores_i)))
- scores = scores - rowMaxs(scores) # numerical stability
- unnorm_probs = exp(scores) # unnormalized probabilities
- probs = unnorm_probs / rowSums(unnorm_probs) # normalized probabilities
-}
-
-backward = function(matrix[double] dprobs, matrix[double] scores)
- return (matrix[double] dscores) {
- /*
- * Computes the backward pass for a softmax classifier.
- *
- * Note that dscores_ij has multiple source branches:
- *
- * ```
- * dprobs_ij/dscores_ij = probs_ij * (1 - probs_ij)
- * dprobs_ik/dscores_ij = -probs_ik * probs_ij, for all k != j
- *
- * dloss/dscores_ij =
- * (dloss/dprobs_ij * dprobs_ij/dscores_ij)
- * + sum_{k!=j}(dloss/dprobs_ik * dprobs_ik/dscores_ij)
- * ```
- *
- * Inputs:
- * - dprobs: Gradient wrt `probs` from upstream, of shape (N, D).
- * - scores: Inputs, of shape (N, D).
- *
- * Outputs:
- * - dscores: Gradient wrt `scores`, of shape (N, D).
- */
- scores = scores - rowMaxs(scores) # numerical stability
- unnorm_probs = exp(scores) # unnormalized probabilities
- probs = unnorm_probs / rowSums(unnorm_probs) # normalized probabilities (recomputed from scores)
- # After some cancellation of the two branches above:
- # dscores = dprobs*probs - probs*rowSums(dprobs*probs)
- dtemp = dprobs * probs # shared term, computed once
- dscores = dtemp - probs*rowSums(dtemp)
-}
-
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/nn/layers/tanh.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/tanh.dml b/scripts/staging/SystemML-NN/nn/layers/tanh.dml
deleted file mode 100644
index d849d70..0000000
--- a/scripts/staging/SystemML-NN/nn/layers/tanh.dml
+++ /dev/null
@@ -1,65 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-/*
- * Tanh nonlinearity layer.
- */
-source("nn/layers/sigmoid.dml") as sigmoid
-
-forward = function(matrix[double] X)
- return (matrix[double] out) {
- /*
- * Computes the forward pass for a tanh nonlinearity layer.
- *
- * ```
- * tanh(x) = (e^x - e^-x) / (e^x + e^-x)
- * = 2 * sigmoid(2x) - 1
- * ```
- *
- * Inputs:
- * - X: Inputs, of shape (any, any).
- *
- * Outputs:
- * - out: Outputs, of same shape as `X`.
- */
- # Direct form: out = (exp(X) - exp(-X)) / (exp(X) + exp(-X))
- # Instead, use the equivalent formulation in terms of the sigmoid function:
- sigma2X = sigmoid::forward(2*X)
- out = 2*sigma2X - 1
-}
-
-backward = function(matrix[double] dout, matrix[double] X)
- return (matrix[double] dX) {
- /*
- * Computes the backward pass for a tanh nonlinearity layer.
- *
- * Inputs:
- * - dout: Gradient wrt `out` from upstream, of same shape as `X`.
- * - X: Inputs, of shape (any, any).
- *
- * Outputs:
- * - dX: Gradient wrt `X`, of same shape as `X`.
- */
- sigma2X = sigmoid::forward(2*X)
- out = 2*sigma2X - 1 # recompute tanh(X) as in the forward pass
- dX = (1-out^2) * dout # tanh'(x) = 1 - tanh(x)^2
-}
-
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/nn/optim/adagrad.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/optim/adagrad.dml b/scripts/staging/SystemML-NN/nn/optim/adagrad.dml
deleted file mode 100644
index 85b1c41..0000000
--- a/scripts/staging/SystemML-NN/nn/optim/adagrad.dml
+++ /dev/null
@@ -1,77 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-/*
- * Adagrad optimizer.
- */
-
-update = function(matrix[double] X, matrix[double] dX, double lr, double epsilon,
- matrix[double] cache)
- return (matrix[double] X, matrix[double] cache) {
- /*
- * Performs an Adagrad update.
- *
- * This is an adaptive learning rate optimizer that maintains the
- * sum of squared gradients to automatically adjust the effective
- * learning rate.
- *
- * Reference:
- * - Adaptive Subgradient Methods for Online Learning and Stochastic
- * Optimization, Duchi et al.
- * - http://jmlr.org/papers/v12/duchi11a.html
- *
- * Inputs:
- * - X: Parameters to update, of shape (any, any).
- * - dX: Gradient wrt `X` of a loss function being optimized, of
- * same shape as `X`.
- * - lr: Learning rate.
- * - epsilon: Smoothing term to avoid divide by zero errors.
- * Typical values are in the range of [1e-8, 1e-4].
- * - cache: State that maintains per-parameter sum of squared
- * gradients, of same shape as `X`.
- *
- * Outputs:
- * - X: Updated parameters `X`, of same shape as input `X`.
- * - cache: Updated state that maintains per-parameter sum of squared
- * gradients, of same shape as `X`.
- */
- cache = cache + dX^2 # accumulate squared gradients per parameter
- X = X - (lr * dX / (sqrt(cache)+epsilon)) # per-parameter step, scaled down as cache grows
-}
-
-init = function(matrix[double] X)
- return (matrix[double] cache) {
- /*
- * Initialize the state for this optimizer.
- *
- * Note: This is just a convenience function, and state
- * may be initialized manually if needed.
- *
- * Inputs:
- * - X: Parameters to update, of shape (any, any).
- *
- * Outputs:
- * - cache: State that maintains per-parameter sum of squared
- * gradients, of same shape as `X`, initialized to zeros.
- */
- cache = matrix(0, rows=nrow(X), cols=ncol(X)) # only the shape of X is used
-}
-
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/nn/optim/adam.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/optim/adam.dml b/scripts/staging/SystemML-NN/nn/optim/adam.dml
deleted file mode 100644
index 4b6fa2a..0000000
--- a/scripts/staging/SystemML-NN/nn/optim/adam.dml
+++ /dev/null
@@ -1,97 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-/*
- * Adam optimizer.
- */
-
-update = function(matrix[double] X, matrix[double] dX, double lr, double beta1, double beta2,
- double epsilon, int t, matrix[double] m, matrix[double] v)
- return (matrix[double] X, matrix[double] m, matrix[double] v) {
- /*
- * Performs an Adam update.
- *
- * Reference:
- * - Adam: A Method for Stochastic Optimization, Kingma, Ba.
- * - http://arxiv.org/abs/1412.6980
- *
- * Inputs:
- * - X: Parameters to update, of shape (any, any).
- * - dX: Gradient wrt `X` of a loss function being optimized, of
- * same shape as `X`.
- * - lr: Learning rate. Recommended value is 0.001.
- * - beta1: Exponential decay rate for the 1st moment estimates.
- * Recommended value is 0.9.
- * - beta2: Exponential decay rate for the 2nd moment estimates.
- * Recommended value is 0.999.
- * - epsilon: Smoothing term to avoid divide by zero errors.
- * Recommended value is 1e-8.
- * - t: Timestep, starting at 0 (incremented locally before use; the new value is not returned).
- * - m: State containing the 1st moment (mean) estimate by
- * maintaining exponential moving averages of the gradients, of
- * same shape as `X`.
- * - v: State containing the 2nd raw moment (uncentered variance)
- * estimate by maintaining exponential moving averages of the
- * squared gradients, of same shape as `X`.
- *
- * Outputs:
- * - X: Updated parameters `X`, of same shape as input `X`.
- * - m: Updated state containing the 1st moment (mean) estimate by
- * maintaining exponential moving averages of the gradients, of
- * same shape as `X`.
- * - v: Updated state containing the 2nd raw moment (uncentered
- * variance) estimate by maintaining exponential moving averages
- * of the squared gradients, of same shape as `X`.
- */
- t = t + 1 # bias-correction terms below use the 1-based step count
- m = beta1*m + (1-beta1)*dX # update biased 1st moment estimate
- v = beta2*v + (1-beta2)*dX^2 # update biased 2nd raw moment estimate
- # m = m / (1-beta1^t) # compute bias-corrected 1st moment estimate
- # v = v / (1-beta2^t) # compute bias-corrected 2nd raw moment estimate
- # X = X - (lr * m / (sqrt(v)+epsilon)) # param update
- # Simplified for efficiency: both corrections are folded into lr, so epsilon is added to the uncorrected sqrt(v):
- lr = lr * sqrt(1-beta2^t) / (1-beta1^t)
- X = X - (lr * m / (sqrt(v)+epsilon))
-}
-
-init = function(matrix[double] X)
- return (matrix[double] m, matrix[double] v) {
- /*
- * Initialize the state for this optimizer.
- *
- * Note: This is just a convenience function, and state
- * may be initialized manually if needed.
- *
- * Inputs:
- * - X: Parameters to update, of shape (any, any).
- *
- * Outputs:
- * - m: Initial (all-zero) state containing the 1st moment (mean) estimate by
- * maintaining exponential moving averages of the gradients, of
- * same shape as `X`.
- * - v: Initial (all-zero) state containing the 2nd raw moment (uncentered
- * variance) estimate by maintaining exponential moving averages
- * of the squared gradients, of same shape as `X`.
- */
- m = matrix(0, rows=nrow(X), cols=ncol(X)) # only the shape of X is used
- v = matrix(0, rows=nrow(X), cols=ncol(X))
-}
-
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/nn/optim/rmsprop.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/optim/rmsprop.dml b/scripts/staging/SystemML-NN/nn/optim/rmsprop.dml
deleted file mode 100644
index 1feccaf..0000000
--- a/scripts/staging/SystemML-NN/nn/optim/rmsprop.dml
+++ /dev/null
@@ -1,79 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-/*
- * RMSprop optimizer.
- */
-
-update = function(matrix[double] X, matrix[double] dX, double lr, double decay_rate,
- double epsilon, matrix[double] cache)
- return (matrix[double] X, matrix[double] cache) {
- /*
- * Performs an RMSprop update.
- *
- * This is an adaptive learning rate optimizer that can be viewed
- * as an adjustment of the Adagrad method that uses a moving average
- * of the squared gradients, instead of their sum, to improve convergence.
- *
- * Reference:
- * - Neural Networks for Machine Learning, Lecture 6a, Hinton,
- * slide 29.
- * - http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf
- *
- * Inputs:
- * - X: Parameters to update, of shape (any, any).
- * - dX: Gradient wrt `X` of a loss function being optimized, of
- * same shape as `X`.
- * - lr: Learning rate.
- * - decay_rate: Term controlling the rate of the moving average.
- * Typical values are in the range of [0.9, 0.999].
- * - epsilon: Smoothing term to avoid divide by zero errors.
- * Typical values are in the range of [1e-8, 1e-4].
- * - cache: State that maintains the moving average of the squared
- * gradients, of same shape as `X`.
- *
- * Outputs:
- * - X: Updated parameters `X`, of same shape as input `X`.
- * - cache: Updated state that maintains the moving average of the
- * squared gradients, of same shape as `X`.
- */
- cache = decay_rate*cache + (1-decay_rate)*dX^2
- X = X - (lr * dX / (sqrt(cache)+epsilon))
-}
-
-init = function(matrix[double] X)
- return (matrix[double] cache) {
- /*
- * Initialize the state for this optimizer.
- *
- * Note: This is just a convenience function, and state
- * may be initialized manually if needed.
- *
- * Inputs:
- * - X: Parameters to update, of shape (any, any).
- *
- * Outputs:
- * - cache: State that maintains the moving average of the squared
- * gradients, of same shape as `X`.
- */
- cache = matrix(0, rows=nrow(X), cols=ncol(X))
-}
-
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/nn/optim/sgd.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/optim/sgd.dml b/scripts/staging/SystemML-NN/nn/optim/sgd.dml
deleted file mode 100644
index 3ba7eba..0000000
--- a/scripts/staging/SystemML-NN/nn/optim/sgd.dml
+++ /dev/null
@@ -1,42 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-/*
- * Stochastic Gradient Descent (SGD) optimizer.
- */
-
-update = function(matrix[double] X, matrix[double] dX, double lr)
- return (matrix[double] X) {
- /*
- * Performs a vanilla SGD update.
- *
- * Inputs:
- * - X: Parameters to update, of shape (any, any).
- * - dX: Gradient wrt `X` of a loss function being optimized, of
- * same shape as `X`.
- * - lr: Learning rate.
- *
- * Outputs:
- * - X: Updated parameters `X`, of same shape as input `X`.
- */
- X = X - lr*dX
-}
-
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/nn/optim/sgd_momentum.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/optim/sgd_momentum.dml b/scripts/staging/SystemML-NN/nn/optim/sgd_momentum.dml
deleted file mode 100644
index 85922da..0000000
--- a/scripts/staging/SystemML-NN/nn/optim/sgd_momentum.dml
+++ /dev/null
@@ -1,71 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-/*
- * Stochastic Gradient Descent with momentum (SGD-momentum) optimizer.
- */
-
-update = function(matrix[double] X, matrix[double] dX, double lr, double mu, matrix[double] v)
- return (matrix[double] X, matrix[double] v) {
- /*
- * Performs an SGD update with momentum.
- *
- * In SGD with momentum, we assume that the parameters have a velocity
- * that continues with some momentum, and that is influenced by the
- * gradient.
- *
- * Inputs:
- * - X: Parameters to update, of shape (any, any).
- * - dX: Gradient wrt `X` of a loss function being optimized, of
- * same shape as `X`.
- * - lr: Learning rate.
- * - mu: Momentum value.
- * Typical values are in the range of [0.5, 0.99], usually
- * started at the lower end and annealed towards the higher end.
- * - v: State maintaining the velocity of the parameters `X`, of same
- * shape as `X`.
- *
- * Outputs:
- * - X: Updated parameters `X`, of same shape as input `X`.
- * - v: Updated velocity of the parameters `X`, of same shape as
- * input `X`.
- */
- v = mu*v - lr*dX # update velocity
- X = X + v # update position
-}
-
-init = function(matrix[double] X)
- return (matrix[double] v) {
- /*
- * Initialize the state for this optimizer.
- *
- * Note: This is just a convenience function, and state
- * may be initialized manually if needed.
- *
- * Inputs:
- * - X: Parameters to update, of shape (any, any).
- *
- * Outputs:
- * - v: Initial velocity of the parameters `X`.
- */
- v = matrix(0, rows=nrow(X), cols=ncol(X))
-}
-
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/nn/optim/sgd_nesterov.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/optim/sgd_nesterov.dml b/scripts/staging/SystemML-NN/nn/optim/sgd_nesterov.dml
deleted file mode 100644
index 3b62c6e..0000000
--- a/scripts/staging/SystemML-NN/nn/optim/sgd_nesterov.dml
+++ /dev/null
@@ -1,81 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-/*
- * Stochastic Gradient Descent with Nesterov momentum (SGD-Nesterov) optimizer.
- */
-
-update = function(matrix[double] X, matrix[double] dX, double lr, double mu, matrix[double] v)
- return (matrix[double] X, matrix[double] v) {
- /*
- * Performs an SGD update with Nesterov momentum.
- *
- * As with regular SGD with momentum, in SGD with Nesterov momentum,
- * we assume that the parameters have a velocity that continues
- * with some momentum, and that is influenced by the gradient.
- * In this view specifically, we perform the position update from the
- * position that the momentum is about to carry the parameters to,
- * rather than from the previous position. Additionally, we always
- * store the parameters in their position after momentum.
- *
- * Reference:
- * - Advances in optimizing Recurrent Networks, Bengio et al.,
- * section 3.5.
- * - http://arxiv.org/abs/1212.0901
- *
- * Inputs:
- * - X: Parameters to update, of shape (any, any).
- * - dX: Gradient wrt `X` of a loss function being optimized, of
- * same shape as `X`.
- * - lr: Learning rate.
- * - mu: Momentum value.
- * Typical values are in the range of [0.5, 0.99], usually
- * started at the lower end and annealed towards the higher end.
- * - v: State maintaining the velocity of the parameters `X`, of same
- * shape as `X`.
- *
- * Outputs:
- * - X: Updated parameters `X`, of same shape as input `X`.
- * - v: Updated velocity of the parameters `X`, of same shape as
- * input `v`.
- */
- v_prev = v
- v = mu*v - lr*dX # update velocity
- X = X - mu*v_prev + (1+mu)*v # update position, including momentum
-}
-
-init = function(matrix[double] X)
- return (matrix[double] v) {
- /*
- * Initialize the state for this optimizer.
- *
- * Note: This is just a convenience function, and state
- * may be initialized manually if needed.
- *
- * Inputs:
- * - X: Parameters to update, of shape (any, any).
- *
- * Outputs:
- * - v: Initial velocity of the parameters `X`.
- */
- v = matrix(0, rows=nrow(X), cols=ncol(X))
-}
-
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/nn/test/README.md
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/README.md b/scripts/staging/SystemML-NN/nn/test/README.md
deleted file mode 100644
index b714d50..0000000
--- a/scripts/staging/SystemML-NN/nn/test/README.md
+++ /dev/null
@@ -1,32 +0,0 @@
-<!--
-{% comment %}
-Licensed to the Apache Software Foundation (ASF) under one or more
-contributor license agreements. See the NOTICE file distributed with
-this work for additional information regarding copyright ownership.
-The ASF licenses this file to you under the Apache License, Version 2.0
-(the "License"); you may not use this file except in compliance with
-the License. You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-{% endcomment %}
--->
-
-# SystemML-NN Tests
-
-#### This folder contains tests for the *SystemML-NN* (`nn`) deep learning library.
-
----
-## Tests
-#### All layers are tested for correct derivatives ("gradient-checking"), and many layers also have correctness tests against simpler reference implementations.
-* `grad_check.dml` - Contains gradient-checks for all layers as individual DML functions.
-* `test.dml` - Contains correctness tests for several of the more complicated layers by checking against simple reference implementations, such as `conv2d_simple.dml`. All tests are formulated as individual DML functions.
-* `tests.dml` - A DML script that runs all of the tests in `grad_check.dml` and `test.dml`.
-
-## Execution
-* `spark-submit SystemML.jar -f nn/test/tests.dml` from the base of the project.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/nn/test/conv2d_simple.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/conv2d_simple.dml b/scripts/staging/SystemML-NN/nn/test/conv2d_simple.dml
deleted file mode 100644
index 9f126d0..0000000
--- a/scripts/staging/SystemML-NN/nn/test/conv2d_simple.dml
+++ /dev/null
@@ -1,213 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-/*
- * 2D Convolutional layer.
- *
- * This implementation is intended to be a simple, reference version.
- */
-
-forward = function(matrix[double] X, matrix[double] W, matrix[double] b,
- int C, int Hin, int Win, int Hf, int Wf,
- int strideh, int stridew, int padh, int padw)
- return (matrix[double] out, int Hout, int Wout) {
- /*
- * Computes the forward pass for a 2D spatial convolutional layer with
- * F filters. The input data has N examples, each represented as a 3D
- * volume unrolled into a single vector.
- *
- * This implementation is intended to be a simple, reference version.
- *
- * Inputs:
- * - X: Inputs, of shape (N, C*Hin*Win).
- * - W: Weights, of shape (F, C*Hf*Wf).
- * - b: Biases, of shape (F, 1).
- * - C: Number of input channels (dimensionality of input depth).
- * - Hin: Input height.
- * - Win: Input width.
- * - Hf: Filter height.
- * - Wf: Filter width.
- * - strideh: Stride over height.
- * - stridew: Stride over width.
- * - padh: Padding for top and bottom sides.
- * - padw: Padding for left and right sides.
- *
- * Outputs:
- * - out: Outputs, of shape (N, F*Hout*Wout).
- * - Hout: Output height.
- * - Wout: Output width.
- */
- N = nrow(X)
- F = nrow(W)
- Hout = as.integer(floor((Hin + 2*padh - Hf)/strideh + 1))
- Wout = as.integer(floor((Win + 2*padw - Wf)/stridew + 1))
-
- # Create output volume
- out = matrix(0, rows=N, cols=F*Hout*Wout)
-
- # Convolution - Simple reference implementation
- parfor (n in 1:N) { # all examples
- Xn = matrix(X[n,], rows=C, cols=Hin*Win)
- # Pad image
- Xn_padded = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw)) # zeros
- parfor (c in 1:C) {
- Xn_slice = matrix(Xn[c,], rows=Hin, cols=Win) # depth slice C reshaped
- Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw)
- Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice
- Xn_padded[c,] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape
- }
- # Convolve image with filters
- parfor (f in 1:F, check=0) { # all filters
- parfor (hout in 1:Hout, check=0) { # all output rows
- h0 = (hout-1)*strideh + 1
- parfor (wout in 1:Wout, check=0) { # all output columns
- w0 = (wout-1)*stridew + 1
- # Create a patch of the input example corresponding spatially to the filter sizes
- Xn_padded_patch = matrix(0, rows=C, cols=Hf*Wf) # zeros
- parfor (c in 1:C, check=0) {
- Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw) # reshape
- Xn_padded_patch[c,] = matrix(Xn_padded_slice[h0:h0-1+Hf, w0:w0-1+Wf], rows=1,
- cols=Hf*Wf) # reshape
- }
- out[n, (f-1)*Hout*Wout + (hout-1)*Wout + wout] =
- W[f,] %*% matrix(Xn_padded_patch, rows=C*Hf*Wf, cols=1) + b[f,]
- }
- }
- }
- }
-}
-
-backward = function(matrix[double] dout, int Hout, int Wout,
- matrix[double] X, matrix[double] W, matrix[double] b,
- int C, int Hin, int Win, int Hf, int Wf,
- int strideh, int stridew, int padh, int padw)
- return (matrix[double] dX, matrix[double] dW, matrix[double] db) {
- /*
- * Computes the backward pass for a 2D spatial convolutional layer
- * with F filters.
- *
- * This implementation is intended to be a simple, reference version.
- *
- * Inputs:
- * - dout: Gradient wrt `out` from upstream, of
- * shape (N, F*Hout*Wout).
- * - Hout: Output height.
- * - Wout: Output width.
- * - X: Inputs, of shape (N, C*Hin*Win).
- * - W: Weights, of shape (F, C*Hf*Wf).
- * - b: Biases, of shape (F, 1).
- * - C: Number of input channels (dimensionality of input depth).
- * - Hin: Input height.
- * - Win: Input width.
- * - Hf: Filter height.
- * - Wf: Filter width.
- * - strideh: Stride over height.
- * - stridew: Stride over width.
- * - padh: Padding for top and bottom sides.
- * - padw: Padding for left and right sides.
- *
- * Outputs:
- * - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
- * - dW: Gradient wrt `W`, of shape (F, C*Hf*Wf).
- * - db: Gradient wrt `b`, of shape (F, 1).
- */
- N = nrow(X)
- F = nrow(W)
-
- # Create gradient volumes
- dX = matrix(0, rows=N, cols=C*Hin*Win)
- dW = matrix(0, rows=F, cols=C*Hf*Wf)
- db = matrix(0, rows=F, cols=1)
-
- # Partial derivatives for convolution - Simple reference implementation
- for (n in 1:N) { # all examples
- Xn = matrix(X[n,], rows=C, cols=Hin*Win)
- # Pad image
- Xn_padded = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw)) # zeros
- parfor (c in 1:C) {
- Xn_slice = matrix(Xn[c,], rows=Hin, cols=Win) # depth slice C reshaped
- Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw)
- Xn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = Xn_slice
- Xn_padded[c,] = matrix(Xn_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw)) # reshape
- }
- dXn_padded = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw))
- for (f in 1:F) { # all filters
- for (hout in 1:Hout) { # all output rows
- h0 = (hout-1) * strideh + 1
- for (wout in 1:Wout) { # all output columns
- w0 = (wout-1) * stridew + 1
- # Create a patch of the input example corresponding spatially to the filter sizes
- Xn_padded_patch = matrix(0, rows=C, cols=Hf*Wf) # zeros
- dXn_padded_patch = matrix(W[f,] * dout[n, (f-1)*Hout*Wout + (hout-1)*Wout + wout],
- rows=C, cols=Hf*Wf) # reshape
- for (c in 1:C) {
- Xn_padded_slice = matrix(Xn_padded[c,], rows=Hin+2*padh, cols=Win+2*padw) # reshape
- Xn_padded_patch[c,] = matrix(Xn_padded_slice[h0:h0-1+Hf, w0:w0-1+Wf],
- rows=1, cols=Hf*Wf) # reshape
- dXn_padded_slice = matrix(0, rows=Hin+2*padh, cols=Win+2*padw)
- dXn_padded_slice[h0:h0-1+Hf, w0:w0-1+Wf] = matrix(dXn_padded_patch[c,],
- rows=Hf, cols=Wf) # reshape
- dXn_padded[c,] = dXn_padded[c,] + matrix(dXn_padded_slice,
- rows=1, cols=(Hin+2*padh)*(Win+2*padw))
- }
- dW[f,] = dW[f,]
- + matrix(Xn_padded_patch, rows=1, cols=C*Hf*Wf)
- * dout[n, (f-1)*Hout*Wout + (hout-1)*Wout + wout]
- db[f,] = db[f,] + dout[n, (f-1)*Hout*Wout + (hout-1)*Wout + wout]
- }
- }
- }
- # Unpad derivs on input
- dXn = matrix(0, rows=C, cols=Hin*Win)
- parfor (c in 1:C, check=0) {
- dXn_padded_slice = matrix(dXn_padded[c,], rows=(Hin+2*padh), cols=(Win+2*padw))
- dXn_slice = dXn_padded_slice[padh+1:padh+Hin, padw+1:padw+Win]
- dXn[c,] = matrix(dXn_slice, rows=1, cols=Hin*Win)
- }
- dX[n,] = matrix(dXn, rows=1, cols=C*Hin*Win)
- }
-}
-
-init = function(int F, int C, int Hf, int Wf)
- return (matrix[double] W, matrix[double] b) {
- /*
- * Initialize the parameters of this layer.
- *
- * We use the heuristic by He et al., which limits the magnification
- * of inputs/gradients during forward/backward passes by scaling
- * unit-Gaussian weights by a factor of sqrt(2/n), under the
- * assumption of relu neurons.
- * - http://arxiv.org/abs/1502.01852
- *
- * Inputs:
- * - F: Number of filters.
- * - C: Number of input channels (dimensionality of depth).
- * - Hf: Filter height.
- * - Wf: Filter width.
- *
- * Outputs:
- * - W: Weights, of shape (F, C*Hf*Wf).
- * - b: Biases, of shape (F, 1).
- */
- W = rand(rows=F, cols=C*Hf*Wf, pdf="normal") * sqrt(2.0/(C*Hf*Wf))
- b = matrix(0, rows=F, cols=1)
-}
-