Posted to commits@systemml.apache.org by du...@apache.org on 2016/05/28 00:55:11 UTC
[1/2] incubator-systemml git commit: Adding a smoothing term to the cross-entropy loss function for numerical stability in situations in which predictions are exactly equal to 0.
Repository: incubator-systemml
Updated Branches:
refs/heads/master 6d95c9f5e -> b14d55bed
Adding a smoothing term to the cross-entropy loss function for numerical stability in situations in which predictions are exactly equal to 0.
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/ba60e73e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/ba60e73e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/ba60e73e
Branch: refs/heads/master
Commit: ba60e73eb1ba097eeeb003f8f297745f460b59ff
Parents: 6d95c9f
Author: Mike Dusenberry <mw...@us.ibm.com>
Authored: Fri May 27 17:44:30 2016 -0700
Committer: Mike Dusenberry <mw...@us.ibm.com>
Committed: Fri May 27 17:44:30 2016 -0700
----------------------------------------------------------------------
.../nn/layers/cross_entropy_loss.dml | 6 ++--
scripts/staging/SystemML-NN/nn/test/test.dml | 32 ++++++++++++++++++--
scripts/staging/SystemML-NN/nn/test/tests.dml | 1 +
3 files changed, 35 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ba60e73e/scripts/staging/SystemML-NN/nn/layers/cross_entropy_loss.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/cross_entropy_loss.dml b/scripts/staging/SystemML-NN/nn/layers/cross_entropy_loss.dml
index 6b9840f..306ea96 100644
--- a/scripts/staging/SystemML-NN/nn/layers/cross_entropy_loss.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/cross_entropy_loss.dml
@@ -41,7 +41,8 @@ forward = function(matrix[double] pred, matrix[double] y)
* - loss: Scalar loss, of shape (1).
*/
N = nrow(y)
- losses = rowSums(-y * log(pred))
+ eps = 1e-10 # numerical stability to avoid log(0)
+ losses = rowSums(-y * log(pred+eps))
loss = sum(losses) / N
}
@@ -60,6 +61,7 @@ backward = function(matrix[double] pred, matrix[double] y)
* - dpred: Gradient wrt pred, of shape (N, K).
*/
N = nrow(y)
- dpred = (1/N) * -y * (1/pred)
+ eps = 1e-10 # numerical stability to avoid divide-by-zero
+ dpred = (1/N) * -y * (1/(pred+eps))
}
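For reference, a minimal standalone DML sketch (not part of this commit) of the failure mode this change addresses: with a zero-valued prediction and a one-hot target, log(pred) is -Infinity, while log(pred + eps) stays finite (about 23.03 for eps = 1e-10).

    # Minimal sketch, not part of this commit: effect of the eps term on a
    # single zero-valued prediction with a one-hot target.
    eps = 1e-10
    pred = matrix(0, rows=1, cols=1)
    y = matrix(1, rows=1, cols=1)
    print("without eps: " + sum(-y * log(pred)))        # Infinity
    print("with eps:    " + sum(-y * log(pred + eps)))  # ~23.03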
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ba60e73e/scripts/staging/SystemML-NN/nn/test/test.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/test.dml b/scripts/staging/SystemML-NN/nn/test/test.dml
index 58ee3e1..1ecff68 100644
--- a/scripts/staging/SystemML-NN/nn/test/test.dml
+++ b/scripts/staging/SystemML-NN/nn/test/test.dml
@@ -24,6 +24,7 @@
*/
source("nn/layers/conv.dml") as conv
source("nn/layers/conv_builtin.dml") as conv_builtin
+source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss
source("nn/layers/max_pool.dml") as max_pool
source("nn/layers/max_pool_builtin.dml") as max_pool_builtin
source("nn/test/conv_simple.dml") as conv_simple
@@ -68,6 +69,31 @@ conv = function() {
}
}
+cross_entropy_loss = function() {
+ /*
+ * Test for the `cross-entropy` loss function.
+ *
+ * Here we make sure that the cross-entropy loss function does
+ * not propagate `infinity` values in the case that a prediction is
+ * exactly equal to 0.
+ */
+ print("Testing the cross-entropy loss function with zero-valued predictions.")
+
+ # Generate data
+ N = 3 # num examples
+ K = 10 # num targets
+ pred = matrix(0, rows=N, cols=K)
+ y = rand(rows=N, cols=K, min=0, max=1, pdf="uniform")
+ y = y / rowSums(y) # normalized probs
+
+ loss = cross_entropy_loss::forward(pred, y)
+
+ inf = 1/0
+ if (loss == inf) {
+ print("ERROR: The cross-entropy loss function ouptuts infinity for all-zero predictions.")
+ }
+}
+
im2col = function() {
/*
* Test for the `im2col` and `col2im` functions.
@@ -97,8 +123,9 @@ im2col = function() {
# Equivalency check
equivalent = util::all_equal(x_pad, x_pad2)
- if (!equivalent)
+ if (!equivalent) {
print("ERROR: im2col and then col2im does not yield the original image.")
+ }
}
padding = function() {
@@ -135,8 +162,9 @@ padding = function() {
# Equivalency check
equivalent = util::all_equal(x, x1)
- if (!equivalent)
+ if (!equivalent) {
print("ERROR: Padding and then unpadding does not yield the original image.")
+ }
}
max_pool = function() {
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ba60e73e/scripts/staging/SystemML-NN/nn/test/tests.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/tests.dml b/scripts/staging/SystemML-NN/nn/test/tests.dml
index 1b91967..cac56c2 100644
--- a/scripts/staging/SystemML-NN/nn/test/tests.dml
+++ b/scripts/staging/SystemML-NN/nn/test/tests.dml
@@ -63,6 +63,7 @@ print("---")
tmp = test::im2col()
tmp = test::padding()
tmp = test::conv()
+tmp = test::cross_entropy_loss()
tmp = test::max_pool()
print("---")
[2/2] incubator-systemml git commit: Adding some more internal SystemML-NN documentation for clarification.
Posted by du...@apache.org.
Adding some more internal SystemML-NN documentation for clarification.
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/b14d55be
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/b14d55be
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/b14d55be
Branch: refs/heads/master
Commit: b14d55bed7e1960db69337d7c2fd840d89e630c2
Parents: ba60e73
Author: Mike Dusenberry <mw...@us.ibm.com>
Authored: Fri May 27 17:55:08 2016 -0700
Committer: Mike Dusenberry <mw...@us.ibm.com>
Committed: Fri May 27 17:55:08 2016 -0700
----------------------------------------------------------------------
.../staging/SystemML-NN/nn/layers/affine.dml | 3 +++
scripts/staging/SystemML-NN/nn/layers/conv.dml | 3 +++
.../SystemML-NN/nn/layers/conv_builtin.dml | 3 +++
.../nn/layers/cross_entropy_loss.dml | 4 +++
.../staging/SystemML-NN/nn/layers/dropout.dml | 3 ++-
.../staging/SystemML-NN/nn/layers/l1_loss.dml | 3 +++
.../staging/SystemML-NN/nn/layers/l2_loss.dml | 3 +++
.../staging/SystemML-NN/nn/layers/log_loss.dml | 3 +++
.../staging/SystemML-NN/nn/layers/softmax.dml | 27 +++++++++++++-------
scripts/staging/SystemML-NN/nn/layers/tanh.dml | 4 +--
.../staging/SystemML-NN/nn/optim/adagrad.dml | 3 +++
scripts/staging/SystemML-NN/nn/optim/adam.dml | 3 +++
.../staging/SystemML-NN/nn/optim/rmsprop.dml | 3 +++
.../SystemML-NN/nn/optim/sgd_momentum.dml | 3 +++
.../SystemML-NN/nn/optim/sgd_nesterov.dml | 3 +++
15 files changed, 59 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/b14d55be/scripts/staging/SystemML-NN/nn/layers/affine.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/affine.dml b/scripts/staging/SystemML-NN/nn/layers/affine.dml
index 1338de4..e7e4fd8 100644
--- a/scripts/staging/SystemML-NN/nn/layers/affine.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/affine.dml
@@ -66,6 +66,9 @@ init = function(int D, int M)
return (matrix[double] W, matrix[double] b) {
/*
* Initialize the parameters of this layer.
+ *
+ * Note: This is just a convenience function, and parameters
+ * may be initialized manually if needed.
*
* We use the heuristic by He et al. [http://arxiv.org/abs/1502.01852],
* which limits the magnification of inputs/gradients during
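For context (this comes from the cited paper, not from the lines shown in this diff): the He et al. heuristic draws the weights from a zero-mean Gaussian whose variance is scaled by the fan-in, e.g. for an affine layer with D inputs

    W_{ij} \sim \mathcal{N}(0,\ 2/D)

which keeps the variance of activations and gradients roughly constant across layers when paired with ReLU nonlinearities.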
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/b14d55be/scripts/staging/SystemML-NN/nn/layers/conv.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/conv.dml b/scripts/staging/SystemML-NN/nn/layers/conv.dml
index 0fbcf99..1b737f5 100644
--- a/scripts/staging/SystemML-NN/nn/layers/conv.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/conv.dml
@@ -161,6 +161,9 @@ init = function(int F, int C, int Hf, int Wf)
return (matrix[double] W, matrix[double] b) {
/*
* Initialize the parameters of this layer.
+ *
+ * Note: This is just a convenience function, and parameters
+ * may be initialized manually if needed.
*
* We use the heuristic by He et al. [http://arxiv.org/abs/1502.01852],
* which limits the magnification of inputs/gradients during
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/b14d55be/scripts/staging/SystemML-NN/nn/layers/conv_builtin.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/conv_builtin.dml b/scripts/staging/SystemML-NN/nn/layers/conv_builtin.dml
index a73405e..7042eb2 100644
--- a/scripts/staging/SystemML-NN/nn/layers/conv_builtin.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/conv_builtin.dml
@@ -125,6 +125,9 @@ init = function(int F, int C, int Hf, int Wf)
return (matrix[double] W, matrix[double] b) {
/*
* Initialize the parameters of this layer.
+ *
+ * Note: This is just a convenience function, and parameters
+ * may be initialized manually if needed.
*
* We use the heuristic by He et al. [http://arxiv.org/abs/1502.01852],
* which limits the magnification of inputs/gradients during
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/b14d55be/scripts/staging/SystemML-NN/nn/layers/cross_entropy_loss.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/cross_entropy_loss.dml b/scripts/staging/SystemML-NN/nn/layers/cross_entropy_loss.dml
index 306ea96..9e3e7cd 100644
--- a/scripts/staging/SystemML-NN/nn/layers/cross_entropy_loss.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/cross_entropy_loss.dml
@@ -33,6 +33,10 @@ forward = function(matrix[double] pred, matrix[double] y)
* inputs consist of N examples, each with K dimensions corresponding
* to normalized probabilities of K classes.
*
+ * This can be interpreted as the negative log-likelihood assuming
+ * a Bernoulli distribution generalized to K dimensions, or a
+ * Multinomial with 1 observation.
+ *
* Inputs:
* - pred: Prediction matrix, of shape (N, K).
* - y: Target matrix, of shape (N, K).
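To make the documented interpretation concrete: given the forward pass above (ignoring the eps term), the loss is the average negative log-likelihood of the targets under the predicted class probabilities,

    loss = -\frac{1}{N} \sum_{i=1}^{N} \sum_{k=1}^{K} y_{ik} \log(pred_{ik})

which, per example, is the negative log-probability of a single draw from a Multinomial with parameters pred_i (equivalently, a K-class generalization of the Bernoulli).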
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/b14d55be/scripts/staging/SystemML-NN/nn/layers/dropout.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/dropout.dml b/scripts/staging/SystemML-NN/nn/layers/dropout.dml
index e3c34f9..6c0b0d0 100644
--- a/scripts/staging/SystemML-NN/nn/layers/dropout.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/dropout.dml
@@ -42,8 +42,9 @@ forward = function(matrix[double] X, double p, int seed)
* - out: Outputs, of same shape as X.
* - mask: Dropout mask used to compute the output.
*/
- if (seed == -1)
+ if (seed == -1) {
seed = as.integer(floor(as.scalar(rand(rows=1, cols=1, min=1, max=100000))))
+ }
mask = rand(rows=nrow(X), cols=ncol(X), min=0, max=1, seed=seed) <= p
out = X * mask / p
}
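As context for the forward pass shown above (only the braces change in this commit): each mask entry is 1 with probability p, so dividing by p implements "inverted dropout" and keeps the expected activation equal to the input, meaning no extra rescaling is needed at test time:

    E[out_{ij}] = \frac{E[mask_{ij}]}{p} \, X_{ij} = \frac{p}{p} \, X_{ij} = X_{ij}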
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/b14d55be/scripts/staging/SystemML-NN/nn/layers/l1_loss.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/l1_loss.dml b/scripts/staging/SystemML-NN/nn/layers/l1_loss.dml
index 00db8a7..6c625e8 100644
--- a/scripts/staging/SystemML-NN/nn/layers/l1_loss.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/l1_loss.dml
@@ -31,6 +31,9 @@ forward = function(matrix[double] pred, matrix[double] y)
* Computes the forward pass for an L1 loss function. The inputs
* consist of N examples, each with M dimensions to predict.
*
+ * This can be interpreted as the negative log-likelihood assuming
+ * a Laplace distribution.
+ *
* Inputs:
* - pred: Prediction matrix, of shape (N, M).
* - y: Target matrix, of shape (N, M).
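For reference, the Laplace interpretation noted above: up to an additive constant, the absolute error is the negative log-density of a unit-scale Laplace distribution centered at the prediction (the exact per-example scaling used in l1_loss.dml is not shown in this diff),

    -\log \mathrm{Laplace}(y \mid pred,\, b=1) = |y - pred| + \log 2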
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/b14d55be/scripts/staging/SystemML-NN/nn/layers/l2_loss.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/l2_loss.dml b/scripts/staging/SystemML-NN/nn/layers/l2_loss.dml
index 13b6c2d..c4a8618 100644
--- a/scripts/staging/SystemML-NN/nn/layers/l2_loss.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/l2_loss.dml
@@ -31,6 +31,9 @@ forward = function(matrix[double] pred, matrix[double] y)
* Computes the forward pass for an L2 loss function. The inputs
* consist of N examples, each with M dimensions to predict.
*
+ * This can be interpreted as the negative log-likelihood assuming
+ * a Gaussian distribution.
+ *
* Inputs:
* - pred: Prediction matrix, of shape (N, M).
* - y: Target matrix, of shape (N, M).
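Similarly for the Gaussian interpretation: up to an additive constant, the squared error is the negative log-density of a unit-variance Gaussian centered at the prediction (again, the exact per-example scaling in l2_loss.dml is not shown in this diff),

    -\log \mathcal{N}(y \mid pred,\, \sigma^2=1) = \tfrac{1}{2}(y - pred)^2 + \tfrac{1}{2}\log(2\pi)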
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/b14d55be/scripts/staging/SystemML-NN/nn/layers/log_loss.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/log_loss.dml b/scripts/staging/SystemML-NN/nn/layers/log_loss.dml
index e3da456..0bcb02e 100644
--- a/scripts/staging/SystemML-NN/nn/layers/log_loss.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/log_loss.dml
@@ -31,6 +31,9 @@ forward = function(matrix[double] pred, matrix[double] y)
/*
* Computes the forward pass for a log loss function.
*
+ * This can be interpreted as the negative log-likelihood assuming
+ * a Bernoulli distribution.
+ *
* Inputs:
* - pred: Prediction matrix, of shape (N, 1). Predictions should
* be probabilities that y=1.
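And for the Bernoulli interpretation: with pred_i the predicted probability that y_i = 1, the standard per-example log loss is the negative log of the Bernoulli likelihood pred_i^{y_i} (1 - pred_i)^{1 - y_i} (the averaging used in log_loss.dml is not shown in this diff),

    loss_i = -\big[\, y_i \log(pred_i) + (1 - y_i) \log(1 - pred_i) \,\big]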
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/b14d55be/scripts/staging/SystemML-NN/nn/layers/softmax.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/softmax.dml b/scripts/staging/SystemML-NN/nn/layers/softmax.dml
index 2576162..111e1b3 100644
--- a/scripts/staging/SystemML-NN/nn/layers/softmax.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/softmax.dml
@@ -29,6 +29,11 @@ forward = function(matrix[double] scores) return (matrix[double] probs) {
* N examples, and the softmax function transforms them to normalized
* probabilities.
*
+ * This can be interpreted as a generalization of the sigmoid
+ * function to multiple classes.
+ *
+ * probs_ij = e^scores_ij / sum(e^scores)
+ *
* Inputs:
* - scores: Input data matrix, of shape (N, D).
*
@@ -36,14 +41,14 @@ forward = function(matrix[double] scores) return (matrix[double] probs) {
* - probs: Outputs, of shape (N, D).
*/
# For numerical stability, we subtract the max score of an example from all scores for that
- # example. This is equivalent:
+ # example. This is equivalent to the original formulation:
# e^scores_i / sum(e^scores_i) == C*e^scores_i / C*sum(e^scores_i)
# == e^(scores_i+log(C)) / sum(e^(scores_i+log(C)))
# set log(C) = -max(scores_i):
# == e^(scores_i-max(scores_i)) / sum(e^(scores_i-max(scores_i)))
scores = scores - rowMaxs(scores) # numerical stability
- unnorm_probs = exp(scores)
- probs = unnorm_probs / rowSums(unnorm_probs)
+ unnorm_probs = exp(scores) # unnormalized probabilities
+ probs = unnorm_probs / rowSums(unnorm_probs) # normalized probabilities
}
backward = function(matrix[double] dprobs, matrix[double] scores)
@@ -51,11 +56,13 @@ backward = function(matrix[double] dprobs, matrix[double] scores)
/*
* Computes the backward pass for a softmax classifier.
*
+ * Note that dscores_ij has multiple sources:
+ *
* dprobs_ij/dscores_ij = probs_ij * (1 - probs_ij)
- * dprobs_ic/dscores_ij = probs_ij * -probs_ic
+ * dprobs_ik/dscores_ij = -probs_ik * probs_ij, for all k != j
*
* dloss/dscores_ij = dloss/dprobs_ij * dprobs_ij/dscores_ij +
- * sum_c(dloss/dprobs_ic * dprobs_ic/dscores_ij)
+ * sum_{k!=j}(dloss/dprobs_ik * dprobs_ik/dscores_ij)
*
* Inputs:
* - dprobs: Derivatives from upstream, of shape (N, D).
@@ -65,9 +72,11 @@ backward = function(matrix[double] dprobs, matrix[double] scores)
* - dscores: Gradient wrt scores, of shape (N, D).
*/
scores = scores - rowMaxs(scores) # numerical stability
- unnorm_probs = exp(scores)
- probs = unnorm_probs / rowSums(unnorm_probs)
- dscores = dprobs * probs
- dscores = dscores - probs * rowSums(dscores)
+ unnorm_probs = exp(scores) # unnormalized probabilities
+ probs = unnorm_probs / rowSums(unnorm_probs) # normalized probabilities
+ # After some cancellation:
+ # dscores = dprobs*probs - probs*rowSums(dprobs*probs)
+ dtemp = dprobs * probs
+ dscores = dtemp - probs * rowSums(dtemp)
}
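The "cancellation" referenced in the new comment follows from substituting the Jacobian entries documented above into the chain rule and factoring out probs_ij:

    dscores_{ij} = dprobs_{ij}\, probs_{ij} (1 - probs_{ij})
                   - \sum_{k \ne j} dprobs_{ik}\, probs_{ik}\, probs_{ij}
                 = probs_{ij} \Big( dprobs_{ij} - \sum_{k} dprobs_{ik}\, probs_{ik} \Big)

which is exactly dtemp - probs * rowSums(dtemp) with dtemp = dprobs * probs, as computed in the code.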
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/b14d55be/scripts/staging/SystemML-NN/nn/layers/tanh.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/layers/tanh.dml b/scripts/staging/SystemML-NN/nn/layers/tanh.dml
index e886081..0fadf77 100644
--- a/scripts/staging/SystemML-NN/nn/layers/tanh.dml
+++ b/scripts/staging/SystemML-NN/nn/layers/tanh.dml
@@ -26,7 +26,7 @@ forward = function(matrix[double] X) return (matrix[double] out) {
/*
* Computes the forward pass for a tanh nonlinearity layer.
*
- * tanh(x) = (e^x - e^-x) / (e^x + e^-x)
+ * tanh(x) = (e^x - e^-x) / (e^x + e^-x) = 2*sigmoid(2x) - 1
*
* Inputs:
* - X: Input data matrix, of shape (any, any).
@@ -34,7 +34,7 @@ forward = function(matrix[double] X) return (matrix[double] out) {
* Outputs:
* - out: Outputs, of same shape as X.
*/
- # Simplification of the above formulation:
+ # Simplification of the above formulation to use the sigmoid function:
sigma2X = 1 / (1 + exp(-2*X))
out = 2 * sigma2X - 1
}
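The simplification used in the forward pass follows from rewriting tanh in terms of the logistic sigmoid \sigma(z) = 1/(1 + e^{-z}):

    \tanh(x) = \frac{e^x - e^{-x}}{e^x + e^{-x}}
             = \frac{1 - e^{-2x}}{1 + e^{-2x}}
             = \frac{2}{1 + e^{-2x}} - 1
             = 2\,\sigma(2x) - 1

which matches sigma2X = 1 / (1 + exp(-2*X)) followed by out = 2 * sigma2X - 1 in the code.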
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/b14d55be/scripts/staging/SystemML-NN/nn/optim/adagrad.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/optim/adagrad.dml b/scripts/staging/SystemML-NN/nn/optim/adagrad.dml
index daa5f5e..688109b 100644
--- a/scripts/staging/SystemML-NN/nn/optim/adagrad.dml
+++ b/scripts/staging/SystemML-NN/nn/optim/adagrad.dml
@@ -60,6 +60,9 @@ init = function(matrix[double] X) return (matrix[double] cache) {
/*
* Initialize the state for this optimizer.
*
+ * Note: This is just a convenience function, and state
+ * may be initialized manually if needed.
+ *
* Inputs:
* - X: Parameters to update, of shape (any, any).
*
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/b14d55be/scripts/staging/SystemML-NN/nn/optim/adam.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/optim/adam.dml b/scripts/staging/SystemML-NN/nn/optim/adam.dml
index 05152f4..a25f74d 100644
--- a/scripts/staging/SystemML-NN/nn/optim/adam.dml
+++ b/scripts/staging/SystemML-NN/nn/optim/adam.dml
@@ -75,6 +75,9 @@ init = function(matrix[double] X) return (matrix[double] m, matrix[double] v) {
/*
* Initialize the state for this optimizer.
*
+ * Note: This is just a convenience function, and state
+ * may be initialized manually if needed.
+ *
* Inputs:
* - X: Parameters to update, of shape (any, any).
*
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/b14d55be/scripts/staging/SystemML-NN/nn/optim/rmsprop.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/optim/rmsprop.dml b/scripts/staging/SystemML-NN/nn/optim/rmsprop.dml
index 31b78d5..e256000 100644
--- a/scripts/staging/SystemML-NN/nn/optim/rmsprop.dml
+++ b/scripts/staging/SystemML-NN/nn/optim/rmsprop.dml
@@ -62,6 +62,9 @@ init = function(matrix[double] X) return (matrix[double] cache) {
/*
* Initialize the state for this optimizer.
*
+ * Note: This is just a convenience function, and state
+ * may be initialized manually if needed.
+ *
* Inputs:
* - X: Parameters to update, of shape (any, any).
*
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/b14d55be/scripts/staging/SystemML-NN/nn/optim/sgd_momentum.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/optim/sgd_momentum.dml b/scripts/staging/SystemML-NN/nn/optim/sgd_momentum.dml
index 22a88f2..c2a441b 100644
--- a/scripts/staging/SystemML-NN/nn/optim/sgd_momentum.dml
+++ b/scripts/staging/SystemML-NN/nn/optim/sgd_momentum.dml
@@ -55,6 +55,9 @@ init = function(matrix[double] X) return (matrix[double] v) {
/*
* Initialize the state for this optimizer.
*
+ * Note: This is just a convenience function, and state
+ * may be initialized manually if needed.
+ *
* Inputs:
* - X: Parameters to update, of shape (any, any).
*
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/b14d55be/scripts/staging/SystemML-NN/nn/optim/sgd_nesterov.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/optim/sgd_nesterov.dml b/scripts/staging/SystemML-NN/nn/optim/sgd_nesterov.dml
index aac6522..56c6ab0 100644
--- a/scripts/staging/SystemML-NN/nn/optim/sgd_nesterov.dml
+++ b/scripts/staging/SystemML-NN/nn/optim/sgd_nesterov.dml
@@ -64,6 +64,9 @@ init = function(matrix[double] X) return (matrix[double] v) {
/*
* Initialize the state for this optimizer.
*
+ * Note: This is just a convenience function, and state
+ * may be initialized manually if needed.
+ *
* Inputs:
* - X: Parameters to update, of shape (any, any).
*