Posted to commits@systemml.apache.org by du...@apache.org on 2017/04/26 21:42:33 UTC

[07/11] incubator-systemml git commit: [SYSTEMML-1524] Graduate `nn` library to `scripts/nn`

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/nn/test/test.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/test/test.dml b/scripts/nn/test/test.dml
new file mode 100644
index 0000000..a5cb497
--- /dev/null
+++ b/scripts/nn/test/test.dml
@@ -0,0 +1,549 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+/*
+ * Various tests, not including gradient checks.
+ */
+source("nn/layers/batch_norm1d.dml") as batch_norm1d
+source("nn/layers/batch_norm2d.dml") as batch_norm2d
+source("nn/layers/conv2d.dml") as conv2d
+source("nn/layers/conv2d_builtin.dml") as conv2d_builtin
+source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss
+source("nn/layers/max_pool2d.dml") as max_pool2d
+source("nn/layers/max_pool2d_builtin.dml") as max_pool2d_builtin
+source("nn/layers/tanh.dml") as tanh
+source("nn/test/conv2d_simple.dml") as conv2d_simple
+source("nn/test/max_pool2d_simple.dml") as max_pool2d_simple
+source("nn/test/util.dml") as test_util
+source("nn/util.dml") as util
+
+batch_norm1d = function() {
+  /*
+   * Test for the 1D batch normalization function.
+   */
+  print("Testing the 1D batch normalization function.")
+
+  # Generate data
+  N = 4  # Number of examples
+  D = 4  # Number of features
+  mode = 'train'  # execution mode
+  mu = 0.9  # momentum of moving averages
+  eps = 1e-5  # smoothing term
+  X = matrix(seq(1,16), rows=N, cols=D)
+
+  # Create layer
+  [gamma, beta, ema_mean, ema_var] = batch_norm1d::init(D)
+
+  # Forward
+  [out, ema_mean_upd, ema_var_upd, cache_mean, cache_var, cache_norm] =
+      batch_norm1d::forward(X, gamma, beta, mode, ema_mean, ema_var, mu, eps)
+
+  # Equivalency check
+  target = matrix("-1.34160721 -1.34160721 -1.34160733 -1.34160709
+                   -0.44720244 -0.44720244 -0.44720244 -0.44720232
+                    0.44720244  0.44720232  0.44720244  0.44720244
+                    1.34160733  1.34160721  1.34160733  1.34160733", rows=1, cols=N*D)
+  out = matrix(out, rows=1, cols=N*D)
+  for (i in 1:length(out)) {
+    rel_error = test_util::check_rel_error(as.scalar(out[1,i]),
+                                           as.scalar(target[1,i]), 1e-3, 1e-4)
+  }
+}
+
+conv2d = function() {
+  /*
+   * Test for the 2D convolution functions.
+   */
+  print("Testing the 2D convolution functions.")
+
+  # Generate data
+  N = 2  # num examples
+  C = 3  # num channels
+  Hin = 5  # input height
+  Win = 5  # input width
+  F = 2  # num filters
+  Hf = 3  # filter height
+  Wf = 3  # filter width
+  stride = 1
+  pad = 1
+  X = rand(rows=N, cols=C*Hin*Win, pdf="normal")
+
+  # Create layer
+  [W, b] = conv2d::init(F, C, Hf, Wf)
+
+  # Forward
+  [out, Hout, Wout] = conv2d::forward(X, W, b, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+  [out_simple, Hout_simple, Wout_simple] = conv2d_simple::forward(X, W, b, C, Hin, Win, Hf, Wf,
+                                                                  stride, stride, pad, pad)
+  [out_builtin, Hout_builtin, Wout_builtin] = conv2d_builtin::forward(X, W, b, C, Hin, Win, Hf, Wf,
+                                                                      stride, stride, pad, pad)
+
+  # Equivalency check
+  out = matrix(out, rows=1, cols=N*F*Hout*Wout)
+  out_simple = matrix(out_simple, rows=1, cols=N*F*Hout*Wout)
+  out_builtin = matrix(out_builtin, rows=1, cols=N*F*Hout*Wout)
+  for (i in 1:length(out)) {
+    rel_error = test_util::check_rel_error(as.scalar(out[1,i]),
+                                           as.scalar(out_simple[1,i]), 1e-10, 1e-12)
+    rel_error = test_util::check_rel_error(as.scalar(out[1,i]),
+                                           as.scalar(out_builtin[1,i]), 1e-10, 1e-12)
+  }
+}
+
+cross_entropy_loss = function() {
+  /*
+   * Test for the cross-entropy loss function.
+   *
+   * Here we make sure that the cross-entropy loss function does
+   * not propagate `infinity` values in the case that a prediction is
+   * exactly equal to 0.
+   */
+  print("Testing the cross-entropy loss function with zero-valued predictions.")
+
+  # Generate data
+  N = 3 # num examples
+  K = 10 # num targets
+  pred = matrix(0, rows=N, cols=K)
+  y = rand(rows=N, cols=K, min=0, max=1, pdf="uniform")
+  y = y / rowSums(y)  # normalized probs
+
+  loss = cross_entropy_loss::forward(pred, y)
+
+  inf = 1/0
+  if (loss == inf) {
+    print("ERROR: The cross-entropy loss function outputs infinity for all-zero predictions.")
+  }
+}
+
+im2col = function() {
+  /*
+   * Test for the `im2col` and `col2im` functions.
+   */
+  print("Testing the im2col and col2im functions.")
+
+  # Generate data
+  C = 3  # num channels
+  Hin = 5  # input height
+  Win = 5  # input width
+  Hf = 3  # filter height
+  Wf = 3  # filter width
+  stride = 2
+  pad = (Hin * stride - Hin + Hf - stride) / 2
+  Hout = as.integer(floor((Hin + 2*pad - Hf)/stride + 1))
+  Wout = as.integer(floor((Win + 2*pad - Wf)/stride + 1))
+  x = rand(rows=C, cols=Hin*Win)
+
+  # pad
+  x_pad = util::pad_image(x, Hin, Win, pad, pad, 0)
+
+  # im2col
+  x_cols = util::im2col(x_pad, Hin+2*pad, Win+2*pad, Hf, Wf, stride, stride)
+
+  if (ncol(x_cols) != Hout*Wout) {
+    print("ERROR: im2col does not yield the correct output size: "
+          + ncol(x_cols)+" (actual) vs. "+Hout*Wout+" (correct).")
+  }
+
+  # col2im
+  x_pad2 = util::col2im(x_cols, C, Hin+2*pad, Win+2*pad, Hf, Wf, stride, stride, "none")
+
+  # Equivalency check
+  equivalent = test_util::all_equal(x_pad, x_pad2)
+  if (!equivalent) {
+    print("ERROR: im2col and then col2im does not yield the original image.")
+  }
+}
+
+padding = function() {
+  /*
+   * Test for the `pad_image` and `unpad_image` functions.
+   */
+  print("Testing the padding and unpadding functions.")
+
+  # Generate data
+  C = 3  # num channels
+  Hin = 5  # input height
+  Win = 5  # input width
+  pad = 3  # padding
+  x = rand(rows=C, cols=Hin*Win)
+
+  # Pad image
+  x_pad = util::pad_image(x, Hin, Win, pad, pad, 0)
+
+  # Check for padded rows & columns
+  for (c in 1:C) {
+    x_pad_slice = matrix(x_pad[c,], rows=Hin+2*pad, cols=Win+2*pad)
+    for (i in 1:pad) {
+      rowsum = sum(x_pad_slice[i,])
+      colsum = sum(x_pad_slice[,i])
+      if (rowsum != 0)
+        print("ERROR: Padding was not applied to row " + i + ".")
+      if (colsum != 0)
+        print("ERROR: Padding was not applied to column " + i + ".")
+    }
+  }
+
+  # Unpad image
+  x1 = util::unpad_image(x_pad, Hin, Win, pad, pad)
+
+  # Equivalency check
+  equivalent = test_util::all_equal(x, x1)
+  if (!equivalent) {
+    print("ERROR: Padding and then unpadding does not yield the original image.")
+  }
+}
+
+max_pool2d = function() {
+  /*
+   * Test for the 2D max pooling functions.
+   */
+  print("Testing the 2D max pooling functions.")
+
+  # Generate data
+  N = 2  # num examples
+  C = 3  # num channels
+  Hin = 8  # input height
+  Win = 8  # input width
+  Hf = 2  # filter height
+  Wf = 2  # filter width
+  stride = 2
+  X = rand(rows=N, cols=C*Hin*Win, pdf="normal")
+
+  for (padh in 0:3) {
+    for (padw in 0:3) {
+      print(" - Testing w/ padh="+padh+" & padw="+padw+".")
+      #if (1==1) {}  # force correct printing
+      #print("   - Testing forward")
+      [out, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, padh, padw)
+      [out_simple, Hout_simple, Wout_simple] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf,
+                                                                          stride, stride,
+                                                                          padh, padw)
+      [out_builtin, Hout_builtin, Wout_builtin] = max_pool2d_builtin::forward(X, C, Hin, Win,
+                                                                              Hf, Wf,
+                                                                              stride, stride,
+                                                                              padh, padw)
+
+      # Equivalency check
+      out = matrix(out, rows=1, cols=N*C*Hout*Wout)
+      out_simple = matrix(out_simple, rows=1, cols=N*C*Hout*Wout)
+      out_builtin = matrix(out_builtin, rows=1, cols=N*C*Hout*Wout)
+      for (i in 1:length(out)) {
+        rel_error = test_util::check_rel_error(as.scalar(out[1,i]),
+                                               as.scalar(out_simple[1,i]), 1e-10, 1e-12)
+        rel_error = test_util::check_rel_error(as.scalar(out[1,i]),
+                                               as.scalar(out_builtin[1,i]), 1e-10, 1e-12)
+      }
+
+      #print("   - Testing backward")
+      dout = rand(rows=N, cols=C*Hout*Wout, pdf="normal")
+      dX = max_pool2d::backward(dout, Hout, Wout, X, C, Hin, Win, Hf, Wf, stride, stride,
+                                padh, padw)
+      dX_simple = max_pool2d_simple::backward(dout, Hout_simple, Wout_simple, X, C, Hin, Win,
+                                              Hf, Wf, stride, stride, padh, padw)
+      dX_builtin = max_pool2d_builtin::backward(dout, Hout_builtin, Wout_builtin, X, C, Hin, Win,
+                                                Hf, Wf, stride, stride, padh, padw)
+
+      # Equivalency check
+      dX = matrix(dX, rows=1, cols=N*C*Hin*Win)
+      dX_simple = matrix(dX_simple, rows=1, cols=N*C*Hin*Win)
+      dX_builtin = matrix(dX_builtin, rows=1, cols=N*C*Hin*Win)
+      for (i in 1:length(dX)) {
+        rel_error = test_util::check_rel_error(as.scalar(dX[1,i]),
+                                               as.scalar(dX_simple[1,i]), 1e-10, 1e-12)
+        rel_error = test_util::check_rel_error(as.scalar(dX[1,i]),
+                                               as.scalar(dX_builtin[1,i]), 1e-10, 1e-12)
+      }
+    }
+  }
+
+  # ---
+  print(" - Testing for correct behavior against known answer w/ pad=0.")
+  # generate data
+  # -- channel 1
+  #  1  2  3  4
+  #  5  6  7  8
+  #  9 10 11 12
+  # 13 14 15 16
+  # -- channel 2
+  #  1  5  9 13
+  #  2  6 10 14
+  #  3  7 11 15
+  #  4  8 12 16
+  C = 2  # num channels
+  Hin = 4  # input height
+  Win = 4  # input width
+  X = matrix(seq(1,16,1), rows=Hin, cols=Win)
+  X = matrix(rbind(X, t(X)), rows=1, cols=C*Hin*Win)  # C=2
+  X = rbind(X, X)  # n=2
+  pad = 0
+
+  # forward
+  [out, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+  [out_simple, Hout_simple, Wout_simple] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf,
+                                                                      stride, stride, pad, pad)
+  [out_builtin, Hout_builtin, Wout_builtin] = max_pool2d_builtin::forward(X, C, Hin, Win, Hf, Wf,
+                                                                          stride, stride, pad, pad)
+
+  # equivalency check
+  # -- channel 1
+  #   6  8
+  #  14 16
+  # -- channel 2
+  #  6  14
+  #  8  16
+  target = matrix("6 8 14 16 6 14 8 16", rows=1, cols=C*Hout*Wout)
+  target = rbind(target, target)  # n=2
+  tmp = test_util::check_all_equal(out, target)
+  tmp = test_util::check_all_equal(out_simple, target)
+  tmp = test_util::check_all_equal(out_builtin, target)
+
+  print(" - Testing for correct behavior against known answer w/ pad=1.")
+  # generate data
+  # -- channel 1
+  #  0  0  0  0  0  0
+  #  0  1  2  3  4  0
+  #  0  5  6  7  8  0
+  #  0  9 10 11 12  0
+  #  0 13 14 15 16  0
+  #  0  0  0  0  0  0
+  # -- channel 2
+  #  0  0  0  0  0  0
+  #  0  1  5  9 13  0
+  #  0  2  6 10 14  0
+  #  0  3  7 11 15  0
+  #  0  4  8 12 16  0
+  #  0  0  0  0  0  0
+  pad = 1
+
+  # forward
+  [out, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+  [out_simple, Hout_simple, Wout_simple] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf,
+                                                                      stride, stride, pad, pad)
+  [out_builtin, Hout_builtin, Wout_builtin] = max_pool2d_builtin::forward(X, C, Hin, Win, Hf, Wf,
+                                                                          stride, stride, pad, pad)
+
+  # equivalency check
+  # -- channel 1
+  #  1  3  4
+  #  9 11 12
+  # 13 15 16
+  # -- channel 2
+  #  1  9 13
+  #  3 11 15
+  #  4 12 16
+  target = matrix("1 3 4 9 11 12 13 15 16 1 9 13 3 11 15 4 12 16", rows=1, cols=C*Hout*Wout)
+  target = rbind(target, target)  # n=2
+  tmp = test_util::check_all_equal(out, target)
+  tmp = test_util::check_all_equal(out_simple, target)
+  tmp = test_util::check_all_equal(out_builtin, target)
+
+  print(" - Testing for correct behavior against known answer w/ an all-negative matrix w/ pad=0.")
+  # generate data
+  # -- channel 1
+  #  -1  -2  -3  -4
+  #  -5  -6  -7  -8
+  #  -9 -10 -11 -12
+  # -13 -14 -15 -16
+  # -- channel 2
+  #  -1  -5  -9 -13
+  #  -2  -6 -10 -14
+  #  -3  -7 -11 -15
+  #  -4  -8 -12 -16
+  X = X * -1
+  pad = 0
+
+  # forward
+  [out, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+  [out_simple, Hout_simple, Wout_simple] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf,
+                                                                      stride, stride, pad, pad)
+  [out_builtin, Hout_builtin, Wout_builtin] = max_pool2d_builtin::forward(X, C, Hin, Win, Hf, Wf,
+                                                                          stride, stride, pad, pad)
+
+  # equivalency check
+  # -- channel 1
+  #  -1  -3
+  #  -9 -11
+  # -- channel 2
+  #  -1  -9
+  #  -3 -11
+  target = matrix("-1 -3 -9 -11 -1 -9 -3 -11", rows=1, cols=C*Hout*Wout)
+  target = rbind(target, target)  # n=2
+  tmp = test_util::check_all_equal(out, target)
+  tmp = test_util::check_all_equal(out_simple, target)
+  tmp = test_util::check_all_equal(out_builtin, target)
+
+
+  print(" - Testing for correct behavior against known answer w/ an all-negative matrix w/ pad=1.")
+  # generate data
+  # -- channel 1
+  #  0   0   0   0   0  0
+  #  0  -1  -2  -3  -4  0
+  #  0  -5  -6  -7  -8  0
+  #  0  -9 -10 -11 -12  0
+  #  0 -13 -14 -15 -16  0
+  #  0   0   0   0   0  0
+  # -- channel 2
+  #  0   0   0   0   0  0
+  #  0  -1  -5  -9 -13  0
+  #  0  -2  -6 -10 -14  0
+  #  0  -3  -7 -11 -15  0
+  #  0  -4  -8 -12 -16  0
+  #  0   0   0   0   0  0
+  pad = 1
+
+  # forward
+  [out, Hout, Wout] = max_pool2d::forward(X, C, Hin, Win, Hf, Wf, stride, stride, pad, pad)
+  [out_simple, Hout_simple, Wout_simple] = max_pool2d_simple::forward(X, C, Hin, Win, Hf, Wf,
+                                                                      stride, stride, pad, pad)
+  [out_builtin, Hout_builtin, Wout_builtin] = max_pool2d_builtin::forward(X, C, Hin, Win, Hf, Wf,
+                                                                          stride, stride, pad, pad)
+
+  # equivalency check
+  # -- channel 1
+  #   -1  -2  -4
+  #   -5  -6  -8
+  #  -13 -14 -16
+  # -- channel 2
+  #   -1  -5 -13
+  #   -2  -6 -14
+  #   -4  -8 -16
+  target = matrix("-1 -2 -4 -5 -6 -8 -13 -14 -16 -1 -5 -13 -2 -6 -14 -4 -8 -16",
+                  rows=1, cols=C*Hout*Wout)
+  target = rbind(target, target)  # n=2
+  tmp = test_util::check_all_equal(out, target)
+  tmp = test_util::check_all_equal(out_simple, target)
+  tmp = test_util::check_all_equal(out_builtin, target)
+}
+
+batch_norm2d = function() {
+  /*
+   * Test for the 2D (spatial) batch normalization function.
+   */
+  print("Testing the 2D (spatial) batch normalization function.")
+
+  # Generate data
+  N = 2  # Number of examples
+  C = 3  # num channels
+  Hin = 4  # input height
+  Win = 5  # input width
+  mode = 'train'  # execution mode
+  mu = 0.9  # momentum of moving averages
+  eps = 1e-5  # smoothing term
+  X = matrix("70  29 23 55 72
+              42  98 68 48 39
+              34  73 44  6 40
+              74  18 18 53 53
+
+              63  85 72 61 72
+              32  36 23 29 63
+               9  43 43 49 43
+              31  43 89 94 50
+
+              62  12 32 41 87
+              25  48 99 52 61
+              12  83 60 55 34
+              30  42 68 88 51
+
+
+              67  59 62 67 84
+               8  76 24 19 57
+              10  89 63 72  2
+              59  56 16 15 70
+
+              32  69 55 39 93
+              84  36  4 30 40
+              70 100 36 76 59
+              69  15 40 24 34
+
+              51  67 11 13 32
+              66  85 55 85 38
+              32  35 17 83 34
+              55  58 52  0 99", rows=N, cols=C*Hin*Win)
+
+  # Create layer
+  [gamma, beta, ema_mean, ema_var] = batch_norm2d::init(C)
+
+  # Forward
+  [out, ema_mean_upd, ema_var_upd, cache_mean, cache_var, cache_norm] =
+      batch_norm2d::forward(X, gamma, beta, C, Hin, Win, mode, ema_mean, ema_var, mu, eps)
+
+  # Equivalency check
+  target = matrix("0.86215019 -0.76679718 -1.00517964  0.26619387  0.94161105
+                  -0.25030172  1.97460198  0.78268933 -0.01191914 -0.36949289
+                  -0.56814504  0.98134136 -0.17084086 -1.68059683 -0.32976246
+                   1.02107191 -1.20383179 -1.20383179  0.18673301  0.18673301
+
+                   0.50426388  1.41921711  0.87856293  0.42108631  0.87856293
+                  -0.78498828 -0.61863315 -1.15928721 -0.90975463  0.50426388
+                  -1.74153018 -0.32751167 -0.32751167 -0.07797909 -0.32751167
+                  -0.82657707 -0.32751167  1.58557224  1.79351616 -0.0363903
+
+                   0.4607178  -1.49978399 -0.71558321 -0.36269283  1.44096887
+                  -0.99005347 -0.08822262  1.91148913  0.06861746  0.42150795
+                  -1.49978399  1.28412855  0.38229787  0.18624771 -0.63716316
+                  -0.79400325 -0.32348287  0.69597805  1.48017895  0.0294075
+
+
+                   0.74295878  0.42511559  0.54430676  0.74295878  1.41837597
+                  -1.60113597  1.10053277 -0.96544927 -1.16410136  0.34565473
+                  -1.52167511  1.61702824  0.5840373   0.94161105 -1.83951855
+                   0.42511559  0.30592418 -1.28329265 -1.32302308  0.86215019
+
+                  -0.78498828  0.75379658  0.17155361 -0.4938668   1.75192738
+                   1.37762833 -0.61863315 -1.9494741  -0.86816585 -0.45227802
+                   0.79538536  2.04304862 -0.61863315  1.04491806  0.33790874
+                   0.75379658 -1.49199748 -0.45227802 -1.11769855 -0.70181072
+
+                   0.0294075   0.65676796 -1.53899395 -1.46057391 -0.71558321
+                   0.61755812  1.36254871  0.18624771  1.36254871 -0.48032296
+                  -0.71558321 -0.59795308 -1.30373383  1.28412855 -0.63716316
+                   0.18624771  0.30387771  0.06861746 -1.97030437  1.91148913",
+                  rows=1, cols=N*C*Hin*Win)
+  out = matrix(out, rows=1, cols=N*C*Hin*Win)
+  for (i in 1:length(out)) {
+    rel_error = test_util::check_rel_error(as.scalar(out[1,i]),
+                                           as.scalar(target[1,i]), 1e-3, 1e-4)
+  }
+}
+
+tanh = function() {
+  /*
+   * Test for the `tanh` forward function.
+   */
+  print("Testing the tanh forward function.")
+
+  # Generate data
+  N = 2  # num examples
+  C = 3  # num channels
+  X = rand(rows=N, cols=C, pdf="normal")
+
+  out = tanh::forward(X)
+  out_ref = (exp(X) - exp(-X)) / (exp(X) + exp(-X))
+
+  # Equivalency check
+  for (i in 1:nrow(out)) {
+    for (j in 1:ncol(out)) {
+      rel_error = test_util::check_rel_error(as.scalar(out[i,j]), as.scalar(out_ref[i,j]),
+                                             1e-10, 1e-12)
+    }
+  }
+}
+

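As a side note on where the hard-coded `target` values in the batch_norm1d test
come from: they are a per-feature standardization of X. The following sketch
reproduces them to within the test's tolerances (assuming `init` returns
gamma=1 and beta=0; the variable names here are illustrative, not part of this
commit):

    X = matrix(seq(1,16), rows=4, cols=4)
    m = colMeans(X)                     # per-feature mean, shape (1, 4)
    v = colSums((X - m)^2) / nrow(X)    # biased per-feature variance
    norm = (X - m) / sqrt(v + 1e-5)     # ~= the `target` matrix above
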
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/nn/test/util.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/test/util.dml b/scripts/nn/test/util.dml
new file mode 100644
index 0000000..e32a885
--- /dev/null
+++ b/scripts/nn/test/util.dml
@@ -0,0 +1,155 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+/*
+ * Test utility functions.
+ */
+
+all_equal = function(matrix[double] X1, matrix[double] X2)
+    return(boolean equivalent) {
+  /*
+   * Determine if two matrices are equivalent.
+   *
+   * Inputs:
+   *  - X1: Inputs, of shape (any, any).
+   *  - X2: Inputs, of same shape as X1.
+   *
+   * Outputs:
+   *  - equivalent: Whether or not the two matrices are equivalent.
+   */
+  equivalent = as.logical(prod(X1 == X2))
+}
+
+check_all_equal = function(matrix[double] X1, matrix[double] X2)
+    return(boolean equivalent) {
+  /*
+   * Check if two matrices are equivalent, and report any issues.
+   *
+   * Issues an "ERROR" statement if elements of the two matrices are
+   * not equal.
+   *
+   * Inputs:
+   *  - X1: Inputs, of shape (any, any).
+   *  - X2: Inputs, of same shape as X1.
+   *
+   * Outputs:
+   *  - equivalent: Whether or not the two matrices are equivalent.
+   */
+  # Determine if matrices are equivalent
+  equivalent = all_equal(X1, X2)
+
+  # Report any issues
+  if (!equivalent) {
+    print("ERROR: The two matrices are not equivalent.")
+  }
+}
+
+compute_rel_error = function(double x1, double x2)
+    return (double rel_error) {
+  /*
+   * Relative error measure between two values.
+   *
+   * Uses smoothing to avoid divide-by-zero errors.
+   *
+   * Inputs:
+   *  - x1: First value.
+   *  - x2: Second value.
+   *
+   * Outputs:
+   *  - rel_error: Relative error measure between the two values.
+   */
+  rel_error = abs(x1-x2) / max(1e-8, abs(x1)+abs(x2))
+}
+
+check_rel_error = function(double x1, double x2, double thresh_error, double thresh_warn)
+    return (double rel_error) {
+  /*
+   * Check and report any issues with the relative error measure between
+   * two values.
+   *
+   * Issues an "ERROR" statement for relative errors > thresh_error,
+   * indicating that the implementation is likely incorrect.
+   *
+   * Issues a "WARNING" statement for relative errors < thresh_error
+   * but > thresh_warn, indicating that the implementation may be
+   * incorrect.
+   *
+   * Inputs:
+   *  - x1: First value.
+   *  - x2: Second value.
+   *  - thresh_error: Error threshold.
+   *  - thresh_warn: Warning threshold.
+   *
+   * Outputs:
+   *  - rel_error: Relative error measure between the two values.
+   */
+  # Compute relative error
+  rel_error = compute_rel_error(x1, x2)
+
+  # Evaluate relative error
+  if (rel_error > thresh_error) {
+    print("ERROR: Relative error " + rel_error + " > " + thresh_error + " with " + x1 +
+          " vs " + x2 + ".")
+  }
+  else if (rel_error > thresh_warn & rel_error <= thresh_error) {
+    print("WARNING: Relative error " + rel_error + " > " + thresh_warn + " & <= " + thresh_error +
+          " with " + x1 + " vs " + x2 + ".")
+  }
+}
+
+check_rel_grad_error = function(double dw_a, double dw_n, double lossph, double lossmh)
+    return (double rel_error) {
+  /*
+   * Check and report any issues with the relative error measure between
+   * the analytical and numerical partial derivatives.
+   *
+   *  - Issues an "ERROR" statement for relative errors > 1e-2,
+   *  indicating that the gradient is likely incorrect.
+   *  - Issues a "WARNING" statement for relative errors < 1e-2
+   *  but > 1e-4, indicating that the gradient may be incorrect.
+   *
+   * Inputs:
+   *  - dw_a: Analytical partial derivative wrt w.
+   *  - dw_n: Numerical partial derivative wrt w.
+   *  - lossph: Loss evaluated with w set to w+h.
+   *  - lossmh: Loss evaluated with w set to w-h.
+   *
+   * Outputs:
+   *  - rel_error: Relative error measure between the two derivatives.
+   */
+  # Compute relative error
+  rel_error = compute_rel_error(dw_a, dw_n)
+
+  # Evaluate relative error
+  thresh_error = 1e-2
+  thresh_warn = 1e-4
+  if (rel_error > thresh_error) {
+    print("ERROR: Relative error " + rel_error + " > " + thresh_error + " with " + dw_a +
+          " analytical vs " + dw_n + " numerical, with lossph " + lossph +
+          " and lossmh " + lossmh)
+  }
+  else if (rel_error > thresh_warn & rel_error <= thresh_error) {
+    print("WARNING: Relative error " + rel_error + " > " + thresh_warn + " & <= " + thresh_error +
+          " with " + dw_a + " analytical vs " + dw_n + " numerical, with lossph " + lossph +
+          " and lossmh " + lossmh)
+  }
+}
+

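A minimal usage sketch for the thresholded check above (the numeric values are
illustrative, not from this commit): with thresh_error=1e-3 and thresh_warn=1e-4,
the values 1.001 and 1.0 give a relative error of
|1.001-1.0| / (|1.001|+|1.0|) ~= 5.0e-4, which is <= 1e-3 but > 1e-4, so a
WARNING is printed rather than an ERROR:

    source("nn/test/util.dml") as test_util
    # expected to print a WARNING (see check_rel_error above)
    rel_error = test_util::check_rel_error(1.001, 1.0, 1e-3, 1e-4)
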
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/nn/util.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/util.dml b/scripts/nn/util.dml
new file mode 100644
index 0000000..3a73f08
--- /dev/null
+++ b/scripts/nn/util.dml
@@ -0,0 +1,202 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+/*
+ * Utility functions.
+ */
+
+channel_sums = function(matrix[double] X, int C, int Hin, int Win)
+    return (matrix[double] out) {
+  /*
+   * Computes a channel-wise summation over a 4D input.
+   *
+   * Inputs:
+   *  - X: Inputs, of shape (N, C*Hin*Win).
+   *  - C: Number of input channels (dimensionality of input depth).
+   *  - Hin: Input height.
+   *  - Win: Input width.
+   *
+   * Outputs:
+   *  - out: Outputs, of shape (C, 1).
+   */
+  # Here we sum each column, reshape to (C, Hin*Win), and sum each row to result in the summation
+  # for each channel.
+  out = rowSums(matrix(colSums(X), rows=C, cols=Hin*Win))  # shape (C, 1)
+}
+
+im2col = function(matrix[double] img, int Hin, int Win, int Hf, int Wf, int strideh, int stridew)
+    return (matrix[double] img_cols) {
+  /*
+   * Rearrange local image regions (patches) into columns.
+   *
+   * Assumes image has already been padded as necessary.
+   *
+   * Inputs:
+   *  - img: Input image, of shape (C, Hin*Win), where C is the number
+   *      of input channels (depth).
+   *  - Hin: Input height, including padding.
+   *  - Win: Input width, including padding.
+   *  - Hf: Filter height.
+   *  - Wf: Filter width.
+   *  - strideh: Stride over height.
+   *  - stridew: Stride over width.
+   *
+   * Outputs:
+   *  - img_cols: Local spatial regions (patches) of the image stretched
+   *      out into columns, of shape (C*Hf*Wf, Hout*Wout).
+   */
+  C = nrow(img)
+  Hout = as.integer(floor((Hin-Hf)/strideh + 1))
+  Wout = as.integer(floor((Win-Wf)/stridew + 1))
+
+  # Note: We start with `img_cols` transposed to allow for row-major
+  # left-indexing inside the loop, which is more performant.
+  img_cols = matrix(0, rows=Hout*Wout, cols=C*Hf*Wf)  # zeros
+  parfor (hout in 1:Hout, check=0) {  # all output rows
+    hin = (hout-1)*strideh + 1
+    parfor (wout in 1:Wout, check=0) {  # all output columns
+      win = (wout-1)*stridew + 1
+      # Extract a local patch of the input image corresponding spatially to the filter sizes.
+      img_patch = matrix(0, rows=C, cols=Hf*Wf)  # zeros
+      parfor (c in 1:C) {  # all channels
+        img_slice = matrix(img[c,], rows=Hin, cols=Win)  # reshape
+        img_patch[c,] = matrix(img_slice[hin:hin+Hf-1, win:win+Wf-1], rows=1, cols=Hf*Wf)
+      }
+      img_cols[(hout-1)*Wout + wout,] = t(matrix(img_patch, rows=C*Hf*Wf, cols=1))  # reshape
+    }
+  }
+  img_cols = t(img_cols)
+}
+
+col2im = function(matrix[double] img_cols, int C, int Hin, int Win, int Hf, int Wf,
+                  int strideh, int stridew, string reduction)
+    return (matrix[double] img) {
+  /*
+   * Create an image from columns of local image regions (patches).
+   *
+   * The reduction strategy determines how to deal with overlapping
+   * patches.  If it is set to "add", any overlapping patches will be
+   * added together when creating the image.  This is useful when
+   * computing gradients on the original image given gradients on the
+   * patches.  Otherwise, if "none" is provided, any overlapping
+   * patches will just override previous ones when creating the image.
+   * This is useful when recreating an image from the output of
+   * `im2col`.
+   *
+   * Assumes original image was already padded as necessary.
+   *
+   * Inputs:
+   *  - img_cols: Local spatial regions (patches) of the image stretched
+   *      out into columns, of shape (C*Hf*Wf, Hout*Wout).
+   *  - C: Number of input channels (dimensionality of input depth).
+   *  - Hin: Input height, including padding.
+   *  - Win: Input width, including padding.
+   *  - Hf: Filter height.
+   *  - Wf: Filter width.
+   *  - strideh: Stride over height.
+   *  - stridew: Stride over width.
+   *  - reduction: The reduction strategy to use for overlapping
+   *      patches.  Valid options are "add" and "none".
+   *
+   * Outputs:
+   *  - img: Input image, of shape (C, Hin*Win).
+   */
+  Hout = as.integer(floor((Hin-Hf)/strideh + 1))
+  Wout = as.integer(floor((Win-Wf)/stridew + 1))
+
+  img = matrix(0, rows=C, cols=Hin*Win)  # zeros
+  for (hout in 1:Hout) {  # all output rows
+    hin = (hout-1)*strideh + 1
+    for (wout in 1:Wout) {  # all output columns
+      win = (wout-1)*stridew + 1
+      # Extract a local patch of the input image corresponding spatially to the filter sizes.
+      img_patch = matrix(img_cols[,(hout-1)*Wout + wout], rows=C, cols=Hf*Wf)  # reshape
+      parfor (c in 1:C) {  # all channels
+        img_patch_slice = matrix(img_patch[c,], rows=Hf, cols=Wf)  # reshape
+        if (reduction == "add") {
+          img_slice = matrix(0, rows=Hin, cols=Win)
+          img_slice[hin:hin+Hf-1, win:win+Wf-1] = img_patch_slice
+          img[c,] = img[c,] + matrix(img_slice, rows=1, cols=Hin*Win)
+        } else {
+          img_slice = matrix(img[c,], rows=Hin, cols=Win)
+          img_slice[hin:hin+Hf-1, win:win+Wf-1] = img_patch_slice
+          img[c,] = matrix(img_slice, rows=1, cols=Hin*Win)
+        }
+      }
+    }
+  }
+}
+
+pad_image = function(matrix[double] img, int Hin, int Win, int padh, int padw, double pad_value)
+    return (matrix[double] img_padded) {
+  /*
+   * Pads an image along the height and width dimensions with a constant value.
+   *
+   * Inputs:
+   *  - img: Input image, of shape (C, Hin*Win), where C is the number
+   *      of input channels (depth).
+   *  - Hin: Input height.
+   *  - Win: Input width.
+   *  - padh: Padding for top and bottom sides.
+   *  - padw: Padding for left and right sides.
+   *  - pad_value: Value to use for the padding.
+   *      A typical value is 0.
+   *
+   * Outputs:
+   *  - img_padded: The input image padded along the height and width
+   *      dimensions, of shape (C, (Hin+2*padh)*(Win+2*padw)).
+   */
+  C = nrow(img)
+  img_padded = matrix(0, rows=C, cols=(Hin+2*padh)*(Win+2*padw))  # zeros
+  parfor (c in 1:C) {
+    img_slice = matrix(img[c,], rows=Hin, cols=Win)  # depth slice C reshaped
+    img_padded_slice = matrix(pad_value, rows=Hin+2*padh, cols=Win+2*padw)
+    img_padded_slice[padh+1:padh+Hin, padw+1:padw+Win] = img_slice
+    img_padded[c,] = matrix(img_padded_slice, rows=1, cols=(Hin+2*padh)*(Win+2*padw))  # reshape
+  }
+}
+
+unpad_image = function(matrix[double] img_padded, int Hin, int Win, int padh, int padw)
+    return (matrix[double] img) {
+  /*
+   * Unpads an image along the height and width dimensions.
+   *
+   * Inputs:
+   *  - img_padded: The input image padded along the height and width
+   *      dimensions, of shape (C, (Hin+2*padh)*(Win+2*padw)).
+   *  - Hin: Input height of unpadded image.
+   *  - Win: Input width of unpadded image.
+   *  - padh: Padding for top and bottom sides.
+   *  - padw: Padding for left and right sides.
+   *
+   * Outputs:
+   *  - img: Input image, of shape (C, Hin*Win), where C is the number
+   *      of input channels (depth).
+   */
+  C = nrow(img_padded)
+  img = matrix(0, rows=C, cols=Hin*Win)
+  parfor (c in 1:C) {
+    img_padded_slice = matrix(img_padded[c,], rows=(Hin+2*padh), cols=(Win+2*padw))
+    img_slice = img_padded_slice[padh+1:padh+Hin, padw+1:padw+Win]
+    img[c,] = matrix(img_slice, rows=1, cols=Hin*Win)
+  }
+}
+

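To make the shape conventions above concrete, here is a small sketch of a
pad/unpad round trip followed by an `im2col` call (the sizes, and the 2x2
filter with stride 2, are illustrative assumptions):

    source("nn/util.dml") as util
    C = 3
    Hin = 4
    Win = 4
    pad = 1
    x = rand(rows=C, cols=Hin*Win)
    # pad with zeros on all sides: shape (C, (Hin+2*pad)*(Win+2*pad)) = (3, 36)
    x_pad = util::pad_image(x, Hin, Win, pad, pad, 0)
    # inverse of the padding: recovers x exactly, of shape (C, Hin*Win)
    x2 = util::unpad_image(x_pad, Hin, Win, pad, pad)
    # 2x2 patches at stride 2: shape (C*Hf*Wf, Hout*Wout) = (12, 9),
    # since Hout = Wout = floor((6-2)/2 + 1) = 3
    x_cols = util::im2col(x_pad, Hin+2*pad, Win+2*pad, 2, 2, 2, 2)
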
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/README.md
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/README.md b/scripts/staging/SystemML-NN/README.md
deleted file mode 100644
index b80f2c6..0000000
--- a/scripts/staging/SystemML-NN/README.md
+++ /dev/null
@@ -1,183 +0,0 @@
-<!--
-{% comment %}
-Licensed to the Apache Software Foundation (ASF) under one or more
-contributor license agreements.  See the NOTICE file distributed with
-this work for additional information regarding copyright ownership.
-The ASF licenses this file to you under the Apache License, Version 2.0
-(the "License"); you may not use this file except in compliance with
-the License.  You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-{% endcomment %}
--->
-
-# SystemML-NN
-
-### A deep learning library for [Apache SystemML](https://github.com/apache/incubator-systemml).
-
-## Examples:
-#### Please see the [`examples`](nn/examples) folder for more detailed examples, or view the following two quick examples.
-### Neural net for regression with vanilla SGD:
-```python
-# Imports
-source("nn/layers/affine.dml") as affine
-source("nn/layers/l2_loss.dml") as l2_loss
-source("nn/layers/relu.dml") as relu
-source("nn/optim/sgd.dml") as sgd
-
-# Generate input data
-N = 1024 # num examples
-D = 100 # num features
-t = 1 # num targets
-X = rand(rows=N, cols=D, pdf="normal")
-y = rand(rows=N, cols=t)
-
-# Create 2-layer network:
-## affine1 -> relu1 -> affine2
-M = 64 # number of neurons
-[W1, b1] = affine::init(D, M)
-[W2, b2] = affine::init(M, t)
-
-# Initialize optimizer
-lr = 0.05  # learning rate
-mu = 0.9  # momentum
-decay = 0.99  # learning rate decay constant
-
-# Optimize
-print("Starting optimization")
-batch_size = 32
-epochs = 5
-iters = 1024 / batch_size
-for (e in 1:epochs) {
-  for(i in 1:iters) {
-    # Get next batch
-    X_batch = X[i:i+batch_size-1,]
-    y_batch = y[i:i+batch_size-1,]
-
-    # Compute forward pass
-    out1 = affine::forward(X_batch, W1, b1)
-    outr1 = relu::forward(out1)
-    out2 = affine::forward(outr1, W2, b2)
-
-    # Compute loss
-    loss = l2_loss::forward(out2, y_batch)
-    print("L2 loss: " + loss)
-
-    # Compute backward pass
-    dout2 = l2_loss::backward(out2, y_batch)
-    [doutr1, dW2, db2] = affine::backward(dout2, outr1, W2, b2)
-    dout1 = relu::backward(doutr1, out1)
-    [dX_batch, dW1, db1] = affine::backward(dout1, X_batch, W1, b1)
-
-    # Optimize with vanilla SGD
-    W1 = sgd::update(W1, dW1, lr)
-    b1 = sgd::update(b1, db1, lr)
-    W2 = sgd::update(W2, dW2, lr)
-    b2 = sgd::update(b2, db2, lr)
-  }
-  # Decay learning rate
-  lr = lr * decay
-}
-```
-
-### Neural net for multi-class classification with dropout and SGD w/ Nesterov momentum:
-```python
-# Imports
-source("nn/layers/affine.dml") as affine
-source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss
-source("nn/layers/dropout.dml") as dropout
-source("nn/layers/relu.dml") as relu
-source("nn/layers/softmax.dml") as softmax
-source("nn/optim/sgd_nesterov.dml") as sgd_nesterov
-
-# Generate input data
-N = 1024 # num examples
-D = 100 # num features
-t = 5 # num targets
-X = rand(rows=N, cols=D, pdf="normal")
-classes = round(rand(rows=N, cols=1, min=1, max=t, pdf="uniform"))
-y = matrix(0, rows=N, cols=t)
-parfor (i in 1:N) {
-  y[i, as.scalar(classes[i,1])] = 1  # one-hot encoding
-}
-
-# Create network:
-# affine1 -> relu1 -> dropout1 -> affine2 -> relu2 -> dropout2 -> affine3 -> softmax
-H1 = 64 # number of neurons in 1st hidden layer
-H2 = 64 # number of neurons in 2nd hidden layer
-p = 0.5  # dropout probability
-[W1, b1] = affine::init(D, H1)
-[W2, b2] = affine::init(H1, H2)
-[W3, b3] = affine::init(H2, t)
-
-# Initialize SGD w/ Nesterov momentum optimizer
-lr = 0.05  # learning rate
-mu = 0.5  # momentum
-decay = 0.99  # learning rate decay constant
-vW1 = sgd_nesterov::init(W1); vb1 = sgd_nesterov::init(b1)
-vW2 = sgd_nesterov::init(W2); vb2 = sgd_nesterov::init(b2)
-vW3 = sgd_nesterov::init(W3); vb3 = sgd_nesterov::init(b3)
-
-# Optimize
-print("Starting optimization")
-batch_size = 64
-epochs = 10
-iters = 1024 / batch_size
-for (e in 1:epochs) {
-  for(i in 1:iters) {
-    # Get next batch
-    X_batch = X[i:i+batch_size-1,]
-    y_batch = y[i:i+batch_size-1,]
-
-    # Compute forward pass
-    ## layer 1:
-    out1 = affine::forward(X_batch, W1, b1)
-    outr1 = relu::forward(out1)
-    [outd1, maskd1] = dropout::forward(outr1, p, -1)
-    ## layer 2:
-    out2 = affine::forward(outd1, W2, b2)
-    outr2 = relu::forward(out2)
-    [outd2, maskd2] = dropout::forward(outr2, p, -1)
-    ## layer 3:
-    out3 = affine::forward(outd2, W3, b3)
-    probs = softmax::forward(out3)
-
-    # Compute loss
-    loss = cross_entropy_loss::forward(probs, y_batch)
-    print("Cross entropy loss: " + loss)
-
-    # Compute backward pass
-    ## loss:
-    dprobs = cross_entropy_loss::backward(probs, y_batch)
-    ## layer 3:
-    dout3 = softmax::backward(dprobs, out3)
-    [doutd2, dW3, db3] = affine::backward(dout3, outd2, W3, b3)
-    ## layer 2:
-    doutr2 = dropout::backward(doutd2, outr2, p, maskd2)
-    dout2 = relu::backward(doutr2, out2)
-    [doutd1, dW2, db2] = affine::backward(dout2, outd1, W2, b2)
-    ## layer 1:
-    doutr1 = dropout::backward(doutd1, outr1, p, maskd1)
-    dout1 = relu::backward(doutr1, out1)
-    [dX_batch, dW1, db1] = affine::backward(dout1, X_batch, W1, b1)
-
-    # Optimize with SGD w/ Nesterov momentum
-    [W1, vW1] = sgd_nesterov::update(W1, dW1, lr, mu, vW1)
-    [b1, vb1] = sgd_nesterov::update(b1, db1, lr, mu, vb1)
-    [W2, vW2] = sgd_nesterov::update(W2, dW2, lr, mu, vW2)
-    [b2, vb2] = sgd_nesterov::update(b2, db2, lr, mu, vb2)
-    [W3, vW3] = sgd_nesterov::update(W3, dW3, lr, mu, vW3)
-    [b3, vb3] = sgd_nesterov::update(b3, db3, lr, mu, vb3)
-  }
-  # Anneal momentum towards 0.999
-  mu = mu + (0.999 - mu)/(1+epochs-e)
-  # Decay learning rate
-  lr = lr * decay
-}
-```

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/nn/examples/Example - MNIST LeNet.ipynb
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/examples/Example - MNIST LeNet.ipynb b/scripts/staging/SystemML-NN/nn/examples/Example - MNIST LeNet.ipynb
deleted file mode 100644
index 0423269..0000000
--- a/scripts/staging/SystemML-NN/nn/examples/Example - MNIST LeNet.ipynb	
+++ /dev/null
@@ -1,189 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Quick Setup"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Create a SystemML MLContext object\n",
-    "from systemml import MLContext, dml\n",
-    "ml = MLContext(sc)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Download Data - MNIST"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "The MNIST dataset contains labeled images of handwritten digits, where each example is a 28x28 pixel image of grayscale values in the range [0,255] stretched out as 784 pixels, and each label is one of 10 possible digits in [0,9].  Here, we download 60,000 training examples, and 10,000 test examples, where the format is \"label, pixel_1, pixel_2, ..., pixel_n\"."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%%sh\n",
-    "mkdir -p data/mnist/\n",
-    "cd data/mnist/\n",
-    "curl -O https://pjreddie.com/media/files/mnist_train.csv\n",
-    "curl -O https://pjreddie.com/media/files/mnist_test.csv"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## SystemML \"LeNet\" Neural Network"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 1. Train"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "script_string = \"\"\"\n",
-    "source(\"nn/examples/mnist_lenet.dml\") as mnist_lenet\n",
-    "\n",
-    "# Read training data\n",
-    "data = read($data, format=\"csv\")\n",
-    "n = nrow(data)\n",
-    "\n",
-    "# Extract images and labels\n",
-    "images = data[,2:ncol(data)]\n",
-    "labels = data[,1]\n",
-    "\n",
-    "# Scale images to [-1,1], and one-hot encode the labels\n",
-    "images = (images / 255.0) * 2 - 1\n",
-    "labels = table(seq(1, n), labels+1, n, 10)\n",
-    "\n",
-    "# Split into training (55,000 examples) and validation (5,000 examples)\n",
-    "X = images[5001:nrow(images),]\n",
-    "X_val = images[1:5000,]\n",
-    "y = labels[5001:nrow(images),]\n",
-    "y_val = labels[1:5000,]\n",
-    "\n",
-    "# Train\n",
-    "epochs = 10\n",
-    "[W1, b1, W2, b2, W3, b3, W4, b4] = mnist_lenet::train(X, y, X_val, y_val, C, Hin, Win, epochs)\n",
-    "\"\"\"\n",
-    "script = (dml(script_string).input(\"$data\", \"data/mnist/mnist_train.csv\")\n",
-    "                            .input(C=1, Hin=28, Win=28)\n",
-    "                            .output(\"W1\", \"b1\", \"W2\", \"b2\", \"W3\", \"b3\", \"W4\", \"b4\"))\n",
-    "W1, b1, W2, b2, W3, b3, W4, b4 = (ml.execute(script)\n",
-    "                                    .get(\"W1\", \"b1\", \"W2\", \"b2\", \"W3\", \"b3\", \"W4\", \"b4\"))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 2. Compute Test Accuracy"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "script_string = \"\"\"\n",
-    "source(\"nn/examples/mnist_lenet.dml\") as mnist_lenet\n",
-    "\n",
-    "# Read test data\n",
-    "data = read($data, format=\"csv\")\n",
-    "n = nrow(data)\n",
-    "\n",
-    "# Extract images and labels\n",
-    "X_test = data[,2:ncol(data)]\n",
-    "y_test = data[,1]\n",
-    "\n",
-    "# Scale images to [-1,1], and one-hot encode the labels\n",
-    "X_test = (X_test / 255.0) * 2 - 1\n",
-    "y_test = table(seq(1, n), y_test+1, n, 10)\n",
-    "\n",
-    "# Eval on test set\n",
-    "probs = mnist_lenet::predict(X_test, C, Hin, Win, W1, b1, W2, b2, W3, b3, W4, b4)\n",
-    "[loss, accuracy] = mnist_lenet::eval(probs, y_test)\n",
-    "\n",
-    "print(\"Test Accuracy: \" + accuracy)\n",
-    "\"\"\"\n",
-    "script = dml(script_string).input(**{\"$data\": \"data/mnist/mnist_train.csv\",\n",
-    "                                     \"C\": 1, \"Hin\": 28, \"Win\": 28,\n",
-    "                                     \"W1\": W1, \"b1\": b1,\n",
-    "                                     \"W2\": W2, \"b2\": b2,\n",
-    "                                     \"W3\": W3, \"b3\": b3,\n",
-    "                                     \"W4\": W4, \"b4\": b4})\n",
-    "ml.execute(script)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 3. Extract Model Into Spark DataFrames For Future Use"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "W1_df = W1.toDF()\n",
-    "b1_df = b1.toDF()\n",
-    "W2_df = W2.toDF()\n",
-    "b2_df = b2.toDF()\n",
-    "W3_df = W3.toDF()\n",
-    "b3_df = b3.toDF()\n",
-    "W4_df = W4.toDF()\n",
-    "b4_df = b4.toDF()\n",
-    "W1_df, b1_df, W2_df, b2_df, W3_df, b3_df, W4_df, b4_df"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 + Spark 2.x + SystemML",
-   "language": "python",
-   "name": "pyspark3_2.x"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.1"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 1
-}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/nn/examples/Example - MNIST Softmax Classifier.ipynb
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/examples/Example - MNIST Softmax Classifier.ipynb b/scripts/staging/SystemML-NN/nn/examples/Example - MNIST Softmax Classifier.ipynb
deleted file mode 100644
index 5e7182a..0000000
--- a/scripts/staging/SystemML-NN/nn/examples/Example - MNIST Softmax Classifier.ipynb	
+++ /dev/null
@@ -1,179 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Quick Setup"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "scrolled": false
-   },
-   "outputs": [],
-   "source": [
-    "# Create a SystemML MLContext object\n",
-    "from systemml import MLContext, dml\n",
-    "ml = MLContext(sc)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Download Data - MNIST"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "The MNIST dataset contains labeled images of handwritten digits, where each example is a 28x28 pixel image of grayscale values in the range [0,255] stretched out as 784 pixels, and each label is one of 10 possible digits in [0,9].  Here, we download 60,000 training examples, and 10,000 test examples, where the format is \"label, pixel_1, pixel_2, ..., pixel_n\"."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": [
-    "%%sh\n",
-    "mkdir -p data/mnist/\n",
-    "cd data/mnist/\n",
-    "curl -O https://pjreddie.com/media/files/mnist_train.csv\n",
-    "curl -O https://pjreddie.com/media/files/mnist_test.csv"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## SystemML Softmax Model"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 1. Train"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "training = \"\"\"\n",
-    "source(\"nn/examples/mnist_softmax.dml\") as mnist_softmax\n",
-    "\n",
-    "# Read training data\n",
-    "data = read($data, format=\"csv\")\n",
-    "n = nrow(data)\n",
-    "\n",
-    "# Extract images and labels\n",
-    "images = data[,2:ncol(data)]\n",
-    "labels = data[,1]\n",
-    "\n",
-    "# Scale images to [0,1], and one-hot encode the labels\n",
-    "images = images / 255.0\n",
-    "labels = table(seq(1, n), labels+1, n, 10)\n",
-    "\n",
-    "# Split into training (55,000 examples) and validation (5,000 examples)\n",
-    "X = images[5001:nrow(images),]\n",
-    "X_val = images[1:5000,]\n",
-    "y = labels[5001:nrow(images),]\n",
-    "y_val = labels[1:5000,]\n",
-    "\n",
-    "# Train\n",
-    "epochs = 1\n",
-    "[W, b] = mnist_softmax::train(X, y, X_val, y_val, epochs)\n",
-    "\"\"\"\n",
-    "script = dml(training).input(\"$data\", \"data/mnist/mnist_train.csv\").output(\"W\", \"b\")\n",
-    "W, b = ml.execute(script).get(\"W\", \"b\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 2. Compute Test Accuracy"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "testing = \"\"\"\n",
-    "source(\"nn/examples/mnist_softmax.dml\") as mnist_softmax\n",
-    "\n",
-    "# Read test data\n",
-    "data = read($data, format=\"csv\")\n",
-    "n = nrow(data)\n",
-    "\n",
-    "# Extract images and labels\n",
-    "X_test = data[,2:ncol(data)]\n",
-    "y_test = data[,1]\n",
-    "\n",
-    "# Scale images to [0,1], and one-hot encode the labels\n",
-    "X_test = X_test / 255.0\n",
-    "y_test = table(seq(1, n), y_test+1, n, 10)\n",
-    "\n",
-    "# Eval on test set\n",
-    "probs = mnist_softmax::predict(X_test, W, b)\n",
-    "[loss, accuracy] = mnist_softmax::eval(probs, y_test)\n",
-    "\n",
-    "print(\"Test Accuracy: \" + accuracy)\n",
-    "\"\"\"\n",
-    "script = dml(testing).input(\"$data\", \"data/mnist/mnist_test.csv\", W=W, b=b)\n",
-    "ml.execute(script)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### 3. Extract Model Into Spark DataFrames For Future Use"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "W_df = W.toDF()\n",
-    "b_df = b.toDF()\n",
-    "W_df, b_df"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.6.1"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 1
-}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/nn/examples/README.md
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/examples/README.md b/scripts/staging/SystemML-NN/nn/examples/README.md
deleted file mode 100644
index d5e9d04..0000000
--- a/scripts/staging/SystemML-NN/nn/examples/README.md
+++ /dev/null
@@ -1,74 +0,0 @@
-<!--
-{% comment %}
-Licensed to the Apache Software Foundation (ASF) under one or more
-contributor license agreements.  See the NOTICE file distributed with
-this work for additional information regarding copyright ownership.
-The ASF licenses this file to you under the Apache License, Version 2.0
-(the "License"); you may not use this file except in compliance with
-the License.  You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-{% endcomment %}
--->
-
-# SystemML-NN Examples
-
-#### This folder contains scripts and PySpark Jupyter notebooks serving as examples of using the *SystemML-NN* (`nn`) deep learning library.
-
----
-
-# Examples
-### MNIST Softmax Classifier
-
-* This example trains a softmax classifier, which is essentially a multi-class logistic regression model, on the MNIST data.  The model will be trained on the *training* images, validated on the *validation* images, and tested for final performance metrics on the *test* images.
-* Notebook: `Example - MNIST Softmax Classifier.ipynb`.
-* DML Functions: `mnist_softmax.dml`
-* Training script: `mnist_softmax-train.dml`
-* Prediction script: `mnist_softmax-predict.dml`
-
-### MNIST "LeNet" Neural Net
-
-* This example trains a neural network on the MNIST data using a ["LeNet" architecture](http://yann.lecun.com/exdb/publis/pdf/lecun-98.pdf). The model will be trained on the *training* images, validated on the *validation* images, and tested for final performance metrics on the *test* images.
-* Notebook: `Example - MNIST LeNet.ipynb`.
-* DML Functions: `mnist_lenet.dml`
-* Training script: `mnist_lenet-train.dml`
-* Prediction script: `mnist_lenet-predict.dml`
-
----
-
-# Setup
-## Code
-* To run the examples, please first download and unzip the project via GitHub using the "Clone or download" button on the [homepage of the project](https://github.com/dusenberrymw/systemml-nn), *or* via the following commands:
-
-  ```
-  git clone https://github.com/dusenberrymw/systemml-nn.git
-  ```
-
-* Then, move into the `systemml-nn` folder via:
-  ```
-  cd systemml-nn
-  ```
-
-## Data
-* These examples use the classic [MNIST](http://yann.lecun.com/exdb/mnist/) dataset, which contains labeled 28x28 pixel images of handwritten digits in the range of 0-9.  There are 60,000 training images, and 10,000 testing images.  Of the 60,000 training images, 5,000 will be used as validation images.
-* **Download**:
-  * **Notebooks**: The data will be automatically downloaded as a step in either of the example notebooks.
-  * **Training scripts**: Please run `get_mnist_data.sh` to download the data separately.
-
-## Execution
-* These examples contain scripts written in SystemML's R-like language (`*.dml`), as well as PySpark Jupyter notebooks (`*.ipynb`).  The scripts contain the math for the algorithms, enclosed in functions, and the notebooks serve as full, end-to-end examples of reading in data, training models using the functions within the scripts, and evaluating final performance.
-* **Notebooks**: To run the notebook examples, please install the SystemML Python package with `pip install systemml`, and then startup Jupyter in the following manner from this directory (or for more information, please see [this great blog post](http://spark.tc/0-to-life-changing-application-with-apache-systemml/)):
-
-  ```
-  PYSPARK_DRIVER_PYTHON=jupyter PYSPARK_DRIVER_PYTHON_OPTS="notebook" pyspark --master local[*] --driver-memory 3G --driver-class-path SystemML.jar --jars SystemML.jar
-  ```
-
-  Note that all printed output, such as training statistics, from the SystemML scripts will be sent to the terminal in which Jupyter was started (for now...).
-
-* **Scripts**: To run the scripts from the command line using `spark-submit`, please see the comments located at the top of the `-train` and `-predict` scripts.

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/nn/examples/get_mnist_data.sh
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/examples/get_mnist_data.sh b/scripts/staging/SystemML-NN/nn/examples/get_mnist_data.sh
deleted file mode 100755
index deb0c40..0000000
--- a/scripts/staging/SystemML-NN/nn/examples/get_mnist_data.sh
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/usr/bin/env bash
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
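-# Downloads the MNIST training and test sets in CSV form into
-# `data/mnist/`, relative to this script's location.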
-DIR="$(cd "$(dirname "$0")" && pwd)"
-mkdir -p "$DIR/data/mnist/"
-cd "$DIR/data/mnist/" || exit 1
-curl -O https://pjreddie.com/media/files/mnist_train.csv
-curl -O https://pjreddie.com/media/files/mnist_test.csv
-

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/nn/examples/mnist_lenet-predict.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/examples/mnist_lenet-predict.dml b/scripts/staging/SystemML-NN/nn/examples/mnist_lenet-predict.dml
deleted file mode 100644
index 85a5307..0000000
--- a/scripts/staging/SystemML-NN/nn/examples/mnist_lenet-predict.dml
+++ /dev/null
@@ -1,91 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# MNIST LeNet - Predict
-#
-# This script computes the class probability predictions of a
-# trained convolutional net using the "LeNet" architecture on
-# images of handwritten digits.
-#
-# Inputs:
-#  - X: File containing images to predict on.
-#     The format is "pixel_1, pixel_2, ..., pixel_n".
-#  - C: Number of color channels in the images.
-#  - Hin: Input image height.
-#  - Win: Input image width.
-#  - model_dir: Directory containing the trained weights and biases
-#     of the model.
-#  - out_dir: Directory to store class probability predictions for
-#     each image.
-#  - fmt: [DEFAULT: "csv"] File format of `X` and output predictions.
-#     Options include: "csv", "mm", "text", and "binary".
-#
-# Outputs:
-#  - probs: File containing class probability predictions for each
-#     image.
-#
-# Data:
-# The X file should contain images of handwritten digits,
-# where each example is a 28x28 pixel image of grayscale values in
-# the range [0,255] stretched out as 784 pixels.
-#
-# Sample Invocation (running from outside the `nn` folder):
-# 1. Download images.
-#
-#   For example, save images to `nn/examples/data/mnist/images.csv`.
-#
-# 2. Execute using Spark
-#   ```
-#   spark-submit --master local[*] --driver-memory 5G
-#   --conf spark.driver.maxResultSize=0 --conf spark.rpc.message.maxSize=128
-#   $SYSTEMML_HOME/target/SystemML.jar -f nn/examples/mnist_lenet-predict.dml
-#   -nvargs X=nn/examples/data/mnist/images.csv C=1 Hin=28 Win=28
-#   model_dir=nn/examples/model/mnist_lenet out_dir=nn/examples/data/mnist
-#   ```
-#
-source("nn/examples/mnist_lenet.dml") as mnist_lenet
-
-# Read data & settings
-fmt = ifdef($fmt, "csv")
-X = read($X, format=fmt)
-C = $C
-Hin = $Hin
-Win = $Win
-
-# Scale images to [-1,1]
-X = (X / 255.0) * 2 - 1
-
-# Read model coefficients
-W1 = read($model_dir+"/W1")
-b1 = read($model_dir+"/b1")
-W2 = read($model_dir+"/W2")
-b2 = read($model_dir+"/b2")
-W3 = read($model_dir+"/W3")
-b3 = read($model_dir+"/b3")
-W4 = read($model_dir+"/W4")
-b4 = read($model_dir+"/b4")
-
-# Predict classes
-probs = mnist_lenet::predict(X, C, Hin, Win, W1, b1, W2, b2, W3, b3, W4, b4)
-
-# Output results
-write(probs, $out_dir+"/probs."+fmt, format=fmt)
-

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/nn/examples/mnist_lenet-train.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/examples/mnist_lenet-train.dml b/scripts/staging/SystemML-NN/nn/examples/mnist_lenet-train.dml
deleted file mode 100644
index 0fc733e..0000000
--- a/scripts/staging/SystemML-NN/nn/examples/mnist_lenet-train.dml
+++ /dev/null
@@ -1,123 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# MNIST LeNet - Train
-#
-# This script trains a convolutional net using the "LeNet" architecture
-# on images of handwritten digits.
-#
-# Inputs:
-#  - train: File containing labeled MNIST training images.
-#     The format is "label, pixel_1, pixel_2, ..., pixel_n".
-#  - test: File containing labeled MNIST test images.
-#     The format is "label, pixel_1, pixel_2, ..., pixel_n".
-#  - C: Number of color channels in the images.
-#  - Hin: Input image height.
-#  - Win: Input image width.
-#  - epochs: [DEFAULT: 10] Total number of full training loops over
-#     the full data set.
-#  - out_dir: [DEFAULT: "."] Directory to store weights and bias
-#     matrices of trained model, as well as final test accuracy.
-#  - fmt: [DEFAULT: "csv"] File format of `train` and `test` data.
-#     Options include: "csv", "mm", "text", and "binary".
-#
-# Outputs:
-#  - W1, W2, W3, W4: Files containing the trained weights of the model.
-#  - b1, b2, b3, b4: Files containing the trained biases of the model.
-#  - accuracy: File containing the final accuracy on the test data.
-#
-# Data:
-# The MNIST dataset contains labeled images of handwritten digits,
-# where each example is a 28x28 pixel image of grayscale values in
-# the range [0,255] stretched out as 784 pixels, and each label is
-# one of 10 possible digits in [0,9].
-#
-# Sample Invocation (running from outside the `nn` folder):
-# 1. Download data (60,000 training examples, and 10,000 test examples)
-#   ```
-#   nn/examples/get_mnist_data.sh
-#   ```
-#
-# 2. Execute using Spark
-#   ```
-#   spark-submit --master local[*] --driver-memory 10G
-#   --conf spark.driver.maxResultSize=0 --conf spark.rpc.message.maxSize=128
-#   $SYSTEMML_HOME/target/SystemML.jar -f nn/examples/mnist_lenet-train.dml
-#   -nvargs train=nn/examples/data/mnist/mnist_train.csv test=nn/examples/data/mnist/mnist_test.csv
-#   C=1 Hin=28 Win=28 epochs=10 out_dir=nn/examples/model/mnist_lenet
-#   ```
-#
-source("nn/examples/mnist_lenet.dml") as mnist_lenet
-
-# Read training data & settings
-fmt = ifdef($fmt, "csv")
-train = read($train, format=fmt)
-test = read($test, format=fmt)
-C = $C
-Hin = $Hin
-Win = $Win
-epochs = ifdef($epochs, 10)
-out_dir = ifdef($out_dir, ".")
-
-# Extract images and labels
-images = train[,2:ncol(train)]
-labels = train[,1]
-X_test = test[,2:ncol(test)]
-y_test = test[,1]
-
-# Scale images to [-1,1], and one-hot encode the labels
-n = nrow(train)
-n_test = nrow(test)
-images = (images / 255.0) * 2 - 1
-labels = table(seq(1, n), labels+1, n, 10)
-X_test = (X_test / 255.0) * 2 - 1
-y_test = table(seq(1, n_test), y_test+1, n_test, 10)
-
-# Split into training (55,000 examples) and validation (5,000 examples)
-X = images[5001:nrow(images),]
-X_val = images[1:5000,]
-y = labels[5001:nrow(images),]
-y_val = labels[1:5000,]
-
-# Train
-[W1, b1, W2, b2, W3, b3, W4, b4] = mnist_lenet::train(X, y, X_val, y_val, C, Hin, Win, epochs)
-
-# Write model out
-write(W1, out_dir+"/W1")
-write(b1, out_dir+"/b1")
-write(W2, out_dir+"/W2")
-write(b2, out_dir+"/b2")
-write(W3, out_dir+"/W3")
-write(b3, out_dir+"/b3")
-write(W4, out_dir+"/W4")
-write(b4, out_dir+"/b4")
-
-# Eval on test set
-probs = mnist_lenet::predict(X_test, C, Hin, Win, W1, b1, W2, b2, W3, b3, W4, b4)
-[loss, accuracy] = mnist_lenet::eval(probs, y_test)
-
-# Output results
-print("Test Accuracy: " + accuracy)
-write(accuracy, out_dir+"/accuracy")
-
-print("")
-print("")
-

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/nn/examples/mnist_lenet.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/examples/mnist_lenet.dml b/scripts/staging/SystemML-NN/nn/examples/mnist_lenet.dml
deleted file mode 100644
index e5755c4..0000000
--- a/scripts/staging/SystemML-NN/nn/examples/mnist_lenet.dml
+++ /dev/null
@@ -1,331 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-/*
- * MNIST LeNet Example
- */
-# Imports
-source("nn/layers/affine.dml") as affine
-source("nn/layers/conv2d_builtin.dml") as conv2d
-source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss
-source("nn/layers/dropout.dml") as dropout
-source("nn/layers/l2_reg.dml") as l2_reg
-source("nn/layers/max_pool2d_builtin.dml") as max_pool2d
-source("nn/layers/relu.dml") as relu
-source("nn/layers/softmax.dml") as softmax
-source("nn/optim/sgd_nesterov.dml") as sgd_nesterov
-
-train = function(matrix[double] X, matrix[double] y,
-                 matrix[double] X_val, matrix[double] y_val,
-                 int C, int Hin, int Win, int epochs)
-    return (matrix[double] W1, matrix[double] b1,
-            matrix[double] W2, matrix[double] b2,
-            matrix[double] W3, matrix[double] b3,
-            matrix[double] W4, matrix[double] b4) {
-  /*
-   * Trains a convolutional net using the "LeNet" architecture.
-   *
-   * The input matrix, X, has N examples, each represented as a 3D
-   * volume unrolled into a single vector.  The targets, y, have K
-   * classes, and are one-hot encoded.
-   *
-   * Inputs:
-   *  - X: Input data matrix, of shape (N, C*Hin*Win).
-   *  - y: Target matrix, of shape (N, K).
-   *  - X_val: Input validation data matrix, of shape (N, C*Hin*Win).
-   *  - y_val: Target validation matrix, of shape (N, K).
-   *  - C: Number of input channels (dimensionality of input depth).
-   *  - Hin: Input height.
-   *  - Win: Input width.
-   *  - epochs: Total number of full training loops over the full data set.
-   *
-   * Outputs:
-   *  - W1: 1st layer weights (parameters) matrix, of shape (F1, C*Hf*Wf).
-   *  - b1: 1st layer biases vector, of shape (F1, 1).
-   *  - W2: 2nd layer weights (parameters) matrix, of shape (F2, F1*Hf*Wf).
-   *  - b2: 2nd layer biases vector, of shape (F2, 1).
-   *  - W3: 3rd layer weights (parameters) matrix, of shape (F2*(Hin/4)*(Win/4), N3).
-   *  - b3: 3rd layer biases vector, of shape (1, N3).
-   *  - W4: 4th layer weights (parameters) matrix, of shape (N3, K).
-   *  - b4: 4th layer biases vector, of shape (1, K).
-   */
-  N = nrow(X)
-  K = ncol(y)
-
-  # Create network:
-  # conv1 -> relu1 -> pool1 -> conv2 -> relu2 -> pool2 -> affine3 -> relu3 -> affine4 -> softmax
-  Hf = 5  # filter height
-  Wf = 5  # filter width
-  stride = 1
-  pad = 2  # For same dimensions, (Hf - stride) / 2
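-  # e.g., with Hf=5 and stride=1, pad = (5-1)/2 = 2, so that
-  # Hout = (Hin + 2*pad - Hf)/stride + 1 = Hin (dimensions preserved)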
-
-  F1 = 32  # num conv filters in conv1
-  F2 = 64  # num conv filters in conv2
-  N3 = 512  # num nodes in affine3
-  # Note: affine4 has K nodes, which is equal to the number of target dimensions (num classes)
-
-  [W1, b1] = conv2d::init(F1, C, Hf, Wf)  # inputs: (N, C*Hin*Win)
-  [W2, b2] = conv2d::init(F2, F1, Hf, Wf)  # inputs: (N, F1*(Hin/2)*(Win/2))
-  [W3, b3] = affine::init(F2*(Hin/2/2)*(Win/2/2), N3)  # inputs: (N, F2*(Hin/2/2)*(Win/2/2))
-  [W4, b4] = affine::init(N3, K)  # inputs: (N, N3)
-  W4 = W4 / sqrt(2)  # different initialization, since this layer feeds into a softmax instead of a relu
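-  # e.g., for 28x28 MNIST inputs, affine3 receives F2*(28/4)*(28/4) = 64*7*7 = 3136 features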
-
-  # Initialize SGD w/ Nesterov momentum optimizer
-  lr = 0.01  # learning rate
-  mu = 0.9  #0.5  # momentum
-  decay = 0.95  # learning rate decay constant
-  vW1 = sgd_nesterov::init(W1); vb1 = sgd_nesterov::init(b1)
-  vW2 = sgd_nesterov::init(W2); vb2 = sgd_nesterov::init(b2)
-  vW3 = sgd_nesterov::init(W3); vb3 = sgd_nesterov::init(b3)
-  vW4 = sgd_nesterov::init(W4); vb4 = sgd_nesterov::init(b4)
-
-  # Regularization
-  lambda = 5e-04
-
-  # Optimize
-  print("Starting optimization")
-  batch_size = 64
-  iters = ceil(N / batch_size)
-  for (e in 1:epochs) {
-    for(i in 1:iters) {
-      # Get next batch
-      beg = ((i-1) * batch_size) %% N + 1
-      end = min(N, beg + batch_size - 1)
-      X_batch = X[beg:end,]
-      y_batch = y[beg:end,]
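-      # e.g., with batch_size=64: i=1 selects rows 1:64, i=2 selects rows 65:128,
-      # and the last batch of an epoch is clipped to end at row N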
-
-      # Compute forward pass
-      ## layer 1: conv1 -> relu1 -> pool1
-      [outc1, Houtc1, Woutc1] = conv2d::forward(X_batch, W1, b1, C, Hin, Win, Hf, Wf, stride, stride,
-                                                pad, pad)
-      outr1 = relu::forward(outc1)
-      [outp1, Houtp1, Woutp1] = max_pool2d::forward(outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2,
-                                                    strideh=2, stridew=2, padh=0, padw=0)
-      ## layer 2: conv2 -> relu2 -> pool2
-      [outc2, Houtc2, Woutc2] = conv2d::forward(outp1, W2, b2, F1, Houtp1, Woutp1, Hf, Wf,
-                                                stride, stride, pad, pad)
-      outr2 = relu::forward(outc2)
-      [outp2, Houtp2, Woutp2] = max_pool2d::forward(outr2, F2, Houtc2, Woutc2, Hf=2, Wf=2,
-                                                    strideh=2, stridew=2, padh=0, padw=0)
-      ## layer 3:  affine3 -> relu3 -> dropout
-      outa3 = affine::forward(outp2, W3, b3)
-      outr3 = relu::forward(outa3)
-      [outd3, maskd3] = dropout::forward(outr3, 0.5, -1)
-      ## layer 4:  affine4 -> softmax
-      outa4 = affine::forward(outd3, W4, b4)
-      probs = softmax::forward(outa4)
-
-      # Compute loss & accuracy for training & validation data every 100 iterations.
-      if (i %% 100 == 0) {
-        # Compute training loss & accuracy
-        loss_data = cross_entropy_loss::forward(probs, y_batch)
-        loss_reg_W1 = l2_reg::forward(W1, lambda)
-        loss_reg_W2 = l2_reg::forward(W2, lambda)
-        loss_reg_W3 = l2_reg::forward(W3, lambda)
-        loss_reg_W4 = l2_reg::forward(W4, lambda)
-        loss = loss_data + loss_reg_W1 + loss_reg_W2 + loss_reg_W3 + loss_reg_W4
-        accuracy = mean(rowIndexMax(probs) == rowIndexMax(y_batch))
-
-        # Compute validation loss & accuracy
-        probs_val = predict(X_val, C, Hin, Win, W1, b1, W2, b2, W3, b3, W4, b4)
-        loss_val = cross_entropy_loss::forward(probs_val, y_val)
-        accuracy_val = mean(rowIndexMax(probs_val) == rowIndexMax(y_val))
-
-        # Output results
-        print("Epoch: " + e + ", Iter: " + i + ", Train Loss: " + loss + ", Train Accuracy: "
-              + accuracy + ", Val Loss: " + loss_val + ", Val Accuracy: " + accuracy_val)
-      }
-
-      # Compute data backward pass
-      ## loss:
-      dprobs = cross_entropy_loss::backward(probs, y_batch)
-      ## layer 4:  affine4 -> softmax
-      douta4 = softmax::backward(dprobs, outa4)
-      [doutd3, dW4, db4] = affine::backward(douta4, outd3, W4, b4)
-      ## layer 3:  affine3 -> relu3 -> dropout
-      doutr3 = dropout::backward(doutd3, outr3, 0.5, maskd3)
-      douta3 = relu::backward(doutr3, outa3)
-      [doutp2, dW3, db3] = affine::backward(douta3, outp2, W3, b3)
-      ## layer 2: conv2 -> relu2 -> pool2
-      doutr2 = max_pool2d::backward(doutp2, Houtp2, Woutp2, outr2, F2, Houtc2, Woutc2, Hf=2, Wf=2,
-                                    strideh=2, stridew=2, padh=0, padw=0)
-      doutc2 = relu::backward(doutr2, outc2)
-      [doutp1, dW2, db2] = conv2d::backward(doutc2, Houtc2, Woutc2, outp1, W2, b2, F1,
-                                            Houtp1, Woutp1, Hf, Wf, stride, stride, pad, pad)
-      ## layer 1: conv1 -> relu1 -> pool1
-      doutr1 = max_pool2d::backward(doutp1, Houtp1, Woutp1, outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2,
-                                    strideh=2, stridew=2, padh=0, padw=0)
-      doutc1 = relu::backward(doutr1, outc1)
-      [dX_batch, dW1, db1] = conv2d::backward(doutc1, Houtc1, Woutc1, X_batch, W1, b1, C, Hin, Win,
-                                              Hf, Wf, stride, stride, pad, pad)
-
-      # Compute regularization backward pass
-      dW1_reg = l2_reg::backward(W1, lambda)
-      dW2_reg = l2_reg::backward(W2, lambda)
-      dW3_reg = l2_reg::backward(W3, lambda)
-      dW4_reg = l2_reg::backward(W4, lambda)
-      dW1 = dW1 + dW1_reg
-      dW2 = dW2 + dW2_reg
-      dW3 = dW3 + dW3_reg
-      dW4 = dW4 + dW4_reg
-
-      # Optimize with SGD w/ Nesterov momentum
-      [W1, vW1] = sgd_nesterov::update(W1, dW1, lr, mu, vW1)
-      [b1, vb1] = sgd_nesterov::update(b1, db1, lr, mu, vb1)
-      [W2, vW2] = sgd_nesterov::update(W2, dW2, lr, mu, vW2)
-      [b2, vb2] = sgd_nesterov::update(b2, db2, lr, mu, vb2)
-      [W3, vW3] = sgd_nesterov::update(W3, dW3, lr, mu, vW3)
-      [b3, vb3] = sgd_nesterov::update(b3, db3, lr, mu, vb3)
-      [W4, vW4] = sgd_nesterov::update(W4, dW4, lr, mu, vW4)
-      [b4, vb4] = sgd_nesterov::update(b4, db4, lr, mu, vb4)
-    }
-    # Anneal momentum towards 0.999
-    #mu = mu + (0.999 - mu)/(1+epochs-e)
-    # Decay learning rate
-    lr = lr * decay
-  }
-}
-
-predict = function(matrix[double] X, int C, int Hin, int Win,
-                   matrix[double] W1, matrix[double] b1,
-                   matrix[double] W2, matrix[double] b2,
-                   matrix[double] W3, matrix[double] b3,
-                   matrix[double] W4, matrix[double] b4)
-    return (matrix[double] probs) {
-  /*
-   * Computes the class probability predictions of a convolutional
-   * net using the "LeNet" architecture.
-   *
-   * The input matrix, X, has N examples, each represented as a 3D
-   * volume unrolled into a single vector.
-   *
-   * Inputs:
-   *  - X: Input data matrix, of shape (N, C*Hin*Win).
-   *  - C: Number of input channels (dimensionality of input depth).
-   *  - Hin: Input height.
-   *  - Win: Input width.
-   *  - W1: 1st layer weights (parameters) matrix, of shape (F1, C*Hf*Wf).
-   *  - b1: 1st layer biases vector, of shape (F1, 1).
-   *  - W2: 2nd layer weights (parameters) matrix, of shape (F2, F1*Hf*Wf).
-   *  - b2: 2nd layer biases vector, of shape (F2, 1).
-   *  - W3: 3rd layer weights (parameters) matrix, of shape (F2*(Hin/4)*(Win/4), N3).
-   *  - b3: 3rd layer biases vector, of shape (1, N3).
-   *  - W4: 4th layer weights (parameters) matrix, of shape (N3, K).
-   *  - b4: 4th layer biases vector, of shape (1, K).
-   *
-   * Outputs:
-   *  - probs: Class probabilities, of shape (N, K).
-   */
-  N = nrow(X)
-
-  # Network:
-  # conv1 -> relu1 -> pool1 -> conv2 -> relu2 -> pool2 -> affine3 -> relu3 -> affine4 -> softmax
-  Hf = 5  # filter height
-  Wf = 5  # filter width
-  stride = 1
-  pad = 2  # For same dimensions, (Hf - stride) / 2
-
-  F1 = nrow(W1)  # num conv filters in conv1
-  F2 = nrow(W2)  # num conv filters in conv2
-  N3 = ncol(W3)  # num nodes in affine3
-  K = ncol(W4)  # num nodes in affine4, equal to number of target dimensions (num classes)
-
-  # Compute predictions over mini-batches
-  probs = matrix(0, rows=N, cols=K)
-  batch_size = 64
-  iters = ceil(N / batch_size)
-  for(i in 1:iters) {
-    # Get next batch
-    beg = ((i-1) * batch_size) %% N + 1
-    end = min(N, beg + batch_size - 1)
-    X_batch = X[beg:end,]
-
-    # Compute forward pass
-    ## layer 1: conv1 -> relu1 -> pool1
-    [outc1, Houtc1, Woutc1] = conv2d::forward(X_batch, W1, b1, C, Hin, Win, Hf, Wf, stride, stride,
-                                              pad, pad)
-    outr1 = relu::forward(outc1)
-    [outp1, Houtp1, Woutp1] = max_pool2d::forward(outr1, F1, Houtc1, Woutc1, Hf=2, Wf=2,
-                                                  strideh=2, stridew=2, padh=0, padw=0)
-    ## layer 2: conv2 -> relu2 -> pool2
-    [outc2, Houtc2, Woutc2] = conv2d::forward(outp1, W2, b2, F1, Houtp1, Woutp1, Hf, Wf,
-                                              stride, stride, pad, pad)
-    outr2 = relu::forward(outc2)
-    [outp2, Houtp2, Woutp2] = max_pool2d::forward(outr2, F2, Houtc2, Woutc2, Hf=2, Wf=2,
-                                                  strideh=2, stridew=2, padh=0, padw=0)
-    ## layer 3:  affine3 -> relu3
-    outa3 = affine::forward(outp2, W3, b3)
-    outr3 = relu::forward(outa3)
-    ## layer 4:  affine4 -> softmax
-    outa4 = affine::forward(outr3, W4, b4)
-    probs_batch = softmax::forward(outa4)
-
-    # Store predictions
-    probs[beg:end,] = probs_batch
-  }
-}
-
-eval = function(matrix[double] probs, matrix[double] y)
-    return (double loss, double accuracy) {
-  /*
-   * Evaluates a convolutional net using the "LeNet" architecture.
-   *
-   * The probs matrix contains the class probability predictions
-   * of K classes over N examples.  The targets, y, have K classes,
-   * and are one-hot encoded.
-   *
-   * Inputs:
-   *  - probs: Class probabilities, of shape (N, K).
-   *  - y: Target matrix, of shape (N, K).
-   *
-   * Outputs:
-   *  - loss: Scalar loss, of shape (1).
-   *  - accuracy: Scalar accuracy, of shape (1).
-   */
-  # Compute loss & accuracy
-  loss = cross_entropy_loss::forward(probs, y)
-  correct_pred = rowIndexMax(probs) == rowIndexMax(y)
-  accuracy = mean(correct_pred)
-}
-
-generate_dummy_data = function()
-    return (matrix[double] X, matrix[double] y, int C, int Hin, int Win) {
-  /*
-   * Generate a dummy dataset similar to the MNIST dataset.
-   *
-   * Outputs:
-   *  - X: Input data matrix, of shape (N, D).
-   *  - y: Target matrix, of shape (N, K).
-   *  - C: Number of input channels (dimensionality of input depth).
-   *  - Hin: Input height.
-   *  - Win: Input width.
-   */
-  # Generate dummy input data
-  N = 1024  # num examples
-  C = 1  # num input channels
-  Hin = 28  # input height
-  Win = 28  # input width
-  K = 10  # num target classes
-  X = rand(rows=N, cols=C*Hin*Win, pdf="normal")
-  classes = round(rand(rows=N, cols=1, min=1, max=K, pdf="uniform"))
-  y = table(seq(1, N), classes, N, K)  # one-hot encoding
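-  # Note: passing explicit dims (N, K) ensures y has K columns even if some
-  # class does not appear in the random sample; e.g., class 3 maps to column 3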
-}
-

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/nn/examples/mnist_softmax-predict.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/examples/mnist_softmax-predict.dml b/scripts/staging/SystemML-NN/nn/examples/mnist_softmax-predict.dml
deleted file mode 100644
index 4c8c434..0000000
--- a/scripts/staging/SystemML-NN/nn/examples/mnist_softmax-predict.dml
+++ /dev/null
@@ -1,77 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# MNIST Softmax - Predict
-#
-# This script computes the class probability predictions of a
-# trained softmax classifier on images of handwritten digits.
-#
-# Inputs:
-#  - X: File containing images to predict on.
-#     The format is "pixel_1, pixel_2, ..., pixel_n".
-#  - model_dir: Directory containing the trained weights and biases
-#     of the model.
-#  - out_dir: Directory to store class probability predictions for
-#     each image.
-#  - fmt: [DEFAULT: "csv"] File format of `X` and output predictions.
-#     Options include: "csv", "mm", "text", and "binary".
-#
-# Outputs:
-#  - probs: File containing class probability predictions for each
-#     image.
-#
-# Data:
-# The X file should contain images of handwritten digits,
-# where each example is a 28x28 pixel image of grayscale values in
-# the range [0,255] stretched out as 784 pixels.
-#
-# Sample Invocation (running from outside the `nn` folder):
-# 1. Download images.
-#
-#   For example, save images to `nn/examples/data/mnist/images.csv`.
-#
-# 2. Execute using Spark
-#   ```
-#   spark-submit --master local[*] --driver-memory 5G
-#   --conf spark.driver.maxResultSize=0 --conf spark.rpc.message.maxSize=128
-#   $SYSTEMML_HOME/target/SystemML.jar -f nn/examples/mnist_softmax-predict.dml
-#   -nvargs X=nn/examples/data/mnist/images.csv
-#   model_dir=nn/examples/model/mnist_softmax out_dir=nn/examples/data/mnist
-#   ```
-#
-source("nn/examples/mnist_softmax.dml") as mnist_softmax
-
-# Read data
-fmt = ifdef($fmt, "csv")
-X = read($X, format=fmt)
-
-# Scale images to [0,1]
-X = X / 255.0
-
-# Read model coefficients
-W = read($model_dir+"/W")
-b = read($model_dir+"/b")
-
-# Predict classes
-probs = mnist_softmax::predict(X, W, b)
-
-# Output results
-write(probs, $out_dir+"/probs."+fmt, format=fmt)
-

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/43c321d1/scripts/staging/SystemML-NN/nn/examples/mnist_softmax-train.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/examples/mnist_softmax-train.dml b/scripts/staging/SystemML-NN/nn/examples/mnist_softmax-train.dml
deleted file mode 100644
index 09970f0..0000000
--- a/scripts/staging/SystemML-NN/nn/examples/mnist_softmax-train.dml
+++ /dev/null
@@ -1,110 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# MNIST Softmax - Train
-#
-# This script trains a softmax classifier on images of handwritten
-# digits.
-#
-# Inputs:
-#  - train: File containing labeled MNIST training images.
-#     The format is "label, pixel_1, pixel_2, ..., pixel_n".
-#  - test: File containing labeled MNIST test images.
-#     The format is "label, pixel_1, pixel_2, ..., pixel_n".
-#  - out_dir: Directory to store weights and bias matrices of
-#     trained model, as well as final test accuracy.
-#  - fmt: [DEFAULT: "csv"] File format of `train` and `test` data.
-#     Options include: "csv", "mm", "text", and "binary".
-#
-# Outputs:
-#  - W: File containing the trained weights of the model.
-#  - b: File containing the trained biases of the model.
-#  - accuracy: File containing the final accuracy on the test data.
-#
-# Data:
-# The MNIST dataset contains labeled images of handwritten digits,
-# where each example is a 28x28 pixel image of grayscale values in
-# the range [0,255] stretched out as 784 pixels, and each label is
-# one of 10 possible digits in [0,9].
-#
-# Sample Invocation (running from outside the `nn` folder):
-# 1. Download data (60,000 training examples, and 10,000 test examples)
-#   ```
-#   nn/examples/get_mnist_data.sh
-#   ```
-#
-# 2. Execute using Spark
-#   ```
-#   spark-submit --master local[*] --driver-memory 10G
-#   --conf spark.driver.maxResultSize=0 --conf spark.rpc.message.maxSize=128
-#   $SYSTEMML_HOME/target/SystemML.jar -f nn/examples/mnist_softmax-train.dml
-#   -nvargs train=nn/examples/data/mnist/mnist_train.csv test=nn/examples/data/mnist/mnist_test.csv
-#   epochs=1 out_dir=nn/examples/model/mnist_softmax
-#   ```
-#
-source("nn/examples/mnist_softmax.dml") as mnist_softmax
-
-# Read training data
-fmt = ifdef($fmt, "csv")
-train = read($train, format=fmt)
-test = read($test, format=fmt)
-epochs = ifdef($epochs, 1)
-out_dir = ifdef($out_dir, ".")
-
-# Extract images and labels
-images = train[,2:ncol(train)]
-labels = train[,1]
-X_test = test[,2:ncol(test)]
-y_test = test[,1]
-
-# Scale images to [0,1], and one-hot encode the labels
-n = nrow(train)
-n_test = nrow(test)
-classes = 10
-images = images / 255.0
-labels = table(seq(1, n), labels+1, n, classes)
-X_test = X_test / 255.0
-y_test = table(seq(1, n_test), y_test+1, n_test, classes)
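-# Note: table(seq(1, n), labels+1, n, classes) places a 1 at (row i, column label_i + 1);
-# e.g., the digit 3 maps to column 4, since labels are 0-9 but columns are 1-10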
-
-# Split into training (55,000 examples) and validation (5,000 examples)
-X = images[5001:nrow(images),]
-X_val = images[1:5000,]
-y = labels[5001:nrow(images),]
-y_val = labels[1:5000,]
-
-# Train
-[W, b] = mnist_softmax::train(X, y, X_val, y_val, epochs)
-
-# Write model out
-write(W, out_dir+"/W")
-write(b, out_dir+"/b")
-
-# Eval on test set
-probs = mnist_softmax::predict(X_test, W, b)
-[loss, accuracy] = mnist_softmax::eval(probs, y_test)
-
-# Output results
-print("Test Accuracy: " + accuracy)
-write(accuracy, out_dir+"/accuracy")
-
-print("")
-print("")
-