You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by du...@apache.org on 2017/04/01 01:42:37 UTC

[4/7] incubator-systemml git commit: [SYSTEMML-1413] Extract test-only utilities from `nn/util.dml`

[SYSTEMML-1413] Extract test-only utilities from `nn/util.dml`

This commit extracts utility functions only used for testing from
`nn/util.dml` to a new `nn/test/util.dml`.

Closes #447.


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/5c59e03b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/5c59e03b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/5c59e03b

Branch: refs/heads/master
Commit: 5c59e03b4caca3a519ec871475d2081bff16fd3a
Parents: 7744924
Author: Mike Dusenberry <mw...@us.ibm.com>
Authored: Fri Mar 31 18:39:04 2017 -0700
Committer: Mike Dusenberry <mw...@us.ibm.com>
Committed: Fri Mar 31 18:39:04 2017 -0700

----------------------------------------------------------------------
 .../staging/SystemML-NN/nn/test/grad_check.dml  | 138 +++++++----------
 scripts/staging/SystemML-NN/nn/test/test.dml    |  47 +++---
 scripts/staging/SystemML-NN/nn/test/util.dml    | 155 +++++++++++++++++++
 scripts/staging/SystemML-NN/nn/util.dml         |  92 -----------
 4 files changed, 232 insertions(+), 200 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5c59e03b/scripts/staging/SystemML-NN/nn/test/grad_check.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/grad_check.dml b/scripts/staging/SystemML-NN/nn/test/grad_check.dml
index adc1c9a..ba9a317 100644
--- a/scripts/staging/SystemML-NN/nn/test/grad_check.dml
+++ b/scripts/staging/SystemML-NN/nn/test/grad_check.dml
@@ -44,45 +44,7 @@ source("nn/layers/spatial_batch_norm.dml") as spatial_batch_norm
 source("nn/layers/tanh.dml") as tanh
 source("nn/test/conv_simple.dml") as conv_simple
 source("nn/test/max_pool_simple.dml") as max_pool_simple
-source("nn/util.dml") as util
-
-check_rel_error = function(double dw_a, double dw_n, double lossph, double lossmh)
-    return (double rel_error) {
-  /*
-   * Check and report any issues with the relative error measure between
-   * the analytical and numerical partial derivatives.
-   *
-   *  - Issues an "ERROR" statement for relative errors > 1e-2,
-   *  indicating that the gradient is likely incorrect.
-   *  - Issues a "WARNING" statement for relative errors < 1e-2
-   *  but > 1e-4, indicating that the may be incorrect.
-   *
-   * Inputs:
-   *  - dw_a: Analytical partial derivative wrt w.
-   *  - dw_n: Numerical partial derivative wrt w.
-   *  - lossph: Loss evaluated with w set to w+h.
-   *  - lossmh: Loss evaluated with w set to w-h.
-   *
-   * Outputs:
-   *  - rel_error: Relative error measure between the two derivatives.
-   */
-  # Compute relative error
-  rel_error = util::compute_rel_error(dw_a, dw_n)
-
-  # Evaluate relative error
-  thresh_error = 1e-2
-  thresh_warn = 1e-4
-  if (rel_error > thresh_error) {
-    print("ERROR: Relative error " + rel_error + " > " + thresh_error + " with " + dw_a +
-          " analytical vs " + dw_n + " numerical, with lossph " + lossph +
-          " and lossmh " + lossmh)
-  }
-  else if (rel_error > thresh_warn & rel_error <= thresh_error) {
-    print("WARNING: Relative error " + rel_error + " > " + thresh_warn + " & <= " + thresh_error +
-          " with " + dw_a + " analytical vs " + dw_n + " numerical, with lossph " + lossph +
-          " and lossmh " + lossmh)
-  }
-}
+source("nn/test/util.dml") as test_util
 
 affine = function() {
   /*
@@ -120,7 +82,7 @@ affine = function() {
       dX_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
-      rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
+      rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
     }
   }
 
@@ -139,7 +101,7 @@ affine = function() {
       dW_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
-      rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
+      rel_error = test_util::check_rel_grad_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
     }
   }
 
@@ -158,7 +120,7 @@ affine = function() {
       db_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
-      rel_error = check_rel_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
+      rel_error = test_util::check_rel_grad_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
     }
   }
 }
@@ -217,7 +179,7 @@ batch_norm = function() {
         dX_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
         # Check error
-        rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
+        rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
       }
     }
 
@@ -238,7 +200,8 @@ batch_norm = function() {
         dgamma_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
         # Check error
-        rel_error = check_rel_error(as.scalar(dgamma[i,j]), dgamma_num, lossph, lossmh)
+        rel_error = test_util::check_rel_grad_error(as.scalar(dgamma[i,j]), dgamma_num,
+                                                    lossph, lossmh)
       }
     }
 
@@ -259,7 +222,8 @@ batch_norm = function() {
         dbeta_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
         # Check error
-        rel_error = check_rel_error(as.scalar(dbeta[i,j]), dbeta_num, lossph, lossmh)
+        rel_error = test_util::check_rel_grad_error(as.scalar(dbeta[i,j]), dbeta_num,
+                                                    lossph, lossmh)
       }
     }
   }
@@ -310,7 +274,7 @@ conv = function() {
       dX_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
-      rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
+      rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
     }
   }
 
@@ -329,7 +293,7 @@ conv = function() {
       dW_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
-      rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
+      rel_error = test_util::check_rel_grad_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
     }
   }
 
@@ -348,7 +312,7 @@ conv = function() {
       db_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
-      rel_error = check_rel_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
+      rel_error = test_util::check_rel_grad_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
     }
   }
 }
@@ -401,7 +365,7 @@ conv_builtin = function() {
       dX_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
-      rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
+      rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
     }
   }
 
@@ -422,7 +386,7 @@ conv_builtin = function() {
       dW_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
-      rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
+      rel_error = test_util::check_rel_grad_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
     }
   }
 
@@ -443,7 +407,7 @@ conv_builtin = function() {
       db_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
-      rel_error = check_rel_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
+      rel_error = test_util::check_rel_grad_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
     }
   }
 }
@@ -495,7 +459,7 @@ conv_simple = function() {
       dX_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
-      rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
+      rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
     }
   }
 
@@ -516,7 +480,7 @@ conv_simple = function() {
       dW_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
-      rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
+      rel_error = test_util::check_rel_grad_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
     }
   }
 
@@ -537,7 +501,7 @@ conv_simple = function() {
       db_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
-      rel_error = check_rel_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
+      rel_error = test_util::check_rel_grad_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
     }
   }
 }
@@ -573,7 +537,7 @@ cross_entropy_loss = function() {
       dpred_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
-      rel_error = check_rel_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh)
+      rel_error = test_util::check_rel_grad_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh)
     }
   }
 }
@@ -613,7 +577,7 @@ dropout = function() {
       dX_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
-      rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
+      rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
     }
   }
 }
@@ -647,7 +611,7 @@ l1_loss = function() {
       dpred_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
-      rel_error = check_rel_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh)
+      rel_error = test_util::check_rel_grad_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh)
     }
   }
 }
@@ -681,7 +645,8 @@ l1_reg = function() {
       dW_num = (reg_lossph-reg_lossmh) / (2*h)  # numerical derivative
 
       # Check error
-      rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, reg_lossph, reg_lossmh)
+      rel_error = test_util::check_rel_grad_error(as.scalar(dW[i,j]), dW_num,
+                                                  reg_lossph, reg_lossmh)
     }
   }
 }
@@ -715,7 +680,7 @@ l2_loss = function() {
       dpred_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
-      rel_error = check_rel_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh)
+      rel_error = test_util::check_rel_grad_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh)
     }
   }
 }
@@ -749,7 +714,8 @@ l2_reg = function() {
       dW_num = (reg_lossph-reg_lossmh) / (2*h)  # numerical derivative
 
       # Check error
-      rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, reg_lossph, reg_lossmh)
+      rel_error = test_util::check_rel_grad_error(as.scalar(dW[i,j]), dW_num,
+                                                  reg_lossph, reg_lossmh)
     }
   }
 }
@@ -783,7 +749,7 @@ log_loss = function() {
       dpred_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
-      rel_error = check_rel_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh)
+      rel_error = test_util::check_rel_grad_error(as.scalar(dpred[i,j]), dpred_num, lossph, lossmh)
     }
   }
 }
@@ -835,7 +801,7 @@ lstm = function() {
       dX_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
-      rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
+      rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
     }
   }
 
@@ -858,7 +824,7 @@ lstm = function() {
       dW_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
-      rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
+      rel_error = test_util::check_rel_grad_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
     }
   }
 
@@ -881,7 +847,7 @@ lstm = function() {
       db_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
-      rel_error = check_rel_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
+      rel_error = test_util::check_rel_grad_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
     }
   }
 
@@ -904,7 +870,7 @@ lstm = function() {
       dout0_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
-      rel_error = check_rel_error(as.scalar(dout0[i,j]), dout0_num, lossph, lossmh)
+      rel_error = test_util::check_rel_grad_error(as.scalar(dout0[i,j]), dout0_num, lossph, lossmh)
     }
   }
 
@@ -927,7 +893,7 @@ lstm = function() {
       dc0_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
-      rel_error = check_rel_error(as.scalar(dc0[i,j]), dc0_num, lossph, lossmh)
+      rel_error = test_util::check_rel_grad_error(as.scalar(dc0[i,j]), dc0_num, lossph, lossmh)
     }
   }
 }
@@ -975,7 +941,7 @@ max_pool = function() {
         dX_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
         # Check error
-        rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
+        rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
       }
     }
   }
@@ -1027,7 +993,7 @@ max_pool_builtin = function() {
         dX_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
         # Check error
-        rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
+        rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
       }
     }
   }
@@ -1079,7 +1045,7 @@ max_pool_simple = function() {
         dX_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
         # Check error
-        rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
+        rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
       }
     }
   }
@@ -1125,7 +1091,7 @@ relu = function() {
       dX_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
-      rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
+      rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
     }
   }
 }
@@ -1169,7 +1135,7 @@ rnn = function() {
       dX_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
-      rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
+      rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
     }
   }
 
@@ -1188,7 +1154,7 @@ rnn = function() {
       dW_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
-      rel_error = check_rel_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
+      rel_error = test_util::check_rel_grad_error(as.scalar(dW[i,j]), dW_num, lossph, lossmh)
     }
   }
 
@@ -1207,7 +1173,7 @@ rnn = function() {
       db_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
-      rel_error = check_rel_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
+      rel_error = test_util::check_rel_grad_error(as.scalar(db[i,j]), db_num, lossph, lossmh)
     }
   }
 
@@ -1226,7 +1192,7 @@ rnn = function() {
       dout0_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
-      rel_error = check_rel_error(as.scalar(dout0[i,j]), dout0_num, lossph, lossmh)
+      rel_error = test_util::check_rel_grad_error(as.scalar(dout0[i,j]), dout0_num, lossph, lossmh)
     }
   }
 }
@@ -1264,7 +1230,7 @@ sigmoid = function() {
       dX_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
-      rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
+      rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
     }
   }
 }
@@ -1303,7 +1269,7 @@ softmax = function() {
       dX_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
-      rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
+      rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
     }
   }
 }
@@ -1368,7 +1334,7 @@ spatial_batch_norm = function() {
         dX_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
         # Check error
-        rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
+        rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
       }
     }
 
@@ -1391,7 +1357,8 @@ spatial_batch_norm = function() {
         dgamma_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
         # Check error
-        rel_error = check_rel_error(as.scalar(dgamma[i,j]), dgamma_num, lossph, lossmh)
+        rel_error = test_util::check_rel_grad_error(as.scalar(dgamma[i,j]), dgamma_num,
+                                                    lossph, lossmh)
       }
     }
 
@@ -1414,7 +1381,8 @@ spatial_batch_norm = function() {
         dbeta_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
         # Check error
-        rel_error = check_rel_error(as.scalar(dbeta[i,j]), dbeta_num, lossph, lossmh)
+        rel_error = test_util::check_rel_grad_error(as.scalar(dbeta[i,j]), dbeta_num,
+                                                    lossph, lossmh)
       }
     }
   }
@@ -1454,7 +1422,7 @@ tanh = function() {
       dX_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
-      rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
+      rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
     }
   }
 }
@@ -1522,7 +1490,7 @@ two_layer_affine_l2_net = function() {
       dX_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
-      rel_error = check_rel_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
+      rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
     }
   }
 
@@ -1539,7 +1507,7 @@ two_layer_affine_l2_net = function() {
       dWij_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
-      rel_error = check_rel_error(as.scalar(dW1[i,j]), dWij_num, lossph, lossmh)
+      rel_error = test_util::check_rel_grad_error(as.scalar(dW1[i,j]), dWij_num, lossph, lossmh)
     }
   }
 
@@ -1556,7 +1524,7 @@ two_layer_affine_l2_net = function() {
       dWij_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
-      rel_error = check_rel_error(as.scalar(dW2[i,j]), dWij_num, lossph, lossmh)
+      rel_error = test_util::check_rel_grad_error(as.scalar(dW2[i,j]), dWij_num, lossph, lossmh)
     }
   }
 
@@ -1573,7 +1541,7 @@ two_layer_affine_l2_net = function() {
       dbij_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
-      rel_error = check_rel_error(as.scalar(db1[i,j]), dbij_num, lossph, lossmh)
+      rel_error = test_util::check_rel_grad_error(as.scalar(db1[i,j]), dbij_num, lossph, lossmh)
     }
   }
 
@@ -1590,7 +1558,7 @@ two_layer_affine_l2_net = function() {
       dbij_num = (lossph-lossmh) / (2*h)  # numerical derivative
 
       # Check error
-      rel_error = check_rel_error(as.scalar(db2[i,j]), dbij_num, lossph, lossmh)
+      rel_error = test_util::check_rel_grad_error(as.scalar(db2[i,j]), dbij_num, lossph, lossmh)
     }
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5c59e03b/scripts/staging/SystemML-NN/nn/test/test.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/test.dml b/scripts/staging/SystemML-NN/nn/test/test.dml
index b25fae2..8fb0d04 100644
--- a/scripts/staging/SystemML-NN/nn/test/test.dml
+++ b/scripts/staging/SystemML-NN/nn/test/test.dml
@@ -32,6 +32,7 @@ source("nn/layers/spatial_batch_norm.dml") as spatial_batch_norm
 source("nn/layers/tanh.dml") as tanh
 source("nn/test/conv_simple.dml") as conv_simple
 source("nn/test/max_pool_simple.dml") as max_pool_simple
+source("nn/test/util.dml") as test_util
 source("nn/util.dml") as util
 
 batch_norm = function() {
@@ -62,7 +63,7 @@ batch_norm = function() {
                     1.34160733  1.34160721  1.34160733  1.34160733", rows=1, cols=N*D)
   out = matrix(out, rows=1, cols=N*D)
   for (i in 1:length(out)) {
-    rel_error = util::check_rel_error(as.scalar(out[1,i]),
+    rel_error = test_util::check_rel_error(as.scalar(out[1,i]),
                                       as.scalar(target[1,i]), 1e-3, 1e-4)
   }
 }
@@ -100,9 +101,9 @@ conv = function() {
   out_simple = matrix(out_simple, rows=1, cols=N*F*Hout*Wout)
   out_builtin = matrix(out_builtin, rows=1, cols=N*F*Hout*Wout)
   for (i in 1:length(out)) {
-    rel_error = util::check_rel_error(as.scalar(out[1,i]),
+    rel_error = test_util::check_rel_error(as.scalar(out[1,i]),
                                       as.scalar(out_simple[1,i]), 1e-10, 1e-12)
-    rel_error = util::check_rel_error(as.scalar(out[1,i]),
+    rel_error = test_util::check_rel_error(as.scalar(out[1,i]),
                                       as.scalar(out_builtin[1,i]), 1e-10, 1e-12)
   }
 }
@@ -160,7 +161,7 @@ im2col = function() {
   x_pad2 = util::col2im(x_cols, C, Hin+2*pad, Win+2*pad, Hf, Wf, stride, stride, "none")
 
   # Equivalency check
-  equivalent = util::all_equal(x_pad, x_pad2)
+  equivalent = test_util::all_equal(x_pad, x_pad2)
   if (!equivalent) {
     print("ERROR: im2col and then col2im does not yield the original image.")
   }
@@ -199,7 +200,7 @@ padding = function() {
   x1 = util::unpad_image(x_pad, Hin, Win, pad, pad)
 
   # Equivalency check
-  equivalent = util::all_equal(x, x1)
+  equivalent = test_util::all_equal(x, x1)
   if (!equivalent) {
     print("ERROR: Padding and then unpadding does not yield the original image.")
   }
@@ -238,9 +239,9 @@ max_pool = function() {
       out_simple = matrix(out_simple, rows=1, cols=N*C*Hout*Wout)
       out_builtin = matrix(out_builtin, rows=1, cols=N*C*Hout*Wout)
       for (i in 1:length(out)) {
-        rel_error = util::check_rel_error(as.scalar(out[1,i]),
+        rel_error = test_util::check_rel_error(as.scalar(out[1,i]),
                                           as.scalar(out_simple[1,i]), 1e-10, 1e-12)
-        rel_error = util::check_rel_error(as.scalar(out[1,i]),
+        rel_error = test_util::check_rel_error(as.scalar(out[1,i]),
                                           as.scalar(out_builtin[1,i]), 1e-10, 1e-12)
       }
 
@@ -257,9 +258,9 @@ max_pool = function() {
       dX_simple = matrix(dX_simple, rows=1, cols=N*C*Hin*Win)
       dX_builtin = matrix(dX_builtin, rows=1, cols=N*C*Hin*Win)
       for (i in 1:length(dX)) {
-        rel_error = util::check_rel_error(as.scalar(dX[1,i]),
+        rel_error = test_util::check_rel_error(as.scalar(dX[1,i]),
                                           as.scalar(dX_simple[1,i]), 1e-10, 1e-12)
-        rel_error = util::check_rel_error(as.scalar(dX[1,i]),
+        rel_error = test_util::check_rel_error(as.scalar(dX[1,i]),
                                           as.scalar(dX_builtin[1,i]), 1e-10, 1e-12)
       }
     }
@@ -302,9 +303,9 @@ max_pool = function() {
   #  8  16
   target = matrix("6 8 14 16 6 14 8 16", rows=1, cols=C*Hout*Wout)
   target = rbind(target, target)  # n=2
-  tmp = util::check_all_equal(out, target)
-  tmp = util::check_all_equal(out_simple, target)
-  tmp = util::check_all_equal(out_builtin, target)
+  tmp = test_util::check_all_equal(out, target)
+  tmp = test_util::check_all_equal(out_simple, target)
+  tmp = test_util::check_all_equal(out_builtin, target)
 
   print(" - Testing for correct behavior against known answer w/ pad=1.")
   # generate data
@@ -342,9 +343,9 @@ max_pool = function() {
   #  4 12 16
   target = matrix("1 3 4 9 11 12 13 15 16 1 9 13 3 11 15 4 12 16", rows=1, cols=C*Hout*Wout)
   target = rbind(target, target)  # n=2
-  tmp = util::check_all_equal(out, target)
-  tmp = util::check_all_equal(out_simple, target)
-  tmp = util::check_all_equal(out_builtin, target)
+  tmp = test_util::check_all_equal(out, target)
+  tmp = test_util::check_all_equal(out_simple, target)
+  tmp = test_util::check_all_equal(out_builtin, target)
 
   print(" - Testing for correct behavior against known answer w/ all negative matrix w/ pad=0.")
   # generate data
@@ -377,9 +378,9 @@ max_pool = function() {
   #  -3 -11
   target = matrix("-1 -3 -9 -11 -1 -9 -3 -11", rows=1, cols=C*Hout*Wout)
   target = rbind(target, target)  # n=2
-  tmp = util::check_all_equal(out, target)
-  tmp = util::check_all_equal(out_simple, target)
-  tmp = util::check_all_equal(out_builtin, target)
+  tmp = test_util::check_all_equal(out, target)
+  tmp = test_util::check_all_equal(out_simple, target)
+  tmp = test_util::check_all_equal(out_builtin, target)
 
 
   print(" - Testing for correct behavior against known answer w/ all negative matrix w/ pad=1.")
@@ -418,9 +419,9 @@ max_pool = function() {
   #  0  0  0
   target = matrix("-1 -2 -4 -5 -6 -8 -13 -14 -16 -1 -5 -13 -2 -6 -14 -4 -8 -16", rows=1, cols=C*Hout*Wout)
   target = rbind(target, target)  # n=2
-  tmp = util::check_all_equal(out, target)
-  tmp = util::check_all_equal(out_simple, target)
-  tmp = util::check_all_equal(out_builtin, target)
+  tmp = test_util::check_all_equal(out, target)
+  tmp = test_util::check_all_equal(out_simple, target)
+  tmp = test_util::check_all_equal(out_builtin, target)
 }
 
 spatial_batch_norm = function() {
@@ -509,7 +510,7 @@ spatial_batch_norm = function() {
                                                                                 cols=N*C*Hin*Win)
   out = matrix(out, rows=1, cols=N*C*Hin*Win)
   for (i in 1:length(out)) {
-    rel_error = util::check_rel_error(as.scalar(out[1,i]),
+    rel_error = test_util::check_rel_error(as.scalar(out[1,i]),
                                       as.scalar(target[1,i]), 1e-3, 1e-4)
   }
 }
@@ -531,7 +532,7 @@ tanh = function() {
   # Equivalency check
   for (i in 1:nrow(out)) {
     for (j in 1:ncol(out)) {
-      rel_error = util::check_rel_error(as.scalar(out[i,j]), as.scalar(out_ref[i,j]), 1e-10, 1e-12)
+      rel_error = test_util::check_rel_error(as.scalar(out[i,j]), as.scalar(out_ref[i,j]), 1e-10, 1e-12)
     }
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5c59e03b/scripts/staging/SystemML-NN/nn/test/util.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/test/util.dml b/scripts/staging/SystemML-NN/nn/test/util.dml
new file mode 100644
index 0000000..128e4db
--- /dev/null
+++ b/scripts/staging/SystemML-NN/nn/test/util.dml
@@ -0,0 +1,155 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+/*
+ * Test utility functions.
+ */
+
+all_equal = function(matrix[double] X1, matrix[double] X2)
+    return(boolean equivalent) {
+  /*
+   * Determine if two matrices are equivalent.
+   *
+   * Inputs:
+   *  - X1: Inputs, of shape (any, any).
+   *  - X2: Inputs, of same shape as X1.
+   *
+   * Outputs:
+   *  - equivalent: Whether or not the two matrices are equivalent.
+   */
+  equivalent = as.logical(prod(X1 == X2))
+}
+
+check_all_equal = function(matrix[double] X1, matrix[double] X2)
+    return(boolean equivalent) {
+  /*
+   * Check if two matrices are equivalent, and report any issues.
+   *
+   * Issues an "ERROR" statement if elements of the two matrices are
+   * not equal.
+   *
+   * Inputs:
+   *  - X1: Inputs, of shape (any, any).
+   *  - X2: Inputs, of same shape as X1.
+   *
+   * Outputs:
+   *  - equivalent: Whether or not the two matrices are equivalent.
+   */
+  # Determine if matrices are equivalent
+  equivalent = all_equal(X1, X2)
+
+  # Report an error if the matrices differ
+  if (!equivalent) {
+    print("ERROR: The two matrices are not equivalent.")
+  }
+}
+
+compute_rel_error = function(double x1, double x2)
+    return (double rel_error) {
+  /*
+   * Relative error measure between two values.
+   *
+   * Uses smoothing to avoid divide-by-zero errors.
+   *
+   * Inputs:
+   *  - x1: First value.
+   *  - x2: Second value.
+   *
+   * Outputs:
+   *  - rel_error: Relative error measure between the two values.
+   */
+  rel_error = abs(x1-x2) / max(1e-8, abs(x1)+abs(x2))
+}
+
+check_rel_error = function(double x1, double x2, double thresh_error, double thresh_warn)
+    return (double rel_error) {
+  /*
+   * Check and report any issues with the relative error measure between
+   * two values.
+   *
+   * Issues an "ERROR" statement for relative errors > thresh_error,
+   * indicating that the implementation is likely incorrect.
+   *
+   * Issues a "WARNING" statement for relative errors <= thresh_error
+   * but > thresh_warn, indicating that the implementation may be
+   * incorrect.
+   *
+   * Inputs:
+   *  - x1: First value.
+   *  - x2: Second value.
+   *  - thresh_error: Error threshold.
+   *  - thresh_warn: Warning threshold.
+   *
+   * Outputs:
+   *  - rel_error: Relative error measure between the two values.
+   */
+  # Compute relative error
+  rel_error = compute_rel_error(x1, x2)
+
+  # Evaluate relative error
+  if (rel_error > thresh_error) {
+    print("ERROR: Relative error " + rel_error + " > " + thresh_error + " with " + x1 +
+          " vs " + x2 + ".")
+  }
+  else if (rel_error > thresh_warn & rel_error <= thresh_error) {
+    print("WARNING: Relative error " + rel_error + " > " + thresh_warn + " & <= " + thresh_error +
+          " with " + x1 + " vs " + x2 + ".")
+  }
+}
+
+check_rel_grad_error = function(double dw_a, double dw_n, double lossph, double lossmh)
+    return (double rel_error) {
+  /*
+   * Check and report any issues with the relative error measure between
+   * the analytical and numerical partial derivatives.
+   *
+   *  - Issues an "ERROR" statement for relative errors > 1e-2,
+   *  indicating that the gradient is likely incorrect.
+   *  - Issues a "WARNING" statement for relative errors <= 1e-2
+   *  but > 1e-4, indicating that the gradient may be incorrect.
+   *
+   * Inputs:
+   *  - dw_a: Analytical partial derivative wrt w.
+   *  - dw_n: Numerical partial derivative wrt w.
+   *  - lossph: Loss evaluated with w set to w+h.
+   *  - lossmh: Loss evaluated with w set to w-h.
+   *
+   * Outputs:
+   *  - rel_error: Relative error measure between the two derivatives.
+   */
+  # Compute relative error
+  rel_error = compute_rel_error(dw_a, dw_n)
+
+  # Evaluate relative error
+  thresh_error = 1e-2
+  thresh_warn = 1e-4
+  if (rel_error > thresh_error) {
+    print("ERROR: Relative error " + rel_error + " > " + thresh_error + " with " + dw_a +
+          " analytical vs " + dw_n + " numerical, with lossph " + lossph +
+          " and lossmh " + lossmh)
+  }
+  else if (rel_error > thresh_warn & rel_error <= thresh_error) {
+    print("WARNING: Relative error " + rel_error + " > " + thresh_warn + " & <= " + thresh_error +
+          " with " + dw_a + " analytical vs " + dw_n + " numerical, with lossph " + lossph +
+          " and lossmh " + lossmh)
+  }
+}
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/5c59e03b/scripts/staging/SystemML-NN/nn/util.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/SystemML-NN/nn/util.dml b/scripts/staging/SystemML-NN/nn/util.dml
index 6b86225..405d208 100644
--- a/scripts/staging/SystemML-NN/nn/util.dml
+++ b/scripts/staging/SystemML-NN/nn/util.dml
@@ -23,98 +23,6 @@
  * Utility functions.
  */
 
-all_equal = function(matrix[double] X1, matrix[double] X2)
-    return(boolean equivalent) {
-  /*
-   * Determine if two matrices are equivalent.
-   *
-   * Inputs:
-   *  - X1: Inputs, of shape (any, any).
-   *  - X2: Inputs, of same shape as X1.
-   *
-   * Outputs:
-   *  - equivalent: Whether or not the two matrices are equivalent.
-   */
-  equivalent = as.logical(prod(X1 == X2))
-}
-
-check_all_equal = function(matrix[double] X1, matrix[double] X2)
-    return(boolean equivalent) {
-  /*
-   * Check if two matrices are equivalent, and report any issues.
-   *
-   * Issues an "ERROR" statement if elements of the two matrices are
-   * not equal.
-   *
-   * Inputs:
-   *  - X1: Inputs, of shape (any, any).
-   *  - X2: Inputs, of same shape as X1.
-   *
-   * Outputs:
-   *  - equivalent: Whether or not the two matrices are equivalent.
-   */
-  # Determine if matrices are equivalent
-  equivalent = all_equal(X1, X2)
-
-  # Evaluate relative error
-  if (!equivalent) {
-    print("ERROR: The two matrices are not equivalent.")
-  }
-}
-
-compute_rel_error = function(double x1, double x2)
-    return (double rel_error) {
-  /*
-   * Relative error measure between two values.
-   *
-   * Uses smoothing to avoid divide-by-zero errors.
-   *
-   * Inputs:
-   *  - x1: First value.
-   *  - x2: Second value.
-   *
-   * Outputs:
-   *  - rel_error: Relative error measure between the two values.
-   */
-  rel_error = abs(x1-x2) / max(1e-8, abs(x1)+abs(x2))
-}
-
-check_rel_error = function(double x1, double x2, double thresh_error, double thresh_warn)
-    return (double rel_error) {
-  /*
-   * Check and report any issues with the relative error measure between
-   * two values.
-   *
-   * Issues an "ERROR" statement for relative errors > thresh_error,
-   * indicating that the implementation is likely incorrect.
-   *
-   * Issues a "WARNING" statement for relative errors < thresh_error
-   * but > thresh_warn, indicating that the implementation may be
-   * incorrect.
-   *
-   * Inputs:
-   *  - x1: First value.
-   *  - x2: Second value.
-   *  - thresh_error: Error threshold.
-   *  - thresh_warn: Warning threshold.
-   *
-   * Outputs:
-   *  - rel_error: Relative error measure between the two values.
-   */
-  # Compute relative error
-  rel_error = compute_rel_error(x1, x2)
-
-  # Evaluate relative error
-  if (rel_error > thresh_error) {
-    print("ERROR: Relative error " + rel_error + " > " + thresh_error + " with " + x1 +
-          " vs " + x2 + ".")
-  }
-  else if (rel_error > thresh_warn & rel_error <= thresh_error) {
-    print("WARNING: Relative error " + rel_error + " > " + thresh_warn + " & <= " + thresh_error +
-          " with " + x1 + " vs " + x2 + ".")
-  }
-}
-
 channel_sums = function(matrix[double] X, int C, int Hin, int Win)
     return (matrix[double] out) {
   /*