You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2018/02/12 05:21:30 UTC

systemml git commit: [MINOR] Cleanup and simplification of l2svm algorithm script

Repository: systemml
Updated Branches:
  refs/heads/master 4add81b04 -> 85cb9e34e


[MINOR] Cleanup and simplification of l2svm algorithm script

Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/85cb9e34
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/85cb9e34
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/85cb9e34

Branch: refs/heads/master
Commit: 85cb9e34e79b1b87ebe09d2a37658f9265d8ef9a
Parents: 4add81b
Author: Matthias Boehm <mb...@gmail.com>
Authored: Sun Feb 11 21:21:18 2018 -0800
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Sun Feb 11 21:21:18 2018 -0800

----------------------------------------------------------------------
 scripts/algorithms/l2-svm.dml | 84 +++++++++++++++-----------------------
 1 file changed, 34 insertions(+), 50 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/85cb9e34/scripts/algorithms/l2-svm.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/l2-svm.dml b/scripts/algorithms/l2-svm.dml
index cf669b5..a8e6166 100644
--- a/scripts/algorithms/l2-svm.dml
+++ b/scripts/algorithms/l2-svm.dml
@@ -43,81 +43,67 @@
 # Log       String  ---         [OPTIONAL] Location to write the log file
 # ---------------------------------------------------------------------------------------------
 
-# hadoop jar SystemML.jar -f $L2SVM_HOME/l2-svm.dml -nvargs X=$INPUT_DIR/X Y=$INPUT_DIR/Y icpt=0 tol=0.001 reg=1 maxiter=100 model=$OUPUT_DIR/w Log=$OUTPUT_DIR/Log fmt="text"
+# hadoop jar SystemML.jar -f $L2SVM_HOME/l2-svm.dml -nvargs X=$INPUT_DIR/X Y=$INPUT_DIR/Y \
+#   icpt=0 tol=0.001 reg=1 maxiter=100 model=$OUTPUT_DIR/w Log=$OUTPUT_DIR/Log fmt="text"
 #
 # Note about inputs: 
-# Assumes that labels (entries in Y) 
-# are set to either -1 or +1
-# or the result of recoding
-#
+# Assumes that labels (entries in Y) take exactly two distinct values: either -1 and +1, or two non-negative integers (which are rescaled to -1/+1)
 
-cmdLine_fmt = ifdef($fmt, "text")
-cmdLine_icpt = ifdef($icpt, 0)
-cmdLine_tol = ifdef($tol, 0.001)
-cmdLine_reg = ifdef($reg, 1.0)
-cmdLine_maxiter = ifdef($maxiter, 100)
+fmt = ifdef($fmt, "text")
+intercept = ifdef($icpt, 0)
+epsilon = ifdef($tol, 0.001)
+lambda = ifdef($reg, 1.0)
+maxiterations = ifdef($maxiter, 100)
 
 X = read($X)
 Y = read($Y)
 
+#check input parameter assertions
 if(nrow(X) < 2)
   stop("Stopping due to invalid inputs: Not possible to learn a binary class classifier without at least 2 rows")
+if(intercept != 0 & intercept != 1)
+  stop("Stopping due to invalid argument: Currently supported intercept options are 0 and 1")
+if(epsilon < 0)
+  stop("Stopping due to invalid argument: Tolerance (tol) must be non-negative")
+if(lambda < 0)
+  stop("Stopping due to invalid argument: Regularization constant (reg) must be non-negative")
+if(maxiterations < 1)
+  stop("Stopping due to invalid argument: Maximum iterations should be a positive integer")
 
+# check input labels and transform into -1/1
 check_min = min(Y)
 check_max = max(Y)
 num_min = sum(Y == check_min)
 num_max = sum(Y == check_max)
-
 if(check_min == check_max)
   stop("Stopping due to invalid inputs: Y seems to contain exactly one label")
-
 if(num_min + num_max != nrow(Y))
   stop("Stopping due to invalid inputs: Y seems to contain more than 2 labels")
-	
-if(check_min != -1 | check_max != +1) 
+if(check_min != -1 | check_max != 1)
   Y = 2/(check_max - check_min)*Y - (check_min + check_max)/(check_max - check_min)
 
 positive_label = check_max
 negative_label = check_min
-
-intercept = cmdLine_icpt
-if(intercept != 0 & intercept != 1)
-  stop("Stopping due to invalid argument: Currently supported intercept options are 0 and 1")
-
-epsilon = cmdLine_tol
-if(epsilon < 0)
-  stop("Stopping due to invalid argument: Tolerance (tol) must be non-negative")
-	
-lambda = cmdLine_reg
-if(lambda < 0)
-  stop("Stopping due to invalid argument: Regularization constant (reg) must be non-negative")
-	
-maxiterations = cmdLine_maxiter
-if(maxiterations < 1)
-  stop("Stopping due to invalid argument: Maximum iterations should be a positive integer")
-
 num_samples = nrow(X)
 dimensions = ncol(X)
+num_rows_in_w = dimensions
 
 if (intercept == 1) {
-  ones  = matrix(1, rows=num_samples, cols=1)
+  ones = matrix(1, rows=num_samples, cols=1)
   X = cbind(X, ones);
+  num_rows_in_w += 1
 }
 
-num_rows_in_w = dimensions
-if(intercept == 1){
-  num_rows_in_w = num_rows_in_w + 1
-}
-w = matrix(0, rows=num_rows_in_w, cols=1)
-
+w = matrix(0, num_rows_in_w, 1)
+Xw = matrix(0, rows=nrow(X), cols=1)
 g_old = t(X) %*% Y
 s = g_old
 
-Xw = matrix(0, rows=nrow(X), cols=1)
 debug_str = "# Iter, Obj"
 iter = 0
 continue = TRUE
-while(continue & iter < maxiterations)  {
+
+while(continue & iter < maxiterations) {
   # minimizing primal obj along direction s
   step_sz = 0
   Xd = X %*% s
@@ -125,15 +111,14 @@ while(continue & iter < maxiterations)  {
   dd = lambda * sum(s * s)
   
   continue1 = TRUE
-  while(continue1){
+  while(continue1) {
     tmp_Xw = Xw + step_sz*Xd
-    out = 1 - Y * (tmp_Xw)
-    sv = (out > 0)
+    out = 1 - Y * tmp_Xw
+    sv = out > 0
     out = out * sv
     g = wd + step_sz*dd - sum(out * Y * Xd)
     h = dd + sum(Xd * sv * Xd)
     step_sz = step_sz - g/h
-    
     continue1 = (g*g/h >= 0.0000000001);
   }
 
@@ -142,7 +127,7 @@ while(continue & iter < maxiterations)  {
   Xw += step_sz * Xd
   
   out = 1 - Y * Xw
-  sv = (out > 0)
+  sv = out > 0
   out = sv * out
   obj = 0.5 * sum(out * out) + lambda/2 * sum(w * w)
   g_new = t(X) %*% (out * Y) - lambda * w
@@ -161,16 +146,15 @@ while(continue & iter < maxiterations)  {
   iter = iter + 1
 }
 
-extra_model_params = matrix(0, rows=4, cols=1)
+extra_model_params = matrix(0, 4, 1)
 extra_model_params[1,1] = positive_label
 extra_model_params[2,1] = negative_label
 extra_model_params[3,1] = intercept
 extra_model_params[4,1] = dimensions
 
-w = t(cbind(t(w), t(extra_model_params)))
-write(w, $model, format=cmdLine_fmt)
+w = rbind(w, extra_model_params)
+write(w, $model, format=fmt)
 
 logFile = $Log
-if(logFile != " ") {
+if(logFile != " ")
   write(debug_str, logFile)
-}