You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2016/01/08 20:07:57 UTC
[2/4] incubator-systemml git commit: [SYSTEMML-318] Moved new als-cg
script to algorithms, renamed old als-ds
[SYSTEMML-318] Moved new als-cg script to algorithms, renamed old als-ds
https://issues.apache.org/jira/browse/SYSTEMML-318
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/a71bb12a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/a71bb12a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/a71bb12a
Branch: refs/heads/master
Commit: a71bb12ae68d08f60185bd8a9e09f47b532a4cc2
Parents: 96019cf
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Thu Jan 7 12:04:15 2016 -0800
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Thu Jan 7 12:04:15 2016 -0800
----------------------------------------------------------------------
scripts/algorithms/ALS-CG.dml | 176 +++++++++++++++++++++++++++++++++++++
scripts/algorithms/ALS-DS.dml | 170 +++++++++++++++++++++++++++++++++++
scripts/algorithms/ALS.dml | 170 -----------------------------------
scripts/staging/ALS-CG.dml | 176 -------------------------------------
4 files changed, 346 insertions(+), 346 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a71bb12a/scripts/algorithms/ALS-CG.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/ALS-CG.dml b/scripts/algorithms/ALS-CG.dml
new file mode 100644
index 0000000..cd2ba0b
--- /dev/null
+++ b/scripts/algorithms/ALS-CG.dml
@@ -0,0 +1,176 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#
+# THIS SCRIPT COMPUTES AN APPROXIMATE FACTORIZATION OF A LOW-RANK MATRIX X INTO TWO MATRICES U AND V
+# USING ALTERNATING-LEAST-SQUARES (ALS) ALGORITHM WITH CONJUGATE GRADIENT
+# MATRICES U AND V ARE COMPUTED BY MINIMIZING A LOSS FUNCTION (WITH REGULARIZATION)
+#
+# INPUT PARAMETERS:
+# ---------------------------------------------------------------------------------------------
+# NAME TYPE DEFAULT MEANING
+# ---------------------------------------------------------------------------------------------
+# X String --- Location to read the input matrix X to be factorized
+# U String --- Location to write the factor matrix U
+# V String --- Location to write the factor matrix V
+# rank Int 10 Rank of the factorization
+# reg String "L2" Regularization:
+# "L2" = L2 regularization;
+# "wL2" = weighted L2 regularization
+# lambda Double 0.000001 Regularization parameter, no regularization if 0.0
+# maxi Int 50 Maximum number of iterations
+# check Boolean TRUE Check for convergence after every iteration, i.e., updating U and V once
+# thr Double 0.0001 Assuming check is set to TRUE, the algorithm stops and convergence is declared
+# if the decrease in loss in any two consecutive iterations falls below this threshold;
+# if check is FALSE thr is ignored
+# fmt String "text" The output format of the factor matrices U and V, such as "text" or "csv"
+# ---------------------------------------------------------------------------------------------
+# OUTPUT:
+# 1- An m x r matrix U, where r is the factorization rank
+# 2- An r x n matrix V
+#
+# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
+# hadoop jar SystemML.jar -f ALS-CG.dml -nvargs X=INPUT_DIR/X U=OUTPUT_DIR/U V=OUTPUT_DIR/V rank=10 reg="L2" lambda=0.0001 fmt=csv
+
+fileX = $X;
+fileU = $U;
+fileV = $V;
+
+# Default values of some parameters
+r = ifdef ($rank, 10); # $rank=10;
+reg = ifdef ($reg, "L2") # $reg="L2";
+lambda = ifdef ($lambda, 0.000001); # $lambda=0.000001;
+max_iter = ifdef ($maxi, 50); # $maxi=50;
+check = ifdef ($check, TRUE); # $check=TRUE;
+thr = ifdef ($thr, 0.0001); # $thr=0.0001;
+fmtO = ifdef ($fmt, "text"); # $fmt="text";
+
+
+###### MAIN PART ######
+X = read (fileX);
+m = nrow (X);
+n = ncol (X);
+
+# initializing factor matrices
+U = rand (rows = m, cols = r, min = -0.5, max = 0.5); # mxr
+V = rand (rows = n, cols = r, min = -0.5, max = 0.5); # nxr
+
+W = ppred (X, 0, "!=");
+
+# check for regularization
+if( reg == "L2" ) {
+ print ("BEGIN ALS-CG SCRIPT WITH NONZERO SQUARED LOSS + L2 WITH LAMBDA - " + lambda);
+ row_nonzeros = matrix(1, nrow(W), 1);
+ col_nonzeros = matrix(1, ncol(W), 1);
+}
+else if( reg == "wL2" ) {
+ print ("BEGIN ALS-CG SCRIPT WITH NONZERO SQUARED LOSS + WEIGHTED L2 WITH LAMBDA - " + lambda);
+ row_nonzeros = rowSums(W);
+ col_nonzeros = t(colSums(W));
+}
+else {
+ stop ("wrong regularization! " + reg);
+}
+
+# Loss Function with L2:
+# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
+# + 0.5 * lambda * (sum (U ^ 2) + sum (V ^ 2))
+# Loss Function with weighted L2:
+# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
+# + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros))
+
+is_U = TRUE; # TRUE = Optimize U, FALSE = Optimize V
+maxinneriter = r ; # min (ncol (U), 15);
+
+if( check ) {
+ loss_init = 0.5 * sum (ppred(X,0, "!=") * (U %*% t(V) - X) ^ 2);
+ loss_init = loss_init + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros));
+ print ("----- Initial train loss: " + loss_init + " -----");
+}
+
+it = 0;
+converged = FALSE;
+while( as.integer(it/2) < max_iter & ! converged )
+{
+ it = it + 1;
+ if( is_U ) {
+ G = (ppred(X,0,"!=") * (U %*% t(V) - X)) %*% V + lambda * U * row_nonzeros;
+ }
+ else {
+ G = t(t(U) %*% (ppred(X,0,"!=") * (U %*% t(V) - X))) + lambda * V * col_nonzeros;
+ }
+
+ R = -G;
+ S = R;
+ norm_G2 = sum (G ^ 2);
+ norm_R2 = norm_G2;
+
+ inneriter = 1;
+ tt = 0.000000001;
+ while( norm_R2 > tt * norm_G2 & inneriter <= maxinneriter )
+ {
+ if( is_U ) {
+ HS = (W * (S %*% t(V))) %*% V + lambda * S * row_nonzeros;
+ alpha = norm_R2 / sum (S * HS);
+ U = U + alpha * S; # OK since U is not used in HS
+ }
+ else {
+ HS = t(t(U) %*% (W * (U %*% t(S)))) + lambda * S * col_nonzeros;
+ alpha = norm_R2 / sum (S * HS);
+ V = V + alpha * S; # OK since V is not used in HS
+ }
+
+ R = R - alpha * HS;
+ old_norm_R2 = norm_R2;
+ norm_R2 = sum (R ^ 2);
+ S = R + (norm_R2 / old_norm_R2) * S;
+ inneriter = inneriter + 1;
+ }
+
+ is_U = ! is_U;
+
+ # check for convergence
+ if( check & (it%%2 == 0) ) {
+ loss_cur = 0.5 * sum (ppred(X,0, "!=") * (U %*% t(V) - X) ^ 2);
+ loss_cur = loss_cur + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros));
+
+ loss_dec = (loss_init - loss_cur) / loss_init;
+ print ("Train loss at iteration (" + as.integer(it/2) + "): " + loss_cur + " loss-dec " + loss_dec);
+ if( loss_dec >= 0 & loss_dec < thr | loss_init == 0 ) {
+ print ("----- ALS-CG converged after " + as.integer(it/2) + " iterations!");
+ converged = TRUE;
+ }
+ loss_init = loss_cur;
+ }
+}
+
+if( check ) {
+ print ("----- Final train loss: " + loss_init + " -----");
+}
+
+if( !converged ) {
+ print ("Max iteration achieved but not converged!");
+}
+
+V = t(V);
+write (U, fileU, format=fmtO);
+write (V, fileV, format=fmtO);
+
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a71bb12a/scripts/algorithms/ALS-DS.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/ALS-DS.dml b/scripts/algorithms/ALS-DS.dml
new file mode 100644
index 0000000..1d0fce4
--- /dev/null
+++ b/scripts/algorithms/ALS-DS.dml
@@ -0,0 +1,170 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#
+# THIS SCRIPT COMPUTES AN APPROXIMATE FACTORIZATION OF A LOW-RANK MATRIX V INTO TWO MATRICES L AND R
+# USING ALTERNATING-LEAST-SQUARES (ALS) ALGORITHM
+# MATRICES L AND R ARE COMPUTED BY MINIMIZING A LOSS FUNCTION (WITH REGULARIZATION)
+#
+# INPUT PARAMETERS:
+# ---------------------------------------------------------------------------------------------
+# NAME TYPE DEFAULT MEANING
+# ---------------------------------------------------------------------------------------------
+# V String --- Location to read the input matrix V to be factorized
+# L String --- Location to write the factor matrix L
+# R String --- Location to write the factor matrix R
+# rank Int 10 Rank of the factorization
+# reg String "L2" Regularization:
+# "L2" = L2 regularization;
+# "wL2" = weighted L2 regularization
+# lambda Double 0.000001 Regularization parameter, no regularization if 0.0
+# maxi Int 50 Maximum number of iterations
+# check Boolean FALSE Check for convergence after every iteration, i.e., updating L and R once
+# thr Double 0.0001 Assuming check is set to TRUE, the algorithm stops and convergence is declared
+# if the decrease in loss in any two consecutive iterations falls below this threshold;
+# if check is FALSE thr is ignored
+# fmt String "text" The output format of the factor matrices L and R, such as "text" or "csv"
+# ---------------------------------------------------------------------------------------------
+# OUTPUT:
+# 1- An m x r matrix L, where r is the factorization rank
+# 2- An r x n matrix R
+#
+# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
+# hadoop jar SystemML.jar -f ALS-DS.dml -nvargs V=INPUT_DIR/V L=OUTPUT_DIR/L R=OUTPUT_DIR/R rank=10 reg="L2" lambda=0.0001 fmt=csv
+
+fileV = $V;
+fileL = $L;
+fileR = $R;
+
+# Default values of some parameters
+r = ifdef ($rank, 10); # $rank=10;
+reg = ifdef ($reg, "L2") # $reg="L2";
+lambda = ifdef ($lambda, 0.000001); # $lambda=0.000001;
+max_iter = ifdef ($maxi, 50); # $maxi=50;
+check = ifdef ($check, FALSE); # $check=FALSE;
+thr = ifdef ($thr, 0.0001); # $thr=0.0001;
+fmtO = ifdef ($fmt, "text"); # $fmt="text";
+
+V = read (fileV);
+
+
+# check the input matrix V, if some rows or columns contain only zeros remove them from V
+V_nonzero_ind = ppred (V, 0, "!=");
+row_nonzeros = rowSums (V_nonzero_ind);
+col_nonzeros = t (colSums (V_nonzero_ind));
+orig_nonzero_rows_ind = ppred (row_nonzeros, 0, "!=");
+orig_nonzero_cols_ind = ppred (col_nonzeros, 0, "!=");
+num_zero_rows = nrow (V) - sum (orig_nonzero_rows_ind);
+num_zero_cols = ncol (V) - sum (orig_nonzero_cols_ind);
+if (num_zero_rows > 0) {
+ print ("Matrix V contains empty rows! These rows will be removed.");
+ V = removeEmpty (target = V, margin = "rows");
+}
+if (num_zero_cols > 0) {
+ print ("Matrix V contains empty columns! These columns will be removed.");
+ V = removeEmpty (target = V, margin = "cols");
+}
+if (num_zero_rows > 0 | num_zero_cols > 0) {
+ print ("Recomputing nonzero rows and columns!");
+ V_nonzero_ind = ppred (V, 0, "!=");
+ row_nonzeros = rowSums (V_nonzero_ind);
+ col_nonzeros = t (colSums (V_nonzero_ind));
+}
+
+###### MAIN PART ######
+m = nrow (V);
+n = ncol (V);
+
+# initializing factor matrices
+L = rand (rows = m, cols = r, min = -0.5, max = 0.5);
+R = rand (rows = n, cols = r, min = -0.5, max = 0.5);
+
+# initializing transformed matrices
+Vt = t(V);
+
+# check for regularization
+if (reg == "L2") {
+ print ("BEGIN ALS SCRIPT WITH NONZERO SQUARED LOSS + L2 WITH LAMBDA - " + lambda);
+} else if (reg == "wL2") {
+ print ("BEGIN ALS SCRIPT WITH NONZERO SQUARED LOSS + WEIGHTED L2 WITH LAMBDA - " + lambda);
+} else {
+ stop ("wrong regularization! " + reg);
+}
+
+if (check) {
+ loss_init = sum (V_nonzero_ind * (V - (L %*% t(R)))^2) + lambda * (sum ((L^2) * row_nonzeros) + sum ((R^2) * col_nonzeros));
+ print ("----- Initial train loss: " + loss_init + " -----");
+}
+
+lambda_I = diag (matrix (lambda, rows = r, cols = 1));
+it = 0;
+converged = FALSE;
+while ((it < max_iter) & (!converged)) {
+ it = it + 1;
+ # keep R fixed and update L
+ parfor (i in 1:m) {
+ R_nonzero_ind = t(ppred(V[i,],0,"!="));
+ R_nonzero = removeEmpty (target=R * R_nonzero_ind, margin="rows");
+ A1 = (t(R_nonzero) %*% R_nonzero) + (as.scalar(row_nonzeros[i,1]) * lambda_I); # coefficient matrix
+ L[i,] = t(solve (A1, t(V[i,] %*% R)));
+ }
+
+ # keep L fixed and update R
+ parfor (j in 1:n) {
+ L_nonzero_ind = t(ppred(Vt[j,],0,"!="))
+ L_nonzero = removeEmpty (target=L * L_nonzero_ind, margin="rows");
+ A2 = (t(L_nonzero) %*% L_nonzero) + (as.scalar(col_nonzeros[j,1]) * lambda_I); # coefficient matrix
+ R[j,] = t(solve (A2, t(Vt[j,] %*% L)));
+ }
+
+ # check for convergence
+ if (check) {
+ loss_cur = sum (V_nonzero_ind * (V - (L %*% t(R)))^2) + lambda * (sum ((L^2) * row_nonzeros) + sum ((R^2) * col_nonzeros));
+ loss_dec = (loss_init - loss_cur) / loss_init;
+ print ("Train loss at iteration (R) " + it + ": " + loss_cur + " loss-dec " + loss_dec);
+ if (loss_dec >= 0 & loss_dec < thr | loss_init == 0) {
+ print ("----- ALS converged after " + it + " iterations!");
+ converged = TRUE;
+ }
+ loss_init = loss_cur;
+ }
+} # end of while loop
+
+if (check) {
+ print ("----- Final train loss: " + loss_init + " -----");
+}
+
+if (!converged) {
+ print ("Max iteration achieved but not converged!");
+}
+
+# inject 0s in L if original V had empty rows
+if (num_zero_rows > 0) {
+ L = removeEmpty (target = diag (orig_nonzero_rows_ind), margin = "cols") %*% L;
+}
+# inject 0s in R if original V had empty columns
+if (num_zero_cols > 0) {
+ R = removeEmpty (target = diag (orig_nonzero_cols_ind), margin = "cols") %*% R;
+}
+Rt = t (R);
+write (L, fileL, format=fmtO);
+write (Rt, fileR, format=fmtO);
+
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a71bb12a/scripts/algorithms/ALS.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/ALS.dml b/scripts/algorithms/ALS.dml
deleted file mode 100644
index 1d0fce4..0000000
--- a/scripts/algorithms/ALS.dml
+++ /dev/null
@@ -1,170 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#
-# THIS SCRIPT COMPUTES AN APPROXIMATE FACTORIZATIONOF A LOW-RANK MATRIX V INTO TWO MATRICES L AND R
-# USING ALTERNATING-LEAST-SQUARES (ALS) ALGORITHM
-# MATRICES L AND R ARE COMPUTED BY MINIMIZING A LOSS FUNCTION (WITH REGULARIZATION)
-#
-# INPUT PARAMETERS:
-# ---------------------------------------------------------------------------------------------
-# NAME TYPE DEFAULT MEANING
-# ---------------------------------------------------------------------------------------------
-# V String --- Location to read the input matrix V to be factorized
-# L String --- Location to write the factor matrix L
-# R String --- Location to write the factor matrix R
-# rank Int 10 Rank of the factorization
-# reg String "L2" Regularization:
-# "L2" = L2 regularization;
-# "wL2" = weighted L2 regularization
-# lambda Double 0.0 Regularization parameter, no regularization if 0.0
-# maxi Int 50 Maximum number of iterations
-# check Boolean FALSE Check for convergence after every iteration, i.e., updating L and R once
-# thr Double 0.0001 Assuming check is set to TRUE, the algorithm stops and convergence is declared
-# if the decrease in loss in any two consecutive iterations falls below this threshold;
-# if check is FALSE thr is ignored
-# fmt String "text" The output format of the factor matrices L and R, such as "text" or "csv"
-# ---------------------------------------------------------------------------------------------
-# OUTPUT:
-# 1- An m x r matrix L, where r is the factorization rank
-# 2- An r x n matrix R
-#
-# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
-# hadoop jar SystemML.jar -f ALS.dml -nvargs V=INPUT_DIR/V L=OUTPUT_DIR/L R=OUTPUT_DIR/R rank=10 reg="L2" lambda=0.0001 fmt=csv
-
-fileV = $V;
-fileL = $L;
-fileR = $R;
-
-# Default values of some parameters
-r = ifdef ($rank, 10); # $rank=10;
-reg = ifdef ($reg, "L2") # $reg="L2";
-lambda = ifdef ($lambda, 0.000001); # $lambda=0.000001;
-max_iter = ifdef ($maxi, 50); # $maxi=50;
-check = ifdef ($check, FALSE); # $check=FALSE;
-thr = ifdef ($thr, 0.0001); # $thr=0.0001;
-fmtO = ifdef ($fmt, "text"); # $fmt="text";
-
-V = read (fileV);
-
-
-# check the input matrix V, if some rows or columns contain only zeros remove them from V
-V_nonzero_ind = ppred (V, 0, "!=");
-row_nonzeros = rowSums (V_nonzero_ind);
-col_nonzeros = t (colSums (V_nonzero_ind));
-orig_nonzero_rows_ind = ppred (row_nonzeros, 0, "!=");
-orig_nonzero_cols_ind = ppred (col_nonzeros, 0, "!=");
-num_zero_rows = nrow (V) - sum (orig_nonzero_rows_ind);
-num_zero_cols = ncol (V) - sum (orig_nonzero_cols_ind);
-if (num_zero_rows > 0) {
- print ("Matrix V contains empty rows! These rows will be removed.");
- V = removeEmpty (target = V, margin = "rows");
-}
-if (num_zero_cols > 0) {
- print ("Matrix V contains empty columns! These columns will be removed.");
- V = removeEmpty (target = V, margin = "cols");
-}
-if (num_zero_rows > 0 | num_zero_cols > 0) {
- print ("Recomputing nonzero rows and columns!");
- V_nonzero_ind = ppred (V, 0, "!=");
- row_nonzeros = rowSums (V_nonzero_ind);
- col_nonzeros = t (colSums (V_nonzero_ind));
-}
-
-###### MAIN PART ######
-m = nrow (V);
-n = ncol (V);
-
-# initializing factor matrices
-L = rand (rows = m, cols = r, min = -0.5, max = 0.5);
-R = rand (rows = n, cols = r, min = -0.5, max = 0.5);
-
-# initializing transformed matrices
-Vt = t(V);
-
-# check for regularization
-if (reg == "L2") {
- print ("BEGIN ALS SCRIPT WITH NONZERO SQUARED LOSS + L2 WITH LAMBDA - " + lambda);
-} else if (reg == "wL2") {
- print ("BEGIN ALS SCRIPT WITH NONZERO SQUARED LOSS + WEIGHTED L2 WITH LAMBDA - " + lambda);
-} else {
- stop ("wrong regularization! " + reg);
-}
-
-if (check) {
- loss_init = sum (V_nonzero_ind * (V - (L %*% t(R)))^2) + lambda * (sum ((L^2) * row_nonzeros) + sum ((R^2) * col_nonzeros));
- print ("----- Initial train loss: " + loss_init + " -----");
-}
-
-lambda_I = diag (matrix (lambda, rows = r, cols = 1));
-it = 0;
-converged = FALSE;
-while ((it < max_iter) & (!converged)) {
- it = it + 1;
- # keep R fixed and update L
- parfor (i in 1:m) {
- R_nonzero_ind = t(ppred(V[i,],0,"!="));
- R_nonzero = removeEmpty (target=R * R_nonzero_ind, margin="rows");
- A1 = (t(R_nonzero) %*% R_nonzero) + (as.scalar(row_nonzeros[i,1]) * lambda_I); # coefficient matrix
- L[i,] = t(solve (A1, t(V[i,] %*% R)));
- }
-
- # keep L fixed and update R
- parfor (j in 1:n) {
- L_nonzero_ind = t(ppred(Vt[j,],0,"!="))
- L_nonzero = removeEmpty (target=L * L_nonzero_ind, margin="rows");
- A2 = (t(L_nonzero) %*% L_nonzero) + (as.scalar(col_nonzeros[j,1]) * lambda_I); # coefficient matrix
- R[j,] = t(solve (A2, t(Vt[j,] %*% L)));
- }
-
- # check for convergence
- if (check) {
- loss_cur = sum (V_nonzero_ind * (V - (L %*% t(R)))^2) + lambda * (sum ((L^2) * row_nonzeros) + sum ((R^2) * col_nonzeros));
- loss_dec = (loss_init - loss_cur) / loss_init;
- print ("Train loss at iteration (R) " + it + ": " + loss_cur + " loss-dec " + loss_dec);
- if (loss_dec >= 0 & loss_dec < thr | loss_init == 0) {
- print ("----- ALS converged after " + it + " iterations!");
- converged = TRUE;
- }
- loss_init = loss_cur;
- }
-} # end of while loop
-
-if (check) {
- print ("----- Final train loss: " + loss_init + " -----");
-}
-
-if (!converged) {
- print ("Max iteration achieved but not converged!");
-}
-
-# inject 0s in L if original V had empty rows
-if (num_zero_rows > 0) {
- L = removeEmpty (target = diag (orig_nonzero_rows_ind), margin = "cols") %*% L;
-}
-# inject 0s in R if original V had empty rows
-if (num_zero_cols > 0) {
- R = removeEmpty (target = diag (orig_nonzero_cols_ind), margin = "cols") %*% R;
-}
-Rt = t (R);
-write (L, fileL, format=fmtO);
-write (Rt, fileR, format=fmtO);
-
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a71bb12a/scripts/staging/ALS-CG.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/ALS-CG.dml b/scripts/staging/ALS-CG.dml
deleted file mode 100644
index cd2ba0b..0000000
--- a/scripts/staging/ALS-CG.dml
+++ /dev/null
@@ -1,176 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#
-# THIS SCRIPT COMPUTES AN APPROXIMATE FACTORIZATIONOF A LOW-RANK MATRIX X INTO TWO MATRICES U AND V
-# USING ALTERNATING-LEAST-SQUARES (ALS) ALGORITHM WITH CONJUGATE GRADIENT
-# MATRICES U AND V ARE COMPUTED BY MINIMIZING A LOSS FUNCTION (WITH REGULARIZATION)
-#
-# INPUT PARAMETERS:
-# ---------------------------------------------------------------------------------------------
-# NAME TYPE DEFAULT MEANING
-# ---------------------------------------------------------------------------------------------
-# X String --- Location to read the input matrix X to be factorized
-# U String --- Location to write the factor matrix U
-# V String --- Location to write the factor matrix V
-# rank Int 10 Rank of the factorization
-# reg String "L2" Regularization:
-# "L2" = L2 regularization;
-# "wL2" = weighted L2 regularization
-# lambda Double 0.000001 Regularization parameter, no regularization if 0.0
-# maxi Int 50 Maximum number of iterations
-# check Boolean FALSE Check for convergence after every iteration, i.e., updating U and V once
-# thr Double 0.0001 Assuming check is set to TRUE, the algorithm stops and convergence is declared
-# if the decrease in loss in any two consecutive iterations falls below this threshold;
-# if check is FALSE thr is ignored
-# fmt String "text" The output format of the factor matrices L and R, such as "text" or "csv"
-# ---------------------------------------------------------------------------------------------
-# OUTPUT:
-# 1- An m x r matrix U, where r is the factorization rank
-# 2- An r x n matrix V
-#
-# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
-# hadoop jar SystemML.jar -f ALS-CG.dml -nvargs X=INPUT_DIR/X U=OUTPUT_DIR/U V=OUTPUT_DIR/V rank=10 reg="L2" lambda=0.0001 fmt=csv
-
-fileX = $X;
-fileU = $U;
-fileV = $V;
-
-# Default values of some parameters
-r = ifdef ($rank, 10); # $rank=10;
-reg = ifdef ($reg, "L2") # $reg="L2";
-lambda = ifdef ($lambda, 0.000001); # $lambda=0.000001;
-max_iter = ifdef ($maxi, 50); # $maxi=50;
-check = ifdef ($check, TRUE); # $check=FALSE;
-thr = ifdef ($thr, 0.0001); # $thr=0.0001;
-fmtO = ifdef ($fmt, "text"); # $fmt="text";
-
-
-###### MAIN PART ######
-X = read (fileX);
-m = nrow (X);
-n = ncol (X);
-
-# initializing factor matrices
-U = rand (rows = m, cols = r, min = -0.5, max = 0.5); # mxr
-V = rand (rows = n, cols = r, min = -0.5, max = 0.5); # nxr
-
-W = ppred (X, 0, "!=");
-
-# check for regularization
-if( reg == "L2" ) {
- print ("BEGIN ALS-CG SCRIPT WITH NONZERO SQUARED LOSS + L2 WITH LAMBDA - " + lambda);
- row_nonzeros = matrix(1, nrow(W), 1);
- col_nonzeros = matrix(1, ncol(W), 1);
-}
-else if( reg == "wL2" ) {
- print ("BEGIN ALS-CG SCRIPT WITH NONZERO SQUARED LOSS + WEIGHTED L2 WITH LAMBDA - " + lambda);
- row_nonzeros = rowSums(W);
- col_nonzeros = t(colSums(W));
-}
-else {
- stop ("wrong regularization! " + reg);
-}
-
-# Loss Function with L2:
-# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
-# + 0.5 * lambda * (sum (U ^ 2) + sum (V ^ 2))
-# Loss Function with weighted L2:
-# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
-# + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros))
-
-is_U = TRUE; # TRUE = Optimize U, FALSE = Optimize V
-maxinneriter = r ; # min (ncol (U), 15);
-
-if( check ) {
- loss_init = 0.5 * sum (ppred(X,0, "!=") * (U %*% t(V) - X) ^ 2);
- loss_init = loss_init + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros));
- print ("----- Initial train loss: " + loss_init + " -----");
-}
-
-it = 0;
-converged = FALSE;
-while( as.integer(it/2) < max_iter & ! converged )
-{
- it = it + 1;
- if( is_U ) {
- G = (ppred(X,0,"!=") * (U %*% t(V) - X)) %*% V + lambda * U * row_nonzeros;
- }
- else {
- G = t(t(U) %*% (ppred(X,0,"!=") * (U %*% t(V) - X))) + lambda * V * col_nonzeros;
- }
-
- R = -G;
- S = R;
- norm_G2 = sum (G ^ 2);
- norm_R2 = norm_G2;
-
- inneriter = 1;
- tt = 0.000000001;
- while( norm_R2 > tt * norm_G2 & inneriter <= maxinneriter )
- {
- if( is_U ) {
- HS = (W * (S %*% t(V))) %*% V + lambda * S * row_nonzeros;
- alpha = norm_R2 / sum (S * HS);
- U = U + alpha * S; # OK since U is not used in HS
- }
- else {
- HS = t(t(U) %*% (W * (U %*% t(S)))) + lambda * S * col_nonzeros;
- alpha = norm_R2 / sum (S * HS);
- V = V + alpha * S; # OK since V is not used in HS
- }
-
- R = R - alpha * HS;
- old_norm_R2 = norm_R2;
- norm_R2 = sum (R ^ 2);
- S = R + (norm_R2 / old_norm_R2) * S;
- inneriter = inneriter + 1;
- }
-
- is_U = ! is_U;
-
- # check for convergence
- if( check & (it%%2 == 0) ) {
- loss_cur = 0.5 * sum (ppred(X,0, "!=") * (U %*% t(V) - X) ^ 2);
- loss_cur = loss_cur + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros));
-
- loss_dec = (loss_init - loss_cur) / loss_init;
- print ("Train loss at iteration (" + as.integer(it/2) + "): " + loss_cur + " loss-dec " + loss_dec);
- if( loss_dec >= 0 & loss_dec < thr | loss_init == 0 ) {
- print ("----- ALS-CG converged after " + as.integer(it/2) + " iterations!");
- converged = TRUE;
- }
- loss_init = loss_cur;
- }
-}
-
-if( check ) {
- print ("----- Final train loss: " + loss_init + " -----");
-}
-
-if( !converged ) {
- print ("Max iteration achieved but not converged!");
-}
-
-V = t(V);
-write (U, fileU, format=fmtO);
-write (V, fileV, format=fmtO);
-
\ No newline at end of file