You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2016/01/08 20:07:57 UTC
[2/4] incubator-systemml git commit: [SYSTEMML-318] Moved new als-cg
script to algorithms, renamed old als-ds
[SYSTEMML-318] Moved new als-cg script to algorithms, renamed old als-ds
https://issues.apache.org/jira/browse/SYSTEMML-318
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/a71bb12a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/a71bb12a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/a71bb12a
Branch: refs/heads/master
Commit: a71bb12ae68d08f60185bd8a9e09f47b532a4cc2
Parents: 96019cf
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Thu Jan 7 12:04:15 2016 -0800
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Thu Jan 7 12:04:15 2016 -0800
----------------------------------------------------------------------
scripts/algorithms/ALS-CG.dml | 176 +++++++++++++++++++++++++++++++++++++
scripts/algorithms/ALS-DS.dml | 170 +++++++++++++++++++++++++++++++++++
scripts/algorithms/ALS.dml | 170 -----------------------------------
scripts/staging/ALS-CG.dml | 176 -------------------------------------
4 files changed, 346 insertions(+), 346 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a71bb12a/scripts/algorithms/ALS-CG.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/ALS-CG.dml b/scripts/algorithms/ALS-CG.dml
new file mode 100644
index 0000000..cd2ba0b
--- /dev/null
+++ b/scripts/algorithms/ALS-CG.dml
@@ -0,0 +1,176 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#
+# THIS SCRIPT COMPUTES AN APPROXIMATE FACTORIZATION OF A LOW-RANK MATRIX X INTO TWO MATRICES U AND V
+# USING ALTERNATING-LEAST-SQUARES (ALS) ALGORITHM WITH CONJUGATE GRADIENT
+# MATRICES U AND V ARE COMPUTED BY MINIMIZING A LOSS FUNCTION (WITH REGULARIZATION)
+#
+# INPUT PARAMETERS:
+# ---------------------------------------------------------------------------------------------
+# NAME TYPE DEFAULT MEANING
+# ---------------------------------------------------------------------------------------------
+# X String --- Location to read the input matrix X to be factorized
+# U String --- Location to write the factor matrix U
+# V String --- Location to write the factor matrix V
+# rank Int 10 Rank of the factorization
+# reg String "L2" Regularization:
+# "L2" = L2 regularization;
+# "wL2" = weighted L2 regularization
+# lambda Double 0.000001 Regularization parameter, no regularization if 0.0
+# maxi Int 50 Maximum number of iterations
+# check Boolean TRUE Check for convergence after every iteration, i.e., updating U and V once
+# thr Double 0.0001 Assuming check is set to TRUE, the algorithm stops and convergence is declared
+# if the decrease in loss in any two consecutive iterations falls below this threshold;
+# if check is FALSE thr is ignored
+# fmt String "text" The output format of the factor matrices U and V, such as "text" or "csv"
+# ---------------------------------------------------------------------------------------------
+# OUTPUT:
+# 1- An m x r matrix U, where r is the factorization rank
+# 2- An r x n matrix V
+#
+# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
+# hadoop jar SystemML.jar -f ALS-CG.dml -nvargs X=INPUT_DIR/X U=OUTPUT_DIR/U V=OUTPUT_DIR/V rank=10 reg="L2" lambda=0.0001 fmt=csv
+
+fileX = $X;
+fileU = $U;
+fileV = $V;
+
+# Default values of some parameters
+r = ifdef ($rank, 10); # $rank=10;
+reg = ifdef ($reg, "L2") # $reg="L2";
+lambda = ifdef ($lambda, 0.000001); # $lambda=0.000001;
+max_iter = ifdef ($maxi, 50); # $maxi=50;
+check = ifdef ($check, TRUE); # $check=TRUE;
+thr = ifdef ($thr, 0.0001); # $thr=0.0001;
+fmtO = ifdef ($fmt, "text"); # $fmt="text";
+
+
+###### MAIN PART ######
+X = read (fileX);
+m = nrow (X);
+n = ncol (X);
+
+# initializing factor matrices
+U = rand (rows = m, cols = r, min = -0.5, max = 0.5); # mxr
+V = rand (rows = n, cols = r, min = -0.5, max = 0.5); # nxr
+
+W = ppred (X, 0, "!=");
+
+# check for regularization
+if( reg == "L2" ) {
+ print ("BEGIN ALS-CG SCRIPT WITH NONZERO SQUARED LOSS + L2 WITH LAMBDA - " + lambda);
+ row_nonzeros = matrix(1, nrow(W), 1);
+ col_nonzeros = matrix(1, ncol(W), 1);
+}
+else if( reg == "wL2" ) {
+ print ("BEGIN ALS-CG SCRIPT WITH NONZERO SQUARED LOSS + WEIGHTED L2 WITH LAMBDA - " + lambda);
+ row_nonzeros = rowSums(W);
+ col_nonzeros = t(colSums(W));
+}
+else {
+ stop ("wrong regularization! " + reg);
+}
+
+# Loss Function with L2:
+# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
+# + 0.5 * lambda * (sum (U ^ 2) + sum (V ^ 2))
+# Loss Function with weighted L2:
+# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
+# + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros))
+
+is_U = TRUE; # TRUE = Optimize U, FALSE = Optimize V
+maxinneriter = r ; # min (ncol (U), 15);
+
+if( check ) {
+ loss_init = 0.5 * sum (ppred(X,0, "!=") * (U %*% t(V) - X) ^ 2);
+ loss_init = loss_init + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros));
+ print ("----- Initial train loss: " + loss_init + " -----");
+}
+
+it = 0;
+converged = FALSE;
+while( as.integer(it/2) < max_iter & ! converged )
+{
+ it = it + 1;
+ if( is_U ) {
+ G = (ppred(X,0,"!=") * (U %*% t(V) - X)) %*% V + lambda * U * row_nonzeros;
+ }
+ else {
+ G = t(t(U) %*% (ppred(X,0,"!=") * (U %*% t(V) - X))) + lambda * V * col_nonzeros;
+ }
+
+ R = -G;
+ S = R;
+ norm_G2 = sum (G ^ 2);
+ norm_R2 = norm_G2;
+
+ inneriter = 1;
+ tt = 0.000000001;
+ while( norm_R2 > tt * norm_G2 & inneriter <= maxinneriter )
+ {
+ if( is_U ) {
+ HS = (W * (S %*% t(V))) %*% V + lambda * S * row_nonzeros;
+ alpha = norm_R2 / sum (S * HS);
+ U = U + alpha * S; # OK since U is not used in HS
+ }
+ else {
+ HS = t(t(U) %*% (W * (U %*% t(S)))) + lambda * S * col_nonzeros;
+ alpha = norm_R2 / sum (S * HS);
+ V = V + alpha * S; # OK since V is not used in HS
+ }
+
+ R = R - alpha * HS;
+ old_norm_R2 = norm_R2;
+ norm_R2 = sum (R ^ 2);
+ S = R + (norm_R2 / old_norm_R2) * S;
+ inneriter = inneriter + 1;
+ }
+
+ is_U = ! is_U;
+
+ # check for convergence
+ if( check & (it%%2 == 0) ) {
+ loss_cur = 0.5 * sum (ppred(X,0, "!=") * (U %*% t(V) - X) ^ 2);
+ loss_cur = loss_cur + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros));
+
+ loss_dec = (loss_init - loss_cur) / loss_init;
+ print ("Train loss at iteration (" + as.integer(it/2) + "): " + loss_cur + " loss-dec " + loss_dec);
+ if( loss_dec >= 0 & loss_dec < thr | loss_init == 0 ) {
+ print ("----- ALS-CG converged after " + as.integer(it/2) + " iterations!");
+ converged = TRUE;
+ }
+ loss_init = loss_cur;
+ }
+}
+
+if( check ) {
+ print ("----- Final train loss: " + loss_init + " -----");
+}
+
+if( !converged ) {
+ print ("Max iteration achieved but not converged!");
+}
+
+V = t(V);
+write (U, fileU, format=fmtO);
+write (V, fileV, format=fmtO);
+
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a71bb12a/scripts/algorithms/ALS-DS.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/ALS-DS.dml b/scripts/algorithms/ALS-DS.dml
new file mode 100644
index 0000000..1d0fce4
--- /dev/null
+++ b/scripts/algorithms/ALS-DS.dml
@@ -0,0 +1,170 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#
+# THIS SCRIPT COMPUTES AN APPROXIMATE FACTORIZATION OF A LOW-RANK MATRIX V INTO TWO MATRICES L AND R
+# USING ALTERNATING-LEAST-SQUARES (ALS) ALGORITHM
+# MATRICES L AND R ARE COMPUTED BY MINIMIZING A LOSS FUNCTION (WITH REGULARIZATION)
+#
+# INPUT PARAMETERS:
+# ---------------------------------------------------------------------------------------------
+# NAME TYPE DEFAULT MEANING
+# ---------------------------------------------------------------------------------------------
+# V String --- Location to read the input matrix V to be factorized
+# L String --- Location to write the factor matrix L
+# R String --- Location to write the factor matrix R
+# rank Int 10 Rank of the factorization
+# reg String "L2" Regularization:
+# "L2" = L2 regularization;
+# "wL2" = weighted L2 regularization
+# lambda Double 0.000001 Regularization parameter, no regularization if 0.0
+# maxi Int 50 Maximum number of iterations
+# check Boolean FALSE Check for convergence after every iteration, i.e., updating L and R once
+# thr Double 0.0001 Assuming check is set to TRUE, the algorithm stops and convergence is declared
+# if the decrease in loss in any two consecutive iterations falls below this threshold;
+# if check is FALSE thr is ignored
+# fmt String "text" The output format of the factor matrices L and R, such as "text" or "csv"
+# ---------------------------------------------------------------------------------------------
+# OUTPUT:
+# 1- An m x r matrix L, where r is the factorization rank
+# 2- An r x n matrix R
+#
+# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
+# hadoop jar SystemML.jar -f ALS-DS.dml -nvargs V=INPUT_DIR/V L=OUTPUT_DIR/L R=OUTPUT_DIR/R rank=10 reg="L2" lambda=0.0001 fmt=csv
+
+fileV = $V;
+fileL = $L;
+fileR = $R;
+
+# Default values of some parameters
+r = ifdef ($rank, 10); # $rank=10;
+reg = ifdef ($reg, "L2") # $reg="L2";
+lambda = ifdef ($lambda, 0.000001); # $lambda=0.000001;
+max_iter = ifdef ($maxi, 50); # $maxi=50;
+check = ifdef ($check, FALSE); # $check=FALSE;
+thr = ifdef ($thr, 0.0001); # $thr=0.0001;
+fmtO = ifdef ($fmt, "text"); # $fmt="text";
+
+V = read (fileV);
+
+
+# check the input matrix V, if some rows or columns contain only zeros remove them from V
+V_nonzero_ind = ppred (V, 0, "!=");
+row_nonzeros = rowSums (V_nonzero_ind);
+col_nonzeros = t (colSums (V_nonzero_ind));
+orig_nonzero_rows_ind = ppred (row_nonzeros, 0, "!=");
+orig_nonzero_cols_ind = ppred (col_nonzeros, 0, "!=");
+num_zero_rows = nrow (V) - sum (orig_nonzero_rows_ind);
+num_zero_cols = ncol (V) - sum (orig_nonzero_cols_ind);
+if (num_zero_rows > 0) {
+ print ("Matrix V contains empty rows! These rows will be removed.");
+ V = removeEmpty (target = V, margin = "rows");
+}
+if (num_zero_cols > 0) {
+ print ("Matrix V contains empty columns! These columns will be removed.");
+ V = removeEmpty (target = V, margin = "cols");
+}
+if (num_zero_rows > 0 | num_zero_cols > 0) {
+ print ("Recomputing nonzero rows and columns!");
+ V_nonzero_ind = ppred (V, 0, "!=");
+ row_nonzeros = rowSums (V_nonzero_ind);
+ col_nonzeros = t (colSums (V_nonzero_ind));
+}
+
+###### MAIN PART ######
+m = nrow (V);
+n = ncol (V);
+
+# initializing factor matrices
+L = rand (rows = m, cols = r, min = -0.5, max = 0.5);
+R = rand (rows = n, cols = r, min = -0.5, max = 0.5);
+
+# initializing transformed matrices
+Vt = t(V);
+
+# check for regularization
+if (reg == "L2") {
+ print ("BEGIN ALS SCRIPT WITH NONZERO SQUARED LOSS + L2 WITH LAMBDA - " + lambda);
+} else if (reg == "wL2") {
+ print ("BEGIN ALS SCRIPT WITH NONZERO SQUARED LOSS + WEIGHTED L2 WITH LAMBDA - " + lambda);
+} else {
+ stop ("wrong regularization! " + reg);
+}
+
+if (check) {
+ loss_init = sum (V_nonzero_ind * (V - (L %*% t(R)))^2) + lambda * (sum ((L^2) * row_nonzeros) + sum ((R^2) * col_nonzeros));
+ print ("----- Initial train loss: " + loss_init + " -----");
+}
+
+lambda_I = diag (matrix (lambda, rows = r, cols = 1));
+it = 0;
+converged = FALSE;
+while ((it < max_iter) & (!converged)) {
+ it = it + 1;
+ # keep R fixed and update L
+ parfor (i in 1:m) {
+ R_nonzero_ind = t(ppred(V[i,],0,"!="));
+ R_nonzero = removeEmpty (target=R * R_nonzero_ind, margin="rows");
+ A1 = (t(R_nonzero) %*% R_nonzero) + (as.scalar(row_nonzeros[i,1]) * lambda_I); # coefficient matrix
+ L[i,] = t(solve (A1, t(V[i,] %*% R)));
+ }
+
+ # keep L fixed and update R
+ parfor (j in 1:n) {
+ L_nonzero_ind = t(ppred(Vt[j,],0,"!="))
+ L_nonzero = removeEmpty (target=L * L_nonzero_ind, margin="rows");
+ A2 = (t(L_nonzero) %*% L_nonzero) + (as.scalar(col_nonzeros[j,1]) * lambda_I); # coefficient matrix
+ R[j,] = t(solve (A2, t(Vt[j,] %*% L)));
+ }
+
+ # check for convergence
+ if (check) {
+ loss_cur = sum (V_nonzero_ind * (V - (L %*% t(R)))^2) + lambda * (sum ((L^2) * row_nonzeros) + sum ((R^2) * col_nonzeros));
+ loss_dec = (loss_init - loss_cur) / loss_init;
+ print ("Train loss at iteration (R) " + it + ": " + loss_cur + " loss-dec " + loss_dec);
+ if (loss_dec >= 0 & loss_dec < thr | loss_init == 0) {
+ print ("----- ALS converged after " + it + " iterations!");
+ converged = TRUE;
+ }
+ loss_init = loss_cur;
+ }
+} # end of while loop
+
+if (check) {
+ print ("----- Final train loss: " + loss_init + " -----");
+}
+
+if (!converged) {
+ print ("Max iteration achieved but not converged!");
+}
+
+# inject 0s in L if original V had empty rows
+if (num_zero_rows > 0) {
+ L = removeEmpty (target = diag (orig_nonzero_rows_ind), margin = "cols") %*% L;
+}
+# inject 0s in R if original V had empty columns
+if (num_zero_cols > 0) {
+ R = removeEmpty (target = diag (orig_nonzero_cols_ind), margin = "cols") %*% R;
+}
+Rt = t (R);
+write (L, fileL, format=fmtO);
+write (Rt, fileR, format=fmtO);
+
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a71bb12a/scripts/algorithms/ALS.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/ALS.dml b/scripts/algorithms/ALS.dml
deleted file mode 100644
index 1d0fce4..0000000
--- a/scripts/algorithms/ALS.dml
+++ /dev/null
@@ -1,170 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#
-# THIS SCRIPT COMPUTES AN APPROXIMATE FACTORIZATIONOF A LOW-RANK MATRIX V INTO TWO MATRICES L AND R
-# USING ALTERNATING-LEAST-SQUARES (ALS) ALGORITHM
-# MATRICES L AND R ARE COMPUTED BY MINIMIZING A LOSS FUNCTION (WITH REGULARIZATION)
-#
-# INPUT PARAMETERS:
-# ---------------------------------------------------------------------------------------------
-# NAME TYPE DEFAULT MEANING
-# ---------------------------------------------------------------------------------------------
-# V String --- Location to read the input matrix V to be factorized
-# L String --- Location to write the factor matrix L
-# R String --- Location to write the factor matrix R
-# rank Int 10 Rank of the factorization
-# reg String "L2" Regularization:
-# "L2" = L2 regularization;
-# "wL2" = weighted L2 regularization
-# lambda Double 0.0 Regularization parameter, no regularization if 0.0
-# maxi Int 50 Maximum number of iterations
-# check Boolean FALSE Check for convergence after every iteration, i.e., updating L and R once
-# thr Double 0.0001 Assuming check is set to TRUE, the algorithm stops and convergence is declared
-# if the decrease in loss in any two consecutive iterations falls below this threshold;
-# if check is FALSE thr is ignored
-# fmt String "text" The output format of the factor matrices L and R, such as "text" or "csv"
-# ---------------------------------------------------------------------------------------------
-# OUTPUT:
-# 1- An m x r matrix L, where r is the factorization rank
-# 2- An r x n matrix R
-#
-# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
-# hadoop jar SystemML.jar -f ALS.dml -nvargs V=INPUT_DIR/V L=OUTPUT_DIR/L R=OUTPUT_DIR/R rank=10 reg="L2" lambda=0.0001 fmt=csv
-
-fileV = $V;
-fileL = $L;
-fileR = $R;
-
-# Default values of some parameters
-r = ifdef ($rank, 10); # $rank=10;
-reg = ifdef ($reg, "L2") # $reg="L2";
-lambda = ifdef ($lambda, 0.000001); # $lambda=0.000001;
-max_iter = ifdef ($maxi, 50); # $maxi=50;
-check = ifdef ($check, FALSE); # $check=FALSE;
-thr = ifdef ($thr, 0.0001); # $thr=0.0001;
-fmtO = ifdef ($fmt, "text"); # $fmt="text";
-
-V = read (fileV);
-
-
-# check the input matrix V, if some rows or columns contain only zeros remove them from V
-V_nonzero_ind = ppred (V, 0, "!=");
-row_nonzeros = rowSums (V_nonzero_ind);
-col_nonzeros = t (colSums (V_nonzero_ind));
-orig_nonzero_rows_ind = ppred (row_nonzeros, 0, "!=");
-orig_nonzero_cols_ind = ppred (col_nonzeros, 0, "!=");
-num_zero_rows = nrow (V) - sum (orig_nonzero_rows_ind);
-num_zero_cols = ncol (V) - sum (orig_nonzero_cols_ind);
-if (num_zero_rows > 0) {
- print ("Matrix V contains empty rows! These rows will be removed.");
- V = removeEmpty (target = V, margin = "rows");
-}
-if (num_zero_cols > 0) {
- print ("Matrix V contains empty columns! These columns will be removed.");
- V = removeEmpty (target = V, margin = "cols");
-}
-if (num_zero_rows > 0 | num_zero_cols > 0) {
- print ("Recomputing nonzero rows and columns!");
- V_nonzero_ind = ppred (V, 0, "!=");
- row_nonzeros = rowSums (V_nonzero_ind);
- col_nonzeros = t (colSums (V_nonzero_ind));
-}
-
-###### MAIN PART ######
-m = nrow (V);
-n = ncol (V);
-
-# initializing factor matrices
-L = rand (rows = m, cols = r, min = -0.5, max = 0.5);
-R = rand (rows = n, cols = r, min = -0.5, max = 0.5);
-
-# initializing transformed matrices
-Vt = t(V);
-
-# check for regularization
-if (reg == "L2") {
- print ("BEGIN ALS SCRIPT WITH NONZERO SQUARED LOSS + L2 WITH LAMBDA - " + lambda);
-} else if (reg == "wL2") {
- print ("BEGIN ALS SCRIPT WITH NONZERO SQUARED LOSS + WEIGHTED L2 WITH LAMBDA - " + lambda);
-} else {
- stop ("wrong regularization! " + reg);
-}
-
-if (check) {
- loss_init = sum (V_nonzero_ind * (V - (L %*% t(R)))^2) + lambda * (sum ((L^2) * row_nonzeros) + sum ((R^2) * col_nonzeros));
- print ("----- Initial train loss: " + loss_init + " -----");
-}
-
-lambda_I = diag (matrix (lambda, rows = r, cols = 1));
-it = 0;
-converged = FALSE;
-while ((it < max_iter) & (!converged)) {
- it = it + 1;
- # keep R fixed and update L
- parfor (i in 1:m) {
- R_nonzero_ind = t(ppred(V[i,],0,"!="));
- R_nonzero = removeEmpty (target=R * R_nonzero_ind, margin="rows");
- A1 = (t(R_nonzero) %*% R_nonzero) + (as.scalar(row_nonzeros[i,1]) * lambda_I); # coefficient matrix
- L[i,] = t(solve (A1, t(V[i,] %*% R)));
- }
-
- # keep L fixed and update R
- parfor (j in 1:n) {
- L_nonzero_ind = t(ppred(Vt[j,],0,"!="))
- L_nonzero = removeEmpty (target=L * L_nonzero_ind, margin="rows");
- A2 = (t(L_nonzero) %*% L_nonzero) + (as.scalar(col_nonzeros[j,1]) * lambda_I); # coefficient matrix
- R[j,] = t(solve (A2, t(Vt[j,] %*% L)));
- }
-
- # check for convergence
- if (check) {
- loss_cur = sum (V_nonzero_ind * (V - (L %*% t(R)))^2) + lambda * (sum ((L^2) * row_nonzeros) + sum ((R^2) * col_nonzeros));
- loss_dec = (loss_init - loss_cur) / loss_init;
- print ("Train loss at iteration (R) " + it + ": " + loss_cur + " loss-dec " + loss_dec);
- if (loss_dec >= 0 & loss_dec < thr | loss_init == 0) {
- print ("----- ALS converged after " + it + " iterations!");
- converged = TRUE;
- }
- loss_init = loss_cur;
- }
-} # end of while loop
-
-if (check) {
- print ("----- Final train loss: " + loss_init + " -----");
-}
-
-if (!converged) {
- print ("Max iteration achieved but not converged!");
-}
-
-# inject 0s in L if original V had empty rows
-if (num_zero_rows > 0) {
- L = removeEmpty (target = diag (orig_nonzero_rows_ind), margin = "cols") %*% L;
-}
-# inject 0s in R if original V had empty rows
-if (num_zero_cols > 0) {
- R = removeEmpty (target = diag (orig_nonzero_cols_ind), margin = "cols") %*% R;
-}
-Rt = t (R);
-write (L, fileL, format=fmtO);
-write (Rt, fileR, format=fmtO);
-
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a71bb12a/scripts/staging/ALS-CG.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/ALS-CG.dml b/scripts/staging/ALS-CG.dml
deleted file mode 100644
index cd2ba0b..0000000
--- a/scripts/staging/ALS-CG.dml
+++ /dev/null
@@ -1,176 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#
-# THIS SCRIPT COMPUTES AN APPROXIMATE FACTORIZATIONOF A LOW-RANK MATRIX X INTO TWO MATRICES U AND V
-# USING ALTERNATING-LEAST-SQUARES (ALS) ALGORITHM WITH CONJUGATE GRADIENT
-# MATRICES U AND V ARE COMPUTED BY MINIMIZING A LOSS FUNCTION (WITH REGULARIZATION)
-#
-# INPUT PARAMETERS:
-# ---------------------------------------------------------------------------------------------
-# NAME TYPE DEFAULT MEANING
-# ---------------------------------------------------------------------------------------------
-# X String --- Location to read the input matrix X to be factorized
-# U String --- Location to write the factor matrix U
-# V String --- Location to write the factor matrix V
-# rank Int 10 Rank of the factorization
-# reg String "L2" Regularization:
-# "L2" = L2 regularization;
-# "wL2" = weighted L2 regularization
-# lambda Double 0.000001 Regularization parameter, no regularization if 0.0
-# maxi Int 50 Maximum number of iterations
-# check Boolean FALSE Check for convergence after every iteration, i.e., updating U and V once
-# thr Double 0.0001 Assuming check is set to TRUE, the algorithm stops and convergence is declared
-# if the decrease in loss in any two consecutive iterations falls below this threshold;
-# if check is FALSE thr is ignored
-# fmt String "text" The output format of the factor matrices L and R, such as "text" or "csv"
-# ---------------------------------------------------------------------------------------------
-# OUTPUT:
-# 1- An m x r matrix U, where r is the factorization rank
-# 2- An r x n matrix V
-#
-# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
-# hadoop jar SystemML.jar -f ALS-CG.dml -nvargs X=INPUT_DIR/X U=OUTPUT_DIR/U V=OUTPUT_DIR/V rank=10 reg="L2" lambda=0.0001 fmt=csv
-
-fileX = $X;
-fileU = $U;
-fileV = $V;
-
-# Default values of some parameters
-r = ifdef ($rank, 10); # $rank=10;
-reg = ifdef ($reg, "L2") # $reg="L2";
-lambda = ifdef ($lambda, 0.000001); # $lambda=0.000001;
-max_iter = ifdef ($maxi, 50); # $maxi=50;
-check = ifdef ($check, TRUE); # $check=FALSE;
-thr = ifdef ($thr, 0.0001); # $thr=0.0001;
-fmtO = ifdef ($fmt, "text"); # $fmt="text";
-
-
-###### MAIN PART ######
-X = read (fileX);
-m = nrow (X);
-n = ncol (X);
-
-# initializing factor matrices
-U = rand (rows = m, cols = r, min = -0.5, max = 0.5); # mxr
-V = rand (rows = n, cols = r, min = -0.5, max = 0.5); # nxr
-
-W = ppred (X, 0, "!=");
-
-# check for regularization
-if( reg == "L2" ) {
- print ("BEGIN ALS-CG SCRIPT WITH NONZERO SQUARED LOSS + L2 WITH LAMBDA - " + lambda);
- row_nonzeros = matrix(1, nrow(W), 1);
- col_nonzeros = matrix(1, ncol(W), 1);
-}
-else if( reg == "wL2" ) {
- print ("BEGIN ALS-CG SCRIPT WITH NONZERO SQUARED LOSS + WEIGHTED L2 WITH LAMBDA - " + lambda);
- row_nonzeros = rowSums(W);
- col_nonzeros = t(colSums(W));
-}
-else {
- stop ("wrong regularization! " + reg);
-}
-
-# Loss Function with L2:
-# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
-# + 0.5 * lambda * (sum (U ^ 2) + sum (V ^ 2))
-# Loss Function with weighted L2:
-# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
-# + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros))
-
-is_U = TRUE; # TRUE = Optimize U, FALSE = Optimize V
-maxinneriter = r ; # min (ncol (U), 15);
-
-if( check ) {
- loss_init = 0.5 * sum (ppred(X,0, "!=") * (U %*% t(V) - X) ^ 2);
- loss_init = loss_init + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros));
- print ("----- Initial train loss: " + loss_init + " -----");
-}
-
-it = 0;
-converged = FALSE;
-while( as.integer(it/2) < max_iter & ! converged )
-{
- it = it + 1;
- if( is_U ) {
- G = (ppred(X,0,"!=") * (U %*% t(V) - X)) %*% V + lambda * U * row_nonzeros;
- }
- else {
- G = t(t(U) %*% (ppred(X,0,"!=") * (U %*% t(V) - X))) + lambda * V * col_nonzeros;
- }
-
- R = -G;
- S = R;
- norm_G2 = sum (G ^ 2);
- norm_R2 = norm_G2;
-
- inneriter = 1;
- tt = 0.000000001;
- while( norm_R2 > tt * norm_G2 & inneriter <= maxinneriter )
- {
- if( is_U ) {
- HS = (W * (S %*% t(V))) %*% V + lambda * S * row_nonzeros;
- alpha = norm_R2 / sum (S * HS);
- U = U + alpha * S; # OK since U is not used in HS
- }
- else {
- HS = t(t(U) %*% (W * (U %*% t(S)))) + lambda * S * col_nonzeros;
- alpha = norm_R2 / sum (S * HS);
- V = V + alpha * S; # OK since V is not used in HS
- }
-
- R = R - alpha * HS;
- old_norm_R2 = norm_R2;
- norm_R2 = sum (R ^ 2);
- S = R + (norm_R2 / old_norm_R2) * S;
- inneriter = inneriter + 1;
- }
-
- is_U = ! is_U;
-
- # check for convergence
- if( check & (it%%2 == 0) ) {
- loss_cur = 0.5 * sum (ppred(X,0, "!=") * (U %*% t(V) - X) ^ 2);
- loss_cur = loss_cur + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros));
-
- loss_dec = (loss_init - loss_cur) / loss_init;
- print ("Train loss at iteration (" + as.integer(it/2) + "): " + loss_cur + " loss-dec " + loss_dec);
- if( loss_dec >= 0 & loss_dec < thr | loss_init == 0 ) {
- print ("----- ALS-CG converged after " + as.integer(it/2) + " iterations!");
- converged = TRUE;
- }
- loss_init = loss_cur;
- }
-}
-
-if( check ) {
- print ("----- Final train loss: " + loss_init + " -----");
-}
-
-if( !converged ) {
- print ("Max iteration achieved but not converged!");
-}
-
-V = t(V);
-write (U, fileU, format=fmtO);
-write (V, fileV, format=fmtO);
-
\ No newline at end of file