You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2016/01/08 20:07:56 UTC
[1/4] incubator-systemml git commit: [SYSTEMML-149] Cleanup remaining
nimble references and custom config
Repository: incubator-systemml
Updated Branches:
refs/heads/master 895610547 -> 19af3f9be
[SYSTEMML-149] Cleanup remaining nimble references and custom config
https://issues.apache.org/jira/browse/SYSTEMML-149
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/96019cf5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/96019cf5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/96019cf5
Branch: refs/heads/master
Commit: 96019cf5e2d62410a631f754c7654e27de842bf7
Parents: 8956105
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Thu Jan 7 11:52:14 2016 -0800
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Thu Jan 7 11:52:14 2016 -0800
----------------------------------------------------------------------
.../java/org/apache/sysml/conf/DMLConfig.java | 18 +-----------------
.../ExternalFunctionProgramBlock.java | 7 ++-----
.../ExternalFunctionProgramBlockCP.java | 9 +++------
.../sysml/test/integration/AutomatedTestBase.java | 16 +---------------
.../functions/dmlscript/SystemML-config.xml | 6 ------
5 files changed, 7 insertions(+), 49 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/96019cf5/src/main/java/org/apache/sysml/conf/DMLConfig.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/conf/DMLConfig.java b/src/main/java/org/apache/sysml/conf/DMLConfig.java
index eefafc6..2782487 100644
--- a/src/main/java/org/apache/sysml/conf/DMLConfig.java
+++ b/src/main/java/org/apache/sysml/conf/DMLConfig.java
@@ -70,14 +70,6 @@ public class DMLConfig
public static final String CP_PARALLEL_MATRIXMULT = "cp.parallel.matrixmult";
public static final String CP_PARALLEL_TEXTIO = "cp.parallel.textio";
- //obsolete nimble configuration (removed 06/24/2015)
- //public static final String NUM_MERGE_TASKS = "NumMergeTasks";
- //public static final String NUM_SOW_THREADS = "NumberOfSowThreads";
- //public static final String NUM_REAP_THREADS = "NumberOfReapThreads";
- //public static final String SOWER_WAIT_INTERVAL = "SowerWaitInterval";
- //public static final String REAPER_WAIT_INTERVAL = "ReaperWaitInterval";
- //public static final String NIMBLE_SCRATCH = "NimbleScratch";
-
//internal config
public static final String DEFAULT_SHARED_DIR_PERMISSION = "777"; //for local fs and DFS
public static String LOCAL_MR_MODE_STAGING_DIR = null;
@@ -101,12 +93,6 @@ public class DMLConfig
_defaultVals.put(YARN_APPMASTERMEM, "2048" );
_defaultVals.put(YARN_MAPREDUCEMEM, "-1" );
_defaultVals.put(YARN_APPQUEUE, "default" );
- //_defaultVals.put(NUM_MERGE_TASKS, "4" );
- //_defaultVals.put(NUM_SOW_THREADS, "1" );
- //_defaultVals.put(NUM_REAP_THREADS, "1" );
- //_defaultVals.put(SOWER_WAIT_INTERVAL, "1000" );
- //_defaultVals.put(REAPER_WAIT_INTERVAL, "1000" );
- //_defaultVals.put(NIMBLE_SCRATCH, "nimbleoutput" );
_defaultVals.put(CP_PARALLEL_MATRIXMULT, "true" );
_defaultVals.put(CP_PARALLEL_TEXTIO, "true" );
}
@@ -419,9 +405,7 @@ public class DMLConfig
String[] tmpConfig = new String[] {
LOCAL_TMP_DIR,SCRATCH_SPACE,OPTIMIZATION_LEVEL,
NUM_REDUCERS, DEFAULT_BLOCK_SIZE,
- YARN_APPMASTER, YARN_APPMASTERMEM, YARN_MAPREDUCEMEM,
- //NUM_MERGE_TASKS, NUM_SOW_THREADS,NUM_REAP_THREADS,
- //SOWER_WAIT_INTERVAL,REAPER_WAIT_INTERVAL,NIMBLE_SCRATCH
+ YARN_APPMASTER, YARN_APPMASTERMEM, YARN_MAPREDUCEMEM,
CP_PARALLEL_MATRIXMULT, CP_PARALLEL_TEXTIO
};
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/96019cf5/src/main/java/org/apache/sysml/runtime/controlprogram/ExternalFunctionProgramBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/ExternalFunctionProgramBlock.java b/src/main/java/org/apache/sysml/runtime/controlprogram/ExternalFunctionProgramBlock.java
index b3fbf8c..03a5d44 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/ExternalFunctionProgramBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/ExternalFunctionProgramBlock.java
@@ -98,8 +98,6 @@ public class ExternalFunctionProgramBlock extends FunctionProgramBlock
{
super(prog, inputParams, outputParams);
_baseDir = baseDir;
-
- //NOTE: no need to setup nimble queue for CP external functions
}
public ExternalFunctionProgramBlock(Program prog,
@@ -610,10 +608,9 @@ public class ExternalFunctionProgramBlock extends FunctionProgramBlock
/**
* Method to execute an external function invocation instruction.
*
+ * @param ec
* @param inst
- * @param dQueue
- * @throws NimbleCheckedRuntimeException
- * @throws DMLRuntimeException
+ * @throws DMLRuntimeException
*/
@SuppressWarnings("unchecked")
public void executeInstruction(ExecutionContext ec, ExternalFunctionInvocationInstruction inst)
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/96019cf5/src/main/java/org/apache/sysml/runtime/controlprogram/ExternalFunctionProgramBlockCP.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/ExternalFunctionProgramBlockCP.java b/src/main/java/org/apache/sysml/runtime/controlprogram/ExternalFunctionProgramBlockCP.java
index a143337..ad0dbcd 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/ExternalFunctionProgramBlockCP.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/ExternalFunctionProgramBlockCP.java
@@ -110,18 +110,15 @@ public class ExternalFunctionProgramBlockCP extends ExternalFunctionProgramBlock
}
/**
- * Executes the external function instruction without the use of NIMBLE tasks.
+ * Executes the external function instruction.
*
- * @param inst
- * @throws DMLRuntimeException
- * @throws NimbleCheckedRuntimeException
*/
@Override
public void executeInstruction(ExecutionContext ec, ExternalFunctionInvocationInstruction inst)
throws DMLRuntimeException
{
- // After removal of nimble, we moved the code of ExternalFunctionProgramBlockCP to
- // ExternalFunctionProgramBlock and hence hence both types of external functions can
+ // After the udf framework rework, we moved the code of ExternalFunctionProgramBlockCP
+ to ExternalFunctionProgramBlock and hence both types of external functions can
// share the same code path here.
super.executeInstruction(ec, inst);
}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/96019cf5/src/test/java/org/apache/sysml/test/integration/AutomatedTestBase.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/AutomatedTestBase.java b/src/test/java/org/apache/sysml/test/integration/AutomatedTestBase.java
index efeb24e..9688754 100644
--- a/src/test/java/org/apache/sysml/test/integration/AutomatedTestBase.java
+++ b/src/test/java/org/apache/sysml/test/integration/AutomatedTestBase.java
@@ -828,21 +828,7 @@ public abstract class AutomatedTestBase
String configTemplate = FileUtils.readFileToString(CONFIG_TEMPLATE_FILE,
"UTF-8");
- // *** HACK ALERT *** HACK ALERT *** HACK ALERT ***
- // Nimble does not accept paths that use backslash as the separator character.
- // Since some of the tests use Nimble, we use forward slash in the paths that
- // we put into the config file.
- String localTempForwardSlash = curLocalTempDir.getPath().replace(File.separator, "/");
- String configContents = configTemplate.replace("<scratch>scratch_space</scratch>",
- String.format("<scratch>%s/scratch_space</scratch>", localTempForwardSlash));
- configContents = configContents.replace("<localtmpdir>/tmp/systemml</localtmpdir>",
- String.format("<localtmpdir>%s/localtmp</localtmpdir>", localTempForwardSlash));
- configContents = configContents.replace("<NimbleScratch>nimbleoutput</NimbleScratch>",
- String.format("<NimbleScratch>%s/nimbleoutput</NimbleScratch>",
- localTempForwardSlash));
- // *** END HACK ***
-
- FileUtils.write(getCurConfigFile(), configContents, "UTF-8");
+ FileUtils.write(getCurConfigFile(), configTemplate, "UTF-8");
System.out.printf(
"This test case will use SystemML config file %s\n",
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/96019cf5/src/test/scripts/functions/dmlscript/SystemML-config.xml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/dmlscript/SystemML-config.xml b/src/test/scripts/functions/dmlscript/SystemML-config.xml
index 8a9a457..dfcbd87 100644
--- a/src/test/scripts/functions/dmlscript/SystemML-config.xml
+++ b/src/test/scripts/functions/dmlscript/SystemML-config.xml
@@ -20,12 +20,6 @@
<numreducers>10</numreducers>
<scratch>scratch_space</scratch>
<defaultblocksize>1000</defaultblocksize>
-<NumMergeTasks>4</NumMergeTasks>
-<NumberOfSowThreads>1</NumberOfSowThreads>
-<NumberOfReapThreads>1</NumberOfReapThreads>
-<SowerWaitInterval>1000</SowerWaitInterval>
-<ReaperWaitInterval>1000</ReaperWaitInterval>
-<NimbleScratch>nimbleoutput</NimbleScratch>
<cp.parallel.matrixmult>true</cp.parallel.matrixmult>
<cp.parallel.textio>false</cp.parallel.textio>
</root>
[2/4] incubator-systemml git commit: [SYSTEMML-318] Moved new als-cg
script to algorithms, renamed old als-ds
Posted by mb...@apache.org.
[SYSTEMML-318] Moved new als-cg script to algorithms, renamed old als-ds
https://issues.apache.org/jira/browse/SYSTEMML-318
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/a71bb12a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/a71bb12a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/a71bb12a
Branch: refs/heads/master
Commit: a71bb12ae68d08f60185bd8a9e09f47b532a4cc2
Parents: 96019cf
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Thu Jan 7 12:04:15 2016 -0800
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Thu Jan 7 12:04:15 2016 -0800
----------------------------------------------------------------------
scripts/algorithms/ALS-CG.dml | 176 +++++++++++++++++++++++++++++++++++++
scripts/algorithms/ALS-DS.dml | 170 +++++++++++++++++++++++++++++++++++
scripts/algorithms/ALS.dml | 170 -----------------------------------
scripts/staging/ALS-CG.dml | 176 -------------------------------------
4 files changed, 346 insertions(+), 346 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a71bb12a/scripts/algorithms/ALS-CG.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/ALS-CG.dml b/scripts/algorithms/ALS-CG.dml
new file mode 100644
index 0000000..cd2ba0b
--- /dev/null
+++ b/scripts/algorithms/ALS-CG.dml
@@ -0,0 +1,176 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#
+# THIS SCRIPT COMPUTES AN APPROXIMATE FACTORIZATION OF A LOW-RANK MATRIX X INTO TWO MATRICES U AND V
+# USING ALTERNATING-LEAST-SQUARES (ALS) ALGORITHM WITH CONJUGATE GRADIENT
+# MATRICES U AND V ARE COMPUTED BY MINIMIZING A LOSS FUNCTION (WITH REGULARIZATION)
+#
+# INPUT PARAMETERS:
+# ---------------------------------------------------------------------------------------------
+# NAME TYPE DEFAULT MEANING
+# ---------------------------------------------------------------------------------------------
+# X String --- Location to read the input matrix X to be factorized
+# U String --- Location to write the factor matrix U
+# V String --- Location to write the factor matrix V
+# rank Int 10 Rank of the factorization
+# reg String "L2" Regularization:
+# "L2" = L2 regularization;
+# "wL2" = weighted L2 regularization
+# lambda Double 0.000001 Regularization parameter, no regularization if 0.0
+# maxi Int 50 Maximum number of iterations
+# check Boolean FALSE Check for convergence after every iteration, i.e., updating U and V once
+# thr Double 0.0001 Assuming check is set to TRUE, the algorithm stops and convergence is declared
+# if the decrease in loss in any two consecutive iterations falls below this threshold;
+# if check is FALSE thr is ignored
+# fmt String "text" The output format of the factor matrices L and R, such as "text" or "csv"
+# ---------------------------------------------------------------------------------------------
+# OUTPUT:
+# 1- An m x r matrix U, where r is the factorization rank
+# 2- An r x n matrix V
+#
+# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
+# hadoop jar SystemML.jar -f ALS-CG.dml -nvargs X=INPUT_DIR/X U=OUTPUT_DIR/U V=OUTPUT_DIR/V rank=10 reg="L2" lambda=0.0001 fmt=csv
+
+fileX = $X;
+fileU = $U;
+fileV = $V;
+
+# Default values of some parameters
+r = ifdef ($rank, 10); # $rank=10;
+reg = ifdef ($reg, "L2") # $reg="L2";
+lambda = ifdef ($lambda, 0.000001); # $lambda=0.000001;
+max_iter = ifdef ($maxi, 50); # $maxi=50;
+check = ifdef ($check, TRUE); # $check=TRUE; NOTE(review): header documents default FALSE -- confirm intended default
+thr = ifdef ($thr, 0.0001); # $thr=0.0001;
+fmtO = ifdef ($fmt, "text"); # $fmt="text";
+
+
+###### MAIN PART ######
+X = read (fileX);
+m = nrow (X);
+n = ncol (X);
+
+# initializing factor matrices
+U = rand (rows = m, cols = r, min = -0.5, max = 0.5); # mxr
+V = rand (rows = n, cols = r, min = -0.5, max = 0.5); # nxr
+
+W = ppred (X, 0, "!=");
+
+# check for regularization
+if( reg == "L2" ) {
+ print ("BEGIN ALS-CG SCRIPT WITH NONZERO SQUARED LOSS + L2 WITH LAMBDA - " + lambda);
+ row_nonzeros = matrix(1, nrow(W), 1);
+ col_nonzeros = matrix(1, ncol(W), 1);
+}
+else if( reg == "wL2" ) {
+ print ("BEGIN ALS-CG SCRIPT WITH NONZERO SQUARED LOSS + WEIGHTED L2 WITH LAMBDA - " + lambda);
+ row_nonzeros = rowSums(W);
+ col_nonzeros = t(colSums(W));
+}
+else {
+ stop ("wrong regularization! " + reg);
+}
+
+# Loss Function with L2:
+# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
+# + 0.5 * lambda * (sum (U ^ 2) + sum (V ^ 2))
+# Loss Function with weighted L2:
+# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
+# + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros))
+
+is_U = TRUE; # TRUE = Optimize U, FALSE = Optimize V
+maxinneriter = r ; # min (ncol (U), 15);
+
+if( check ) {
+ loss_init = 0.5 * sum (ppred(X,0, "!=") * (U %*% t(V) - X) ^ 2);
+ loss_init = loss_init + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros));
+ print ("----- Initial train loss: " + loss_init + " -----");
+}
+
+it = 0;
+converged = FALSE;
+while( as.integer(it/2) < max_iter & ! converged )
+{
+ it = it + 1;
+ if( is_U ) {
+ G = (ppred(X,0,"!=") * (U %*% t(V) - X)) %*% V + lambda * U * row_nonzeros;
+ }
+ else {
+ G = t(t(U) %*% (ppred(X,0,"!=") * (U %*% t(V) - X))) + lambda * V * col_nonzeros;
+ }
+
+ R = -G;
+ S = R;
+ norm_G2 = sum (G ^ 2);
+ norm_R2 = norm_G2;
+
+ inneriter = 1;
+ tt = 0.000000001;
+ while( norm_R2 > tt * norm_G2 & inneriter <= maxinneriter )
+ {
+ if( is_U ) {
+ HS = (W * (S %*% t(V))) %*% V + lambda * S * row_nonzeros;
+ alpha = norm_R2 / sum (S * HS);
+ U = U + alpha * S; # OK since U is not used in HS
+ }
+ else {
+ HS = t(t(U) %*% (W * (U %*% t(S)))) + lambda * S * col_nonzeros;
+ alpha = norm_R2 / sum (S * HS);
+ V = V + alpha * S; # OK since V is not used in HS
+ }
+
+ R = R - alpha * HS;
+ old_norm_R2 = norm_R2;
+ norm_R2 = sum (R ^ 2);
+ S = R + (norm_R2 / old_norm_R2) * S;
+ inneriter = inneriter + 1;
+ }
+
+ is_U = ! is_U;
+
+ # check for convergence
+ if( check & (it%%2 == 0) ) {
+ loss_cur = 0.5 * sum (ppred(X,0, "!=") * (U %*% t(V) - X) ^ 2);
+ loss_cur = loss_cur + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros));
+
+ loss_dec = (loss_init - loss_cur) / loss_init;
+ print ("Train loss at iteration (" + as.integer(it/2) + "): " + loss_cur + " loss-dec " + loss_dec);
+ if( loss_dec >= 0 & loss_dec < thr | loss_init == 0 ) {
+ print ("----- ALS-CG converged after " + as.integer(it/2) + " iterations!");
+ converged = TRUE;
+ }
+ loss_init = loss_cur;
+ }
+}
+
+if( check ) {
+ print ("----- Final train loss: " + loss_init + " -----");
+}
+
+if( !converged ) {
+ print ("Max iteration achieved but not converged!");
+}
+
+V = t(V);
+write (U, fileU, format=fmtO);
+write (V, fileV, format=fmtO);
+
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a71bb12a/scripts/algorithms/ALS-DS.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/ALS-DS.dml b/scripts/algorithms/ALS-DS.dml
new file mode 100644
index 0000000..1d0fce4
--- /dev/null
+++ b/scripts/algorithms/ALS-DS.dml
@@ -0,0 +1,170 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#
+# THIS SCRIPT COMPUTES AN APPROXIMATE FACTORIZATION OF A LOW-RANK MATRIX V INTO TWO MATRICES L AND R
+# USING ALTERNATING-LEAST-SQUARES (ALS) ALGORITHM
+# MATRICES L AND R ARE COMPUTED BY MINIMIZING A LOSS FUNCTION (WITH REGULARIZATION)
+#
+# INPUT PARAMETERS:
+# ---------------------------------------------------------------------------------------------
+# NAME TYPE DEFAULT MEANING
+# ---------------------------------------------------------------------------------------------
+# V String --- Location to read the input matrix V to be factorized
+# L String --- Location to write the factor matrix L
+# R String --- Location to write the factor matrix R
+# rank Int 10 Rank of the factorization
+# reg String "L2" Regularization:
+# "L2" = L2 regularization;
+# "wL2" = weighted L2 regularization
+# lambda Double 0.0 Regularization parameter, no regularization if 0.0
+# maxi Int 50 Maximum number of iterations
+# check Boolean FALSE Check for convergence after every iteration, i.e., updating L and R once
+# thr Double 0.0001 Assuming check is set to TRUE, the algorithm stops and convergence is declared
+# if the decrease in loss in any two consecutive iterations falls below this threshold;
+# if check is FALSE thr is ignored
+# fmt String "text" The output format of the factor matrices L and R, such as "text" or "csv"
+# ---------------------------------------------------------------------------------------------
+# OUTPUT:
+# 1- An m x r matrix L, where r is the factorization rank
+# 2- An r x n matrix R
+#
+# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
+# hadoop jar SystemML.jar -f ALS-DS.dml -nvargs V=INPUT_DIR/V L=OUTPUT_DIR/L R=OUTPUT_DIR/R rank=10 reg="L2" lambda=0.0001 fmt=csv
+
+fileV = $V;
+fileL = $L;
+fileR = $R;
+
+# Default values of some parameters
+r = ifdef ($rank, 10); # $rank=10;
+reg = ifdef ($reg, "L2") # $reg="L2";
+lambda = ifdef ($lambda, 0.000001); # $lambda=0.000001;
+max_iter = ifdef ($maxi, 50); # $maxi=50;
+check = ifdef ($check, FALSE); # $check=FALSE;
+thr = ifdef ($thr, 0.0001); # $thr=0.0001;
+fmtO = ifdef ($fmt, "text"); # $fmt="text";
+
+V = read (fileV);
+
+
+# check the input matrix V, if some rows or columns contain only zeros remove them from V
+V_nonzero_ind = ppred (V, 0, "!=");
+row_nonzeros = rowSums (V_nonzero_ind);
+col_nonzeros = t (colSums (V_nonzero_ind));
+orig_nonzero_rows_ind = ppred (row_nonzeros, 0, "!=");
+orig_nonzero_cols_ind = ppred (col_nonzeros, 0, "!=");
+num_zero_rows = nrow (V) - sum (orig_nonzero_rows_ind);
+num_zero_cols = ncol (V) - sum (orig_nonzero_cols_ind);
+if (num_zero_rows > 0) {
+ print ("Matrix V contains empty rows! These rows will be removed.");
+ V = removeEmpty (target = V, margin = "rows");
+}
+if (num_zero_cols > 0) {
+ print ("Matrix V contains empty columns! These columns will be removed.");
+ V = removeEmpty (target = V, margin = "cols");
+}
+if (num_zero_rows > 0 | num_zero_cols > 0) {
+ print ("Recomputing nonzero rows and columns!");
+ V_nonzero_ind = ppred (V, 0, "!=");
+ row_nonzeros = rowSums (V_nonzero_ind);
+ col_nonzeros = t (colSums (V_nonzero_ind));
+}
+
+###### MAIN PART ######
+m = nrow (V);
+n = ncol (V);
+
+# initializing factor matrices
+L = rand (rows = m, cols = r, min = -0.5, max = 0.5);
+R = rand (rows = n, cols = r, min = -0.5, max = 0.5);
+
+# initializing transformed matrices
+Vt = t(V);
+
+# check for regularization
+if (reg == "L2") {
+ print ("BEGIN ALS SCRIPT WITH NONZERO SQUARED LOSS + L2 WITH LAMBDA - " + lambda);
+} else if (reg == "wL2") {
+ print ("BEGIN ALS SCRIPT WITH NONZERO SQUARED LOSS + WEIGHTED L2 WITH LAMBDA - " + lambda);
+} else {
+ stop ("wrong regularization! " + reg);
+}
+
+if (check) {
+ loss_init = sum (V_nonzero_ind * (V - (L %*% t(R)))^2) + lambda * (sum ((L^2) * row_nonzeros) + sum ((R^2) * col_nonzeros));
+ print ("----- Initial train loss: " + loss_init + " -----");
+}
+
+lambda_I = diag (matrix (lambda, rows = r, cols = 1));
+it = 0;
+converged = FALSE;
+while ((it < max_iter) & (!converged)) {
+ it = it + 1;
+ # keep R fixed and update L
+ parfor (i in 1:m) {
+ R_nonzero_ind = t(ppred(V[i,],0,"!="));
+ R_nonzero = removeEmpty (target=R * R_nonzero_ind, margin="rows");
+ A1 = (t(R_nonzero) %*% R_nonzero) + (as.scalar(row_nonzeros[i,1]) * lambda_I); # coefficient matrix
+ L[i,] = t(solve (A1, t(V[i,] %*% R)));
+ }
+
+ # keep L fixed and update R
+ parfor (j in 1:n) {
+ L_nonzero_ind = t(ppred(Vt[j,],0,"!="))
+ L_nonzero = removeEmpty (target=L * L_nonzero_ind, margin="rows");
+ A2 = (t(L_nonzero) %*% L_nonzero) + (as.scalar(col_nonzeros[j,1]) * lambda_I); # coefficient matrix
+ R[j,] = t(solve (A2, t(Vt[j,] %*% L)));
+ }
+
+ # check for convergence
+ if (check) {
+ loss_cur = sum (V_nonzero_ind * (V - (L %*% t(R)))^2) + lambda * (sum ((L^2) * row_nonzeros) + sum ((R^2) * col_nonzeros));
+ loss_dec = (loss_init - loss_cur) / loss_init;
+ print ("Train loss at iteration (R) " + it + ": " + loss_cur + " loss-dec " + loss_dec);
+ if (loss_dec >= 0 & loss_dec < thr | loss_init == 0) {
+ print ("----- ALS converged after " + it + " iterations!");
+ converged = TRUE;
+ }
+ loss_init = loss_cur;
+ }
+} # end of while loop
+
+if (check) {
+ print ("----- Final train loss: " + loss_init + " -----");
+}
+
+if (!converged) {
+ print ("Max iteration achieved but not converged!");
+}
+
+# inject 0s in L if original V had empty rows
+if (num_zero_rows > 0) {
+ L = removeEmpty (target = diag (orig_nonzero_rows_ind), margin = "cols") %*% L;
+}
+# inject 0s in R if original V had empty rows
+if (num_zero_cols > 0) {
+ R = removeEmpty (target = diag (orig_nonzero_cols_ind), margin = "cols") %*% R;
+}
+Rt = t (R);
+write (L, fileL, format=fmtO);
+write (Rt, fileR, format=fmtO);
+
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a71bb12a/scripts/algorithms/ALS.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/ALS.dml b/scripts/algorithms/ALS.dml
deleted file mode 100644
index 1d0fce4..0000000
--- a/scripts/algorithms/ALS.dml
+++ /dev/null
@@ -1,170 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#
-# THIS SCRIPT COMPUTES AN APPROXIMATE FACTORIZATIONOF A LOW-RANK MATRIX V INTO TWO MATRICES L AND R
-# USING ALTERNATING-LEAST-SQUARES (ALS) ALGORITHM
-# MATRICES L AND R ARE COMPUTED BY MINIMIZING A LOSS FUNCTION (WITH REGULARIZATION)
-#
-# INPUT PARAMETERS:
-# ---------------------------------------------------------------------------------------------
-# NAME TYPE DEFAULT MEANING
-# ---------------------------------------------------------------------------------------------
-# V String --- Location to read the input matrix V to be factorized
-# L String --- Location to write the factor matrix L
-# R String --- Location to write the factor matrix R
-# rank Int 10 Rank of the factorization
-# reg String "L2" Regularization:
-# "L2" = L2 regularization;
-# "wL2" = weighted L2 regularization
-# lambda Double 0.0 Regularization parameter, no regularization if 0.0
-# maxi Int 50 Maximum number of iterations
-# check Boolean FALSE Check for convergence after every iteration, i.e., updating L and R once
-# thr Double 0.0001 Assuming check is set to TRUE, the algorithm stops and convergence is declared
-# if the decrease in loss in any two consecutive iterations falls below this threshold;
-# if check is FALSE thr is ignored
-# fmt String "text" The output format of the factor matrices L and R, such as "text" or "csv"
-# ---------------------------------------------------------------------------------------------
-# OUTPUT:
-# 1- An m x r matrix L, where r is the factorization rank
-# 2- An r x n matrix R
-#
-# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
-# hadoop jar SystemML.jar -f ALS.dml -nvargs V=INPUT_DIR/V L=OUTPUT_DIR/L R=OUTPUT_DIR/R rank=10 reg="L2" lambda=0.0001 fmt=csv
-
-fileV = $V;
-fileL = $L;
-fileR = $R;
-
-# Default values of some parameters
-r = ifdef ($rank, 10); # $rank=10;
-reg = ifdef ($reg, "L2") # $reg="L2";
-lambda = ifdef ($lambda, 0.000001); # $lambda=0.000001;
-max_iter = ifdef ($maxi, 50); # $maxi=50;
-check = ifdef ($check, FALSE); # $check=FALSE;
-thr = ifdef ($thr, 0.0001); # $thr=0.0001;
-fmtO = ifdef ($fmt, "text"); # $fmt="text";
-
-V = read (fileV);
-
-
-# check the input matrix V, if some rows or columns contain only zeros remove them from V
-V_nonzero_ind = ppred (V, 0, "!=");
-row_nonzeros = rowSums (V_nonzero_ind);
-col_nonzeros = t (colSums (V_nonzero_ind));
-orig_nonzero_rows_ind = ppred (row_nonzeros, 0, "!=");
-orig_nonzero_cols_ind = ppred (col_nonzeros, 0, "!=");
-num_zero_rows = nrow (V) - sum (orig_nonzero_rows_ind);
-num_zero_cols = ncol (V) - sum (orig_nonzero_cols_ind);
-if (num_zero_rows > 0) {
- print ("Matrix V contains empty rows! These rows will be removed.");
- V = removeEmpty (target = V, margin = "rows");
-}
-if (num_zero_cols > 0) {
- print ("Matrix V contains empty columns! These columns will be removed.");
- V = removeEmpty (target = V, margin = "cols");
-}
-if (num_zero_rows > 0 | num_zero_cols > 0) {
- print ("Recomputing nonzero rows and columns!");
- V_nonzero_ind = ppred (V, 0, "!=");
- row_nonzeros = rowSums (V_nonzero_ind);
- col_nonzeros = t (colSums (V_nonzero_ind));
-}
-
-###### MAIN PART ######
-m = nrow (V);
-n = ncol (V);
-
-# initializing factor matrices
-L = rand (rows = m, cols = r, min = -0.5, max = 0.5);
-R = rand (rows = n, cols = r, min = -0.5, max = 0.5);
-
-# initializing transformed matrices
-Vt = t(V);
-
-# check for regularization
-if (reg == "L2") {
- print ("BEGIN ALS SCRIPT WITH NONZERO SQUARED LOSS + L2 WITH LAMBDA - " + lambda);
-} else if (reg == "wL2") {
- print ("BEGIN ALS SCRIPT WITH NONZERO SQUARED LOSS + WEIGHTED L2 WITH LAMBDA - " + lambda);
-} else {
- stop ("wrong regularization! " + reg);
-}
-
-if (check) {
- loss_init = sum (V_nonzero_ind * (V - (L %*% t(R)))^2) + lambda * (sum ((L^2) * row_nonzeros) + sum ((R^2) * col_nonzeros));
- print ("----- Initial train loss: " + loss_init + " -----");
-}
-
-lambda_I = diag (matrix (lambda, rows = r, cols = 1));
-it = 0;
-converged = FALSE;
-while ((it < max_iter) & (!converged)) {
- it = it + 1;
- # keep R fixed and update L
- parfor (i in 1:m) {
- R_nonzero_ind = t(ppred(V[i,],0,"!="));
- R_nonzero = removeEmpty (target=R * R_nonzero_ind, margin="rows");
- A1 = (t(R_nonzero) %*% R_nonzero) + (as.scalar(row_nonzeros[i,1]) * lambda_I); # coefficient matrix
- L[i,] = t(solve (A1, t(V[i,] %*% R)));
- }
-
- # keep L fixed and update R
- parfor (j in 1:n) {
- L_nonzero_ind = t(ppred(Vt[j,],0,"!="))
- L_nonzero = removeEmpty (target=L * L_nonzero_ind, margin="rows");
- A2 = (t(L_nonzero) %*% L_nonzero) + (as.scalar(col_nonzeros[j,1]) * lambda_I); # coefficient matrix
- R[j,] = t(solve (A2, t(Vt[j,] %*% L)));
- }
-
- # check for convergence
- if (check) {
- loss_cur = sum (V_nonzero_ind * (V - (L %*% t(R)))^2) + lambda * (sum ((L^2) * row_nonzeros) + sum ((R^2) * col_nonzeros));
- loss_dec = (loss_init - loss_cur) / loss_init;
- print ("Train loss at iteration (R) " + it + ": " + loss_cur + " loss-dec " + loss_dec);
- if (loss_dec >= 0 & loss_dec < thr | loss_init == 0) {
- print ("----- ALS converged after " + it + " iterations!");
- converged = TRUE;
- }
- loss_init = loss_cur;
- }
-} # end of while loop
-
-if (check) {
- print ("----- Final train loss: " + loss_init + " -----");
-}
-
-if (!converged) {
- print ("Max iteration achieved but not converged!");
-}
-
-# inject 0s in L if original V had empty rows
-if (num_zero_rows > 0) {
- L = removeEmpty (target = diag (orig_nonzero_rows_ind), margin = "cols") %*% L;
-}
-# inject 0s in R if original V had empty rows
-if (num_zero_cols > 0) {
- R = removeEmpty (target = diag (orig_nonzero_cols_ind), margin = "cols") %*% R;
-}
-Rt = t (R);
-write (L, fileL, format=fmtO);
-write (Rt, fileR, format=fmtO);
-
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a71bb12a/scripts/staging/ALS-CG.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/ALS-CG.dml b/scripts/staging/ALS-CG.dml
deleted file mode 100644
index cd2ba0b..0000000
--- a/scripts/staging/ALS-CG.dml
+++ /dev/null
@@ -1,176 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#
-# THIS SCRIPT COMPUTES AN APPROXIMATE FACTORIZATION OF A LOW-RANK MATRIX X INTO TWO MATRICES U AND V
-# USING ALTERNATING-LEAST-SQUARES (ALS) ALGORITHM WITH CONJUGATE GRADIENT
-# MATRICES U AND V ARE COMPUTED BY MINIMIZING A LOSS FUNCTION (WITH REGULARIZATION)
-#
-# INPUT PARAMETERS:
-# ---------------------------------------------------------------------------------------------
-# NAME TYPE DEFAULT MEANING
-# ---------------------------------------------------------------------------------------------
-# X String --- Location to read the input matrix X to be factorized
-# U String --- Location to write the factor matrix U
-# V String --- Location to write the factor matrix V
-# rank Int 10 Rank of the factorization
-# reg String "L2" Regularization:
-# "L2" = L2 regularization;
-# "wL2" = weighted L2 regularization
-# lambda Double 0.000001 Regularization parameter, no regularization if 0.0
-# maxi Int 50 Maximum number of iterations
-# check Boolean FALSE Check for convergence after every iteration, i.e., updating U and V once
-# thr Double 0.0001 Assuming check is set to TRUE, the algorithm stops and convergence is declared
-# if the decrease in loss in any two consecutive iterations falls below this threshold;
-# if check is FALSE thr is ignored
-# fmt String "text" The output format of the factor matrices L and R, such as "text" or "csv"
-# ---------------------------------------------------------------------------------------------
-# OUTPUT:
-# 1- An m x r matrix U, where r is the factorization rank
-# 2- An r x n matrix V
-#
-# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
-# hadoop jar SystemML.jar -f ALS-CG.dml -nvargs X=INPUT_DIR/X U=OUTPUT_DIR/U V=OUTPUT_DIR/V rank=10 reg="L2" lambda=0.0001 fmt=csv
-
-fileX = $X;
-fileU = $U;
-fileV = $V;
-
-# Default values of some parameters
-r = ifdef ($rank, 10); # $rank=10;
-reg = ifdef ($reg, "L2") # $reg="L2";
-lambda = ifdef ($lambda, 0.000001); # $lambda=0.000001;
-max_iter = ifdef ($maxi, 50); # $maxi=50;
-check = ifdef ($check, TRUE); # $check=FALSE;
-thr = ifdef ($thr, 0.0001); # $thr=0.0001;
-fmtO = ifdef ($fmt, "text"); # $fmt="text";
-
-
-###### MAIN PART ######
-X = read (fileX);
-m = nrow (X);
-n = ncol (X);
-
-# initializing factor matrices
-U = rand (rows = m, cols = r, min = -0.5, max = 0.5); # mxr
-V = rand (rows = n, cols = r, min = -0.5, max = 0.5); # nxr
-
-W = ppred (X, 0, "!=");
-
-# check for regularization
-if( reg == "L2" ) {
- print ("BEGIN ALS-CG SCRIPT WITH NONZERO SQUARED LOSS + L2 WITH LAMBDA - " + lambda);
- row_nonzeros = matrix(1, nrow(W), 1);
- col_nonzeros = matrix(1, ncol(W), 1);
-}
-else if( reg == "wL2" ) {
- print ("BEGIN ALS-CG SCRIPT WITH NONZERO SQUARED LOSS + WEIGHTED L2 WITH LAMBDA - " + lambda);
- row_nonzeros = rowSums(W);
- col_nonzeros = t(colSums(W));
-}
-else {
- stop ("wrong regularization! " + reg);
-}
-
-# Loss Function with L2:
-# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
-# + 0.5 * lambda * (sum (U ^ 2) + sum (V ^ 2))
-# Loss Function with weighted L2:
-# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
-# + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros))
-
-is_U = TRUE; # TRUE = Optimize U, FALSE = Optimize V
-maxinneriter = r ; # min (ncol (U), 15);
-
-if( check ) {
- loss_init = 0.5 * sum (ppred(X,0, "!=") * (U %*% t(V) - X) ^ 2);
- loss_init = loss_init + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros));
- print ("----- Initial train loss: " + loss_init + " -----");
-}
-
-it = 0;
-converged = FALSE;
-while( as.integer(it/2) < max_iter & ! converged )
-{
- it = it + 1;
- if( is_U ) {
- G = (ppred(X,0,"!=") * (U %*% t(V) - X)) %*% V + lambda * U * row_nonzeros;
- }
- else {
- G = t(t(U) %*% (ppred(X,0,"!=") * (U %*% t(V) - X))) + lambda * V * col_nonzeros;
- }
-
- R = -G;
- S = R;
- norm_G2 = sum (G ^ 2);
- norm_R2 = norm_G2;
-
- inneriter = 1;
- tt = 0.000000001;
- while( norm_R2 > tt * norm_G2 & inneriter <= maxinneriter )
- {
- if( is_U ) {
- HS = (W * (S %*% t(V))) %*% V + lambda * S * row_nonzeros;
- alpha = norm_R2 / sum (S * HS);
- U = U + alpha * S; # OK since U is not used in HS
- }
- else {
- HS = t(t(U) %*% (W * (U %*% t(S)))) + lambda * S * col_nonzeros;
- alpha = norm_R2 / sum (S * HS);
- V = V + alpha * S; # OK since V is not used in HS
- }
-
- R = R - alpha * HS;
- old_norm_R2 = norm_R2;
- norm_R2 = sum (R ^ 2);
- S = R + (norm_R2 / old_norm_R2) * S;
- inneriter = inneriter + 1;
- }
-
- is_U = ! is_U;
-
- # check for convergence
- if( check & (it%%2 == 0) ) {
- loss_cur = 0.5 * sum (ppred(X,0, "!=") * (U %*% t(V) - X) ^ 2);
- loss_cur = loss_cur + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros));
-
- loss_dec = (loss_init - loss_cur) / loss_init;
- print ("Train loss at iteration (" + as.integer(it/2) + "): " + loss_cur + " loss-dec " + loss_dec);
- if( loss_dec >= 0 & loss_dec < thr | loss_init == 0 ) {
- print ("----- ALS-CG converged after " + as.integer(it/2) + " iterations!");
- converged = TRUE;
- }
- loss_init = loss_cur;
- }
-}
-
-if( check ) {
- print ("----- Final train loss: " + loss_init + " -----");
-}
-
-if( !converged ) {
- print ("Max iteration achieved but not converged!");
-}
-
-V = t(V);
-write (U, fileU, format=fmtO);
-write (V, fileV, format=fmtO);
-
\ No newline at end of file
[3/4] incubator-systemml git commit: New probabilistic pca (ppca)
script, still in staging; by Narine
Posted by mb...@apache.org.
New probabilistic pca (ppca) script, still in staging; by Narine
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/83a5b42d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/83a5b42d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/83a5b42d
Branch: refs/heads/master
Commit: 83a5b42d8d0baf61b6a4eecd5c1c18d16dada9e3
Parents: a71bb12
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Thu Jan 7 12:49:22 2016 -0800
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Thu Jan 7 12:49:22 2016 -0800
----------------------------------------------------------------------
scripts/staging/PPCA.dml | 160 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 160 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/83a5b42d/scripts/staging/PPCA.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/PPCA.dml b/scripts/staging/PPCA.dml
new file mode 100644
index 0000000..667c709
--- /dev/null
+++ b/scripts/staging/PPCA.dml
@@ -0,0 +1,160 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# This script performs Probabilistic Principal Component Analysis (PPCA) on the given input data.
+# It is based on paper: sPCA: Scalable Principal Component Analysis for Big Data on Distributed
+# Platforms. Tarek Elgamal et.al.
+
+# INPUT PARAMETERS:
+# ---------------------------------------------------------------------------------------------
+# NAME TYPE DEFAULT MEANING
+# ---------------------------------------------------------------------------------------------
+# X String --- location to read the matrix X input matrix
+# k Int --- indicates dimension of the new vector space constructed from eigen vectors
+# tolobj Int 0.00001 objective function tolerance value to stop ppca algorithm
+# tolrecerr Int 0.02 reconstruction error tolerance value to stop the algorithm
+# iter Int 10 maximum number of iterations
+# fmt String 'text' output format of results PPCA such as "text" or "csv"
+# hadoop jar SystemML.jar -f PPCA.dml -nvargs X=/INPUT_DIR/X C=/OUTPUT_DIR/C V=/OUTPUT_DIR/V k=2 tol=0.2 iter=100
+# ---------------------------------------------------------------------------------------------
+# OUTPUT PARAMETERS:
+# ---------------------------------------------------------------------------------------------
+# NAME TYPE DEFAULT MEANING
+# ---------------------------------------------------------------------------------------------
+# C Matrix --- principal components
+# V Matrix --- eigenvalues of principal components
+#
+
+X = read($X);
+
+fileC = $C;
+fileV = $V;
+
+k = ifdef($k, ncol(X));
+iter = ifdef($iter, 10);
+tolobj = ifdef($tolobj, 0.00001);
+tolrecerr = ifdef($tolrecerr, 0.02);
+fmt0 = ifdef($fmt, "text");
+
+n = nrow(X);
+m = ncol(X);
+
+#initializing principal components matrix
+C = rand(rows=m, cols=k, pdf="normal");
+ss = rand(rows=1, cols=1, pdf="normal");
+ss = as.scalar(ss);
+ssPrev = ss;
+
+# best selected principal components - with the lowest reconstruction error
+PC = C;
+
+# initializing reconstruction error
+RE = tolrecerr+1;
+REBest = RE;
+
+Z = matrix(0,rows=1,cols=1);
+
+#Objective function value
+ObjRelChng = tolobj+1;
+
+# mean centered input matrix - dim -> [n,m]
+Xm = X - colMeans(X);
+
+#I -> k x k
+ITMP = matrix(1,rows=k,cols=1);
+I = diag(ITMP);
+
+i = 0;
+while (i < iter & ObjRelChng > tolobj & RE > tolrecerr){
+ #Estimation step - Covariance matrix
+ #M -> k x k
+ M = t(C) %*% C + I*ss;
+
+ #Auxiliary matrix with n latent variables
+ # Z -> n x k
+ Z = Xm %*% (C %*% inv(M));
+
+ #ZtZ -> k x k
+ ZtZ = t(Z) %*% Z + inv(M)*ss;
+
+ #XtZ -> m x k
+ XtZ = t(Xm) %*% Z;
+
+ #Maximization step
+ #C -> m x k
+ ZtZ_sum = sum(ZtZ); #+n*inv(M));
+ C = XtZ/ZtZ_sum;
+
+ #ss2 -> 1 x 1
+ ss2 = trace(ZtZ * (t(C) %*% C));
+
+ #ss3 -> 1 x 1
+ ss3 = sum((Z %*% t(C)) %*% t(Xm));
+
+ #Frobenius norm of reconstruction error -> Euclidean norm
+ #Fn -> 1 x 1
+ Fn = sum(Xm*Xm);
+
+ #ss -> 1 x 1
+ ss = (Fn + ss2 - 2*ss3)/(n*m);
+
+ #calculating objective function relative change
+ ObjRelChng = abs(1 - ss/ssPrev);
+ #print("Objective Relative Change: " + ObjRelChng + ", Objective: " + ss);
+
+ #Reconstruction error
+ R = ((Z %*% t(C)) - Xm);
+
+ #calculate the error
+ #TODO rethink calculation of reconstruction error ....
+ #1-Norm of reconstruction error - a big dense matrix
+ #RE -> n x m
+ RE = abs(sum(R)/sum(Xm));
+ if (RE < REBest){
+ PC = C;
+ REBest = RE;
+ }
+ #print("ss: " + ss +" = Fn( "+ Fn +" ) + ss2( " + ss2 +" ) - 2*ss3( " + ss3 + " ), Reconstruction Error: " + RE);
+
+ ssPrev = ss;
+ i = i+1;
+}
+print("Objective Relative Change: " + ObjRelChng);
+print ("Number of iterations: " + i + ", Reconstruction Err: " + REBest);
+
+# reconstructs data
+# RD -> n x k
+RD = X %*% PC;
+
+# calculate eigenvalues - principal component variance
+RDMean = colMeans(RD);
+V = t(colMeans(RD*RD) - (RDMean*RDMean));
+
+# sorting eigenvalues and eigenvectors in decreasing order
+V_decr_idx = order(target=V,by=1,decreasing=TRUE,index.return=TRUE);
+VF_decr = table(seq(1,nrow(V)),V_decr_idx);
+V = VF_decr %*% V;
+PC = PC %*% VF_decr;
+
+# writing principal components
+write(PC, fileC, format=fmt0);
+# writing eigen values/pc variance
+write(V, fileV, format=fmt0);
[4/4] incubator-systemml git commit: New simplification rewrite
'pushdown sum on additive binary', for ppca
Posted by mb...@apache.org.
New simplification rewrite 'pushdown sum on additive binary', for ppca
For example, we now rewrite sum(A+B) -> sum(A)+sum(B) and sum(A-B) ->
sum(A)-sum(B) if dims(A)==dims(B) and dt(A)==dt(B)==MATRIX. This
prevents an unnecessary intermediate, reduces the number of scans from 3
reads / 1 write to two reads, and simplifies binary/unary operations to
pure unary operations that are easier to parallelize. Down the road, we
can generalize this to matrix-vector and matrix-scalar operations too.
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/19af3f9b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/19af3f9b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/19af3f9b
Branch: refs/heads/master
Commit: 19af3f9be3736853ff0ccae4e2b074a4b5905c03
Parents: 83a5b42
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Fri Jan 8 11:07:18 2016 -0800
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Fri Jan 8 11:07:18 2016 -0800
----------------------------------------------------------------------
.../sysml/hops/rewrite/HopRewriteUtils.java | 30 ++++
.../RewriteAlgebraicSimplificationDynamic.java | 51 ++++++
.../aggregate/PushdownSumBinaryTest.java | 163 +++++++++++++++++++
.../scripts/functions/aggregate/PushdownSum1.R | 34 ++++
.../functions/aggregate/PushdownSum1.dml | 25 +++
.../scripts/functions/aggregate/PushdownSum2.R | 34 ++++
.../functions/aggregate/PushdownSum2.dml | 25 +++
7 files changed, 362 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/19af3f9b/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java b/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
index 95ddf0f..891c0b1 100644
--- a/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
+++ b/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
@@ -25,6 +25,7 @@ import java.util.HashMap;
import org.apache.sysml.api.DMLScript;
import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
import org.apache.sysml.hops.AggBinaryOp;
+import org.apache.sysml.hops.AggUnaryOp;
import org.apache.sysml.hops.BinaryOp;
import org.apache.sysml.hops.DataOp;
import org.apache.sysml.hops.Hop;
@@ -32,6 +33,7 @@ import org.apache.sysml.hops.Hop.AggOp;
import org.apache.sysml.hops.Hop.DataGenMethod;
import org.apache.sysml.hops.DataGenOp;
import org.apache.sysml.hops.Hop.DataOpTypes;
+import org.apache.sysml.hops.Hop.Direction;
import org.apache.sysml.hops.Hop.FileFormatTypes;
import org.apache.sysml.hops.Hop.OpOp2;
import org.apache.sysml.hops.Hop.ParamBuiltinOp;
@@ -551,6 +553,34 @@ public class HopRewriteUtils
/**
*
+ * @param input
+ * @return
+ */
+ public static AggUnaryOp createSum( Hop input ) {
+ return createAggUnaryOp(input, AggOp.SUM, Direction.RowCol);
+ }
+
+ /**
+ *
+ * @param input
+ * @param op
+ * @param dir
+ * @return
+ */
+ public static AggUnaryOp createAggUnaryOp( Hop input, AggOp op, Direction dir )
+ {
+ DataType dt = (dir==Direction.RowCol) ? DataType.SCALAR : input.getDataType();
+
+ AggUnaryOp auop = new AggUnaryOp(input.getName(), dt, input.getValueType(), op, dir, input);
+ auop.setRowsInBlock(input.getRowsInBlock());
+ auop.setColsInBlock(input.getColsInBlock());
+ auop.refreshSizeInformation();
+
+ return auop;
+ }
+
+ /**
+ *
* @param left
* @param right
* @return
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/19af3f9b/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationDynamic.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationDynamic.java b/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationDynamic.java
index 7c4a67a..31c394b 100644
--- a/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationDynamic.java
+++ b/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationDynamic.java
@@ -166,6 +166,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
hi = simplifyDiagMatrixMult(hop, hi, i); //e.g., diag(X%*%Y)->rowSums(X*t(Y)); if col vector
hi = simplifySumDiagToTrace(hi); //e.g., sum(diag(X)) -> trace(X); if col vector
hi = pushdownBinaryOperationOnDiag(hop, hi, i); //e.g., diag(X)*7 -> diag(X*7); if col vector
+ hi = pushdownSumOnAdditiveBinary(hop, hi, i); //e.g., sum(A+B) -> sum(A)+sum(B); if dims(A)==dims(B)
hi = simplifyWeightedSquaredLoss(hop, hi, i); //e.g., sum(W * (X - U %*% t(V)) ^ 2) -> wsl(X, U, t(V), W, true),
hi = simplifyWeightedSigmoidMMChains(hop, hi, i); //e.g., W * sigmoid(Y%*%t(X)) -> wsigmoid(W, Y, t(X), type)
hi = simplifyWeightedDivMM(hop, hi, i); //e.g., t(U) %*% (X/(U%*%t(V))) -> wdivmm(X, U, t(V), left)
@@ -1349,6 +1350,56 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
return hi;
}
+ /**
+ * patterns: sum(A+B)->sum(A)+sum(B); sum(A-B)->sum(A)-sum(B)
+ *
+ * @param parent
+ * @param hi
+ * @param pos
+ * @return
+ */
+ private Hop pushdownSumOnAdditiveBinary(Hop parent, Hop hi, int pos)
+ {
+ //all patterns headed by full sum over binary operation
+ if( hi instanceof AggUnaryOp //full sum root over binaryop
+ && ((AggUnaryOp)hi).getDirection()==Direction.RowCol
+ && ((AggUnaryOp)hi).getOp() == AggOp.SUM
+ && hi.getInput().get(0) instanceof BinaryOp
+ && hi.getInput().get(0).getParent().size()==1 ) //single parent
+ {
+ BinaryOp bop = (BinaryOp) hi.getInput().get(0);
+ Hop left = bop.getInput().get(0);
+ Hop right = bop.getInput().get(1);
+
+ if( HopRewriteUtils.isEqualSize(left, right) //dims(A) == dims(B)
+ && left.getDataType() == DataType.MATRIX
+ && right.getDataType() == DataType.MATRIX )
+ {
+ OpOp2 applyOp = ( bop.getOp() == OpOp2.PLUS //pattern a: sum(A+B)->sum(A)+sum(B)
+ || bop.getOp() == OpOp2.MINUS ) //pattern b: sum(A-B)->sum(A)-sum(B)
+ ? bop.getOp() : null;
+
+ if( applyOp != null ) {
+ //create new subdag sum(A) bop sum(B)
+ AggUnaryOp sum1 = HopRewriteUtils.createSum(left);
+ AggUnaryOp sum2 = HopRewriteUtils.createSum(right);
+ BinaryOp newBin = HopRewriteUtils.createBinary(sum1, sum2, applyOp);
+
+ //rewire new subdag
+ HopRewriteUtils.removeChildReferenceByPos(parent, hi, pos);
+ HopRewriteUtils.removeAllChildReferences(hi);
+ HopRewriteUtils.removeAllChildReferences(bop);
+ HopRewriteUtils.addChildReference(parent, newBin, pos);
+
+ hi = newBin;
+
+ LOG.debug("Applied pushdownSumOnAdditiveBinary.");
+ }
+ }
+ }
+
+ return hi;
+ }
/**
* Searches for weighted squared loss expressions and replaces them with a quaternary operator.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/19af3f9b/src/test/java/org/apache/sysml/test/integration/functions/aggregate/PushdownSumBinaryTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/aggregate/PushdownSumBinaryTest.java b/src/test/java/org/apache/sysml/test/integration/functions/aggregate/PushdownSumBinaryTest.java
new file mode 100644
index 0000000..1b87231
--- /dev/null
+++ b/src/test/java/org/apache/sysml/test/integration/functions/aggregate/PushdownSumBinaryTest.java
@@ -0,0 +1,163 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.integration.functions.aggregate;
+
+import java.util.HashMap;
+
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
+import org.apache.sysml.lops.LopProperties.ExecType;
+import org.apache.sysml.runtime.instructions.Instruction;
+import org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex;
+import org.apache.sysml.test.integration.AutomatedTestBase;
+import org.apache.sysml.test.integration.TestConfiguration;
+import org.apache.sysml.test.utils.TestUtils;
+import org.apache.sysml.utils.Statistics;
+
+/**
+ *
+ */
+public class PushdownSumBinaryTest extends AutomatedTestBase
+{
+ private final static String TEST_NAME1 = "PushdownSum1"; //+
+ private final static String TEST_NAME2 = "PushdownSum2"; //-
+
+ private final static String TEST_DIR = "functions/aggregate/";
+ private static final String TEST_CLASS_DIR = TEST_DIR + PushdownSumBinaryTest.class.getSimpleName() + "/";
+ private final static double eps = 1e-10;
+
+ private final static int rows = 1765;
+ private final static int cols = 19;
+ private final static double sparsity = 0.1;
+
+
+ @Override
+ public void setUp()
+ {
+ addTestConfiguration(TEST_NAME1, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME1, new String[]{"C"}));
+ addTestConfiguration(TEST_NAME2, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME2, new String[]{"C"}));
+ TestUtils.clearAssertionInformation();
+
+ if (TEST_CACHE_ENABLED) {
+ setOutAndExpectedDeletionDisabled(true);
+ }
+ }
+
+ @BeforeClass
+ public static void init()
+ {
+ TestUtils.clearDirectory(TEST_DATA_DIR + TEST_CLASS_DIR);
+ }
+
+ @AfterClass
+ public static void cleanUp()
+ {
+ if (TEST_CACHE_ENABLED) {
+ TestUtils.clearDirectory(TEST_DATA_DIR + TEST_CLASS_DIR);
+ }
+ }
+
+ @Test
+ public void testPushDownSumPlusSP() {
+ runPushdownSumOnBinaryTest(TEST_NAME1, true, ExecType.SPARK);
+ }
+
+ @Test
+ public void testPushDownSumMinusSP() {
+ runPushdownSumOnBinaryTest(TEST_NAME2, true, ExecType.SPARK);
+ }
+
+ @Test
+ public void testPushDownSumPlusNoRewriteSP() {
+ runPushdownSumOnBinaryTest(TEST_NAME1, false, ExecType.SPARK);
+ }
+
+ @Test
+ public void testPushDownSumMinusNoRewriteSP() {
+ runPushdownSumOnBinaryTest(TEST_NAME2, false, ExecType.SPARK);
+ }
+
+ /**
+ *
+ * @param testname
+ * @param type
+ * @param sparse
+ * @param instType
+ */
+ private void runPushdownSumOnBinaryTest( String testname, boolean equiDims, ExecType instType)
+ {
+ //rtplatform for MR
+ RUNTIME_PLATFORM platformOld = rtplatform;
+ switch( instType ){
+ case MR: rtplatform = RUNTIME_PLATFORM.HADOOP; break;
+ case SPARK: rtplatform = RUNTIME_PLATFORM.SPARK; break;
+ default: rtplatform = RUNTIME_PLATFORM.HYBRID; break;
+ }
+
+ boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
+ if( rtplatform == RUNTIME_PLATFORM.SPARK )
+ DMLScript.USE_LOCAL_SPARK_CONFIG = true;
+
+ try
+ {
+ //determine script and function name
+ String TEST_NAME = testname;
+ String TEST_CACHE_DIR = TEST_CACHE_ENABLED ? TEST_NAME + "_" + String.valueOf(equiDims) + "/" : "";
+
+ TestConfiguration config = getTestConfiguration(TEST_NAME);
+ loadTestConfiguration(config, TEST_CACHE_DIR);
+
+ // This is for running the junit test the new way, i.e., construct the arguments directly
+ String HOME = SCRIPT_DIR + TEST_DIR;
+ fullDMLScriptName = HOME + TEST_NAME + ".dml";
+ programArgs = new String[]{"-explain","-stats","-args", input("A"), input("B"), output("C") };
+ fullRScriptName = HOME + TEST_NAME + ".R";
+ rCmd = "Rscript" + " " + fullRScriptName + " " + inputDir() + " " + expectedDir();
+
+ //generate actual dataset
+ double[][] A = getRandomMatrix(rows, cols, -1, 1, sparsity, 7);
+ writeInputMatrixWithMTD("A", A, true);
+ double[][] B = getRandomMatrix(rows, equiDims ? cols : 1, -1, 1, sparsity, 73);
+ writeInputMatrixWithMTD("B", B, true);
+
+ //run tests
+ runTest(true, false, null, -1);
+ runRScript(true);
+
+ //compare output matrices
+ HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromHDFS("C");
+ HashMap<CellIndex, Double> rfile = readRMatrixFromFS("C");
+ TestUtils.compareMatrices(dmlfile, rfile, eps, "Stat-DML", "Stat-R");
+
+ String lopcode = TEST_NAME.equals(TEST_NAME1) ? "+" : "-";
+ String opcode = equiDims ? lopcode : Instruction.SP_INST_PREFIX+"map"+lopcode;
+ Assert.assertTrue("Non-applied rewrite", Statistics.getCPHeavyHitterOpCodes().contains(opcode));
+ }
+ finally
+ {
+ rtplatform = platformOld;
+ DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/19af3f9b/src/test/scripts/functions/aggregate/PushdownSum1.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/aggregate/PushdownSum1.R b/src/test/scripts/functions/aggregate/PushdownSum1.R
new file mode 100644
index 0000000..4eb5c8b
--- /dev/null
+++ b/src/test/scripts/functions/aggregate/PushdownSum1.R
@@ -0,0 +1,34 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+if( ncol(B) == 1 ) {
+ B <- B %*% matrix(1,1,ncol(A))
+}
+
+C = as.matrix(sum(A+B));
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep=""));
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/19af3f9b/src/test/scripts/functions/aggregate/PushdownSum1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/aggregate/PushdownSum1.dml b/src/test/scripts/functions/aggregate/PushdownSum1.dml
new file mode 100644
index 0000000..e49db15
--- /dev/null
+++ b/src/test/scripts/functions/aggregate/PushdownSum1.dml
@@ -0,0 +1,25 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+A = read($1);
+B = read($2);
+C = as.matrix(sum(A+B))
+write(C, $3);
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/19af3f9b/src/test/scripts/functions/aggregate/PushdownSum2.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/aggregate/PushdownSum2.R b/src/test/scripts/functions/aggregate/PushdownSum2.R
new file mode 100644
index 0000000..08986ff
--- /dev/null
+++ b/src/test/scripts/functions/aggregate/PushdownSum2.R
@@ -0,0 +1,34 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+if( ncol(B) == 1 ) {
+ B <- B %*% matrix(1,1,ncol(A))
+}
+
+C = as.matrix(sum(A-B));
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep=""));
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/19af3f9b/src/test/scripts/functions/aggregate/PushdownSum2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/aggregate/PushdownSum2.dml b/src/test/scripts/functions/aggregate/PushdownSum2.dml
new file mode 100644
index 0000000..eec34e7
--- /dev/null
+++ b/src/test/scripts/functions/aggregate/PushdownSum2.dml
@@ -0,0 +1,25 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+A = read($1);
+B = read($2);
+C = as.matrix(sum(A-B))
+write(C, $3);