You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2016/01/08 20:07:56 UTC

[1/4] incubator-systemml git commit: [SYSTEMML-149] Cleanup remaining nimble references and custom config

Repository: incubator-systemml
Updated Branches:
  refs/heads/master 895610547 -> 19af3f9be


[SYSTEMML-149] Cleanup remaining nimble references and custom config

https://issues.apache.org/jira/browse/SYSTEMML-149

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/96019cf5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/96019cf5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/96019cf5

Branch: refs/heads/master
Commit: 96019cf5e2d62410a631f754c7654e27de842bf7
Parents: 8956105
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Thu Jan 7 11:52:14 2016 -0800
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Thu Jan 7 11:52:14 2016 -0800

----------------------------------------------------------------------
 .../java/org/apache/sysml/conf/DMLConfig.java     | 18 +-----------------
 .../ExternalFunctionProgramBlock.java             |  7 ++-----
 .../ExternalFunctionProgramBlockCP.java           |  9 +++------
 .../sysml/test/integration/AutomatedTestBase.java | 16 +---------------
 .../functions/dmlscript/SystemML-config.xml       |  6 ------
 5 files changed, 7 insertions(+), 49 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/96019cf5/src/main/java/org/apache/sysml/conf/DMLConfig.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/conf/DMLConfig.java b/src/main/java/org/apache/sysml/conf/DMLConfig.java
index eefafc6..2782487 100644
--- a/src/main/java/org/apache/sysml/conf/DMLConfig.java
+++ b/src/main/java/org/apache/sysml/conf/DMLConfig.java
@@ -70,14 +70,6 @@ public class DMLConfig
 	public static final String CP_PARALLEL_MATRIXMULT = "cp.parallel.matrixmult";
 	public static final String CP_PARALLEL_TEXTIO   = "cp.parallel.textio";
 
-	//obsolete nimble configuration (removed 06/24/2015)
-	//public static final String NUM_MERGE_TASKS      = "NumMergeTasks";
-	//public static final String NUM_SOW_THREADS      = "NumberOfSowThreads";
-	//public static final String NUM_REAP_THREADS     = "NumberOfReapThreads";
-	//public static final String SOWER_WAIT_INTERVAL  = "SowerWaitInterval";
-	//public static final String REAPER_WAIT_INTERVAL = "ReaperWaitInterval";
-	//public static final String NIMBLE_SCRATCH       = "NimbleScratch";
-
 	//internal config
 	public static final String DEFAULT_SHARED_DIR_PERMISSION = "777"; //for local fs and DFS
 	public static String LOCAL_MR_MODE_STAGING_DIR = null;
@@ -101,12 +93,6 @@ public class DMLConfig
 		_defaultVals.put(YARN_APPMASTERMEM,    "2048" );
 		_defaultVals.put(YARN_MAPREDUCEMEM,    "-1" );
 		_defaultVals.put(YARN_APPQUEUE,    	   "default" );
-		//_defaultVals.put(NUM_MERGE_TASKS,      "4" );
-		//_defaultVals.put(NUM_SOW_THREADS,      "1" );
-		//_defaultVals.put(NUM_REAP_THREADS,     "1" );
-		//_defaultVals.put(SOWER_WAIT_INTERVAL,  "1000" );
-		//_defaultVals.put(REAPER_WAIT_INTERVAL, "1000" );
-		//_defaultVals.put(NIMBLE_SCRATCH,       "nimbleoutput" );	
 		_defaultVals.put(CP_PARALLEL_MATRIXMULT, "true" );
 		_defaultVals.put(CP_PARALLEL_TEXTIO,     "true" );
 	}
@@ -419,9 +405,7 @@ public class DMLConfig
 		String[] tmpConfig = new String[] { 
 				LOCAL_TMP_DIR,SCRATCH_SPACE,OPTIMIZATION_LEVEL,
 				NUM_REDUCERS, DEFAULT_BLOCK_SIZE,
-				YARN_APPMASTER, YARN_APPMASTERMEM, YARN_MAPREDUCEMEM,
-				//NUM_MERGE_TASKS, NUM_SOW_THREADS,NUM_REAP_THREADS,
-				//SOWER_WAIT_INTERVAL,REAPER_WAIT_INTERVAL,NIMBLE_SCRATCH 
+				YARN_APPMASTER, YARN_APPMASTERMEM, YARN_MAPREDUCEMEM, 
 				CP_PARALLEL_MATRIXMULT, CP_PARALLEL_TEXTIO
 		}; 
 		

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/96019cf5/src/main/java/org/apache/sysml/runtime/controlprogram/ExternalFunctionProgramBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/ExternalFunctionProgramBlock.java b/src/main/java/org/apache/sysml/runtime/controlprogram/ExternalFunctionProgramBlock.java
index b3fbf8c..03a5d44 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/ExternalFunctionProgramBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/ExternalFunctionProgramBlock.java
@@ -98,8 +98,6 @@ public class ExternalFunctionProgramBlock extends FunctionProgramBlock
 	{
 		super(prog, inputParams, outputParams);		
 		_baseDir = baseDir;
-		
-		//NOTE: no need to setup nimble queue for CP external functions
 	}
 	
 	public ExternalFunctionProgramBlock(Program prog,
@@ -610,10 +608,9 @@ public class ExternalFunctionProgramBlock extends FunctionProgramBlock
 	/**
 	 * Method to execute an external function invocation instruction.
 	 * 
+	 * @param ec
 	 * @param inst
-	 * @param dQueue
-	 * @throws NimbleCheckedRuntimeException
-	 * @throws DMLRuntimeException 
+	 * @throws DMLRuntimeException
 	 */
 	@SuppressWarnings("unchecked")
 	public void executeInstruction(ExecutionContext ec, ExternalFunctionInvocationInstruction inst) 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/96019cf5/src/main/java/org/apache/sysml/runtime/controlprogram/ExternalFunctionProgramBlockCP.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/ExternalFunctionProgramBlockCP.java b/src/main/java/org/apache/sysml/runtime/controlprogram/ExternalFunctionProgramBlockCP.java
index a143337..ad0dbcd 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/ExternalFunctionProgramBlockCP.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/ExternalFunctionProgramBlockCP.java
@@ -110,18 +110,15 @@ public class ExternalFunctionProgramBlockCP extends ExternalFunctionProgramBlock
 	}
 	
 	/**
-	 * Executes the external function instruction without the use of NIMBLE tasks.
+	 * Executes the external function instruction.
 	 * 
-	 * @param inst
-	 * @throws DMLRuntimeException 
-	 * @throws NimbleCheckedRuntimeException
 	 */
 	@Override
 	public void executeInstruction(ExecutionContext ec, ExternalFunctionInvocationInstruction inst) 
 		throws DMLRuntimeException 
 	{
-		// After removal of nimble, we moved the code of ExternalFunctionProgramBlockCP to 
-		// ExternalFunctionProgramBlock and hence hence both types of external functions can
+		// After the udf framework rework, we moved the code of ExternalFunctionProgramBlockCP 
+		// to ExternalFunctionProgramBlock and hence both types of external functions can
 		// share the same code path here.
 		super.executeInstruction(ec, inst);
 	}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/96019cf5/src/test/java/org/apache/sysml/test/integration/AutomatedTestBase.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/AutomatedTestBase.java b/src/test/java/org/apache/sysml/test/integration/AutomatedTestBase.java
index efeb24e..9688754 100644
--- a/src/test/java/org/apache/sysml/test/integration/AutomatedTestBase.java
+++ b/src/test/java/org/apache/sysml/test/integration/AutomatedTestBase.java
@@ -828,21 +828,7 @@ public abstract class AutomatedTestBase
 			String configTemplate = FileUtils.readFileToString(CONFIG_TEMPLATE_FILE,
 					"UTF-8");
 			
-			// *** HACK ALERT *** HACK ALERT *** HACK ALERT ***
-			// Nimble does not accept paths that use backslash as the separator character.
-			// Since some of the tests use Nimble, we use forward slash in the paths that
-			// we put into the config file.
-			String localTempForwardSlash = curLocalTempDir.getPath().replace(File.separator, "/");
-			String configContents = configTemplate.replace("<scratch>scratch_space</scratch>", 
-					String.format("<scratch>%s/scratch_space</scratch>", localTempForwardSlash));
-			configContents = configContents.replace("<localtmpdir>/tmp/systemml</localtmpdir>", 
-					String.format("<localtmpdir>%s/localtmp</localtmpdir>", localTempForwardSlash));
-			configContents = configContents.replace("<NimbleScratch>nimbleoutput</NimbleScratch>", 
-					String.format("<NimbleScratch>%s/nimbleoutput</NimbleScratch>",
-							localTempForwardSlash));
-			// *** END HACK ***
-			
-			FileUtils.write(getCurConfigFile(), configContents, "UTF-8");
+			FileUtils.write(getCurConfigFile(), configTemplate, "UTF-8");
 			
 			System.out.printf(
 					"This test case will use SystemML config file %s\n",

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/96019cf5/src/test/scripts/functions/dmlscript/SystemML-config.xml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/dmlscript/SystemML-config.xml b/src/test/scripts/functions/dmlscript/SystemML-config.xml
index 8a9a457..dfcbd87 100644
--- a/src/test/scripts/functions/dmlscript/SystemML-config.xml
+++ b/src/test/scripts/functions/dmlscript/SystemML-config.xml
@@ -20,12 +20,6 @@
 <numreducers>10</numreducers>
 <scratch>scratch_space</scratch>
 <defaultblocksize>1000</defaultblocksize>
-<NumMergeTasks>4</NumMergeTasks>
-<NumberOfSowThreads>1</NumberOfSowThreads>
-<NumberOfReapThreads>1</NumberOfReapThreads>
-<SowerWaitInterval>1000</SowerWaitInterval>
-<ReaperWaitInterval>1000</ReaperWaitInterval>
-<NimbleScratch>nimbleoutput</NimbleScratch>
 <cp.parallel.matrixmult>true</cp.parallel.matrixmult>
 <cp.parallel.textio>false</cp.parallel.textio>
 </root>


[2/4] incubator-systemml git commit: [SYSTEMML-318] Moved new als-cg script to algorithms, renamed old als-ds

Posted by mb...@apache.org.
[SYSTEMML-318] Moved new als-cg script to algorithms, renamed old als-ds

https://issues.apache.org/jira/browse/SYSTEMML-318

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/a71bb12a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/a71bb12a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/a71bb12a

Branch: refs/heads/master
Commit: a71bb12ae68d08f60185bd8a9e09f47b532a4cc2
Parents: 96019cf
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Thu Jan 7 12:04:15 2016 -0800
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Thu Jan 7 12:04:15 2016 -0800

----------------------------------------------------------------------
 scripts/algorithms/ALS-CG.dml | 176 +++++++++++++++++++++++++++++++++++++
 scripts/algorithms/ALS-DS.dml | 170 +++++++++++++++++++++++++++++++++++
 scripts/algorithms/ALS.dml    | 170 -----------------------------------
 scripts/staging/ALS-CG.dml    | 176 -------------------------------------
 4 files changed, 346 insertions(+), 346 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a71bb12a/scripts/algorithms/ALS-CG.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/ALS-CG.dml b/scripts/algorithms/ALS-CG.dml
new file mode 100644
index 0000000..cd2ba0b
--- /dev/null
+++ b/scripts/algorithms/ALS-CG.dml
@@ -0,0 +1,176 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#  
+# THIS SCRIPT COMPUTES AN APPROXIMATE FACTORIZATION OF A LOW-RANK MATRIX X INTO TWO MATRICES U AND V 
+# USING ALTERNATING-LEAST-SQUARES (ALS) ALGORITHM WITH CONJUGATE GRADIENT 
+# MATRICES U AND V ARE COMPUTED BY MINIMIZING A LOSS FUNCTION (WITH REGULARIZATION)
+#
+# INPUT   PARAMETERS:
+# ---------------------------------------------------------------------------------------------
+# NAME    TYPE     DEFAULT  MEANING
+# ---------------------------------------------------------------------------------------------
+# X       String   ---      Location to read the input matrix X to be factorized
+# U       String   ---      Location to write the factor matrix U
+# V       String   ---      Location to write the factor matrix V
+# rank    Int      10       Rank of the factorization
+# reg     String   "L2"	    Regularization: 
+#                           "L2" = L2 regularization;
+#                           "wL2" = weighted L2 regularization
+# lambda  Double   0.000001 Regularization parameter, no regularization if 0.0
+# maxi    Int      50       Maximum number of iterations
+# check   Boolean  FALSE    Check for convergence after every iteration, i.e., updating U and V once
+# thr     Double   0.0001   Assuming check is set to TRUE, the algorithm stops and convergence is declared 
+#                           if the decrease in loss in any two consecutive iterations falls below this threshold; 
+#                           if check is FALSE thr is ignored
+# fmt     String   "text"   The output format of the factor matrices L and R, such as "text" or "csv"
+# ---------------------------------------------------------------------------------------------
+# OUTPUT: 
+# 1- An m x r matrix U, where r is the factorization rank 
+# 2- An r x n matrix V
+#
+# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
+# hadoop jar SystemML.jar -f ALS-CG.dml -nvargs X=INPUT_DIR/X U=OUTPUT_DIR/U V=OUTPUT_DIR/V rank=10 reg="L2" lambda=0.0001 fmt=csv
+
+fileX      = $X;
+fileU      = $U;
+fileV      = $V;
+
+# Default values of some parameters
+r          = ifdef ($rank, 10);         # $rank=10;
+reg	       = ifdef ($reg, "L2")         # $reg="L2";
+lambda	   = ifdef ($lambda, 0.000001); # $lambda=0.000001;
+max_iter   = ifdef ($maxi, 50);         # $maxi=50;
+check      = ifdef ($check, TRUE);	    # $check=FALSE;
+thr        = ifdef ($thr, 0.0001);      # $thr=0.0001;
+fmtO       = ifdef ($fmt, "text");      # $fmt="text";
+ 
+ 
+###### MAIN PART ######
+X = read (fileX);
+m = nrow (X);
+n = ncol (X);
+
+# initializing factor matrices
+U = rand (rows = m, cols = r, min = -0.5, max = 0.5); # mxr
+V = rand (rows = n, cols = r, min = -0.5, max = 0.5); # nxr
+
+W = ppred (X, 0, "!=");
+  
+# check for regularization
+if( reg == "L2" ) {
+  print ("BEGIN ALS-CG SCRIPT WITH NONZERO SQUARED LOSS + L2 WITH LAMBDA - " + lambda);
+  row_nonzeros = matrix(1, nrow(W), 1);
+  col_nonzeros = matrix(1, ncol(W), 1);
+} 
+else if( reg == "wL2" ) {
+  print ("BEGIN ALS-CG SCRIPT WITH NONZERO SQUARED LOSS + WEIGHTED L2 WITH LAMBDA - " + lambda);
+  row_nonzeros = rowSums(W);
+  col_nonzeros = t(colSums(W));
+} 
+else {
+  stop ("wrong regularization! " + reg);
+}
+
+# Loss Function with L2:
+# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
+#          + 0.5 * lambda * (sum (U ^ 2) + sum (V ^ 2))
+# Loss Function with weighted L2:
+# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
+#          + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros))
+
+is_U = TRUE;  # TRUE = Optimize U, FALSE = Optimize V
+maxinneriter = r ; # min (ncol (U), 15);
+
+if( check ) {
+  loss_init = 0.5 * sum (ppred(X,0, "!=") * (U %*% t(V) - X) ^ 2);
+  loss_init = loss_init + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros));
+  print ("-----   Initial train loss: " + loss_init + " -----");
+}
+
+it = 0;
+converged = FALSE;
+while( as.integer(it/2) < max_iter & ! converged ) 
+{
+  it = it + 1;
+  if( is_U ) {
+    G = (ppred(X,0,"!=") * (U %*% t(V) - X)) %*% V + lambda * U * row_nonzeros;
+  } 
+  else {
+    G = t(t(U) %*% (ppred(X,0,"!=") * (U %*% t(V) - X))) + lambda * V * col_nonzeros;
+  }
+
+  R = -G;
+  S = R;
+  norm_G2 = sum (G ^ 2);
+  norm_R2 = norm_G2;
+  
+  inneriter = 1;
+  tt = 0.000000001;
+  while( norm_R2 > tt * norm_G2 & inneriter <= maxinneriter )
+  {
+    if( is_U ) {
+      HS = (W * (S %*% t(V))) %*% V + lambda * S * row_nonzeros;
+      alpha = norm_R2 / sum (S * HS);
+      U = U + alpha * S;  # OK since U is not used in HS
+    } 
+    else {
+      HS = t(t(U) %*% (W * (U %*% t(S)))) + lambda * S * col_nonzeros;
+      alpha = norm_R2 / sum (S * HS);
+      V = V + alpha * S;  # OK since V is not used in HS
+    }
+
+    R = R - alpha * HS;
+    old_norm_R2 = norm_R2;
+    norm_R2 = sum (R ^ 2);
+    S = R + (norm_R2 / old_norm_R2) * S;
+    inneriter = inneriter + 1;
+  }  
+
+  is_U = ! is_U;
+	
+  # check for convergence
+  if( check & (it%%2 == 0) ) {
+    loss_cur = 0.5 * sum (ppred(X,0, "!=") * (U %*% t(V) - X) ^ 2);
+    loss_cur = loss_cur + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros));
+	
+    loss_dec = (loss_init - loss_cur) / loss_init;
+    print ("Train loss at iteration (" + as.integer(it/2) + "): " + loss_cur + " loss-dec " + loss_dec);
+    if( loss_dec >= 0 & loss_dec < thr | loss_init == 0 ) {
+      print ("----- ALS-CG converged after " + as.integer(it/2) + " iterations!");
+      converged = TRUE;
+    }
+    loss_init = loss_cur;
+  }
+}
+
+if( check ) {
+  print ("----- Final train loss: " + loss_init + " -----");
+}
+
+if( !converged ) {
+  print ("Max iteration achieved but not converged!");
+}
+
+V = t(V);
+write (U, fileU, format=fmtO);
+write (V, fileV, format=fmtO);
+ 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a71bb12a/scripts/algorithms/ALS-DS.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/ALS-DS.dml b/scripts/algorithms/ALS-DS.dml
new file mode 100644
index 0000000..1d0fce4
--- /dev/null
+++ b/scripts/algorithms/ALS-DS.dml
@@ -0,0 +1,170 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#  
+# THIS SCRIPT COMPUTES AN APPROXIMATE FACTORIZATION OF A LOW-RANK MATRIX V INTO TWO MATRICES L AND R 
+# USING ALTERNATING-LEAST-SQUARES (ALS) ALGORITHM 
+# MATRICES L AND R ARE COMPUTED BY MINIMIZING A LOSS FUNCTION (WITH REGULARIZATION)
+#
+# INPUT   PARAMETERS:
+# ---------------------------------------------------------------------------------------------
+# NAME    TYPE     DEFAULT  MEANING
+# ---------------------------------------------------------------------------------------------
+# V       String   ---      Location to read the input matrix V to be factorized
+# L       String   ---      Location to write the factor matrix L
+# R       String   ---      Location to write the factor matrix R
+# rank    Int      10       Rank of the factorization
+# reg     String   "L2"	    Regularization: 
+#						    "L2" = L2 regularization;
+#                           "wL2" = weighted L2 regularization
+# lambda  Double   0.0      Regularization parameter, no regularization if 0.0
+# maxi    Int      50       Maximum number of iterations
+# check   Boolean  FALSE    Check for convergence after every iteration, i.e., updating L and R once
+# thr     Double   0.0001   Assuming check is set to TRUE, the algorithm stops and convergence is declared 
+# 							if the decrease in loss in any two consecutive iterations falls below this threshold; 
+#							if check is FALSE thr is ignored
+# fmt     String   "text"   The output format of the factor matrices L and R, such as "text" or "csv"
+# ---------------------------------------------------------------------------------------------
+# OUTPUT: 
+# 1- An m x r matrix L, where r is the factorization rank 
+# 2- An r x n matrix R
+#
+# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
+# hadoop jar SystemML.jar -f ALS.dml -nvargs V=INPUT_DIR/V L=OUTPUT_DIR/L R=OUTPUT_DIR/R rank=10 reg="L2" lambda=0.0001 fmt=csv
+
+fileV      = $V;
+fileL	   = $L;
+fileR      = $R;
+
+# Default values of some parameters
+r          = ifdef ($rank, 10);	        # $rank=10;
+reg	   	   = ifdef ($reg, "L2")         # $reg="L2";
+lambda	   = ifdef ($lambda, 0.000001); # $lambda=0.000001;
+max_iter   = ifdef ($maxi, 50);         # $maxi=50;
+check      = ifdef ($check, FALSE);	    # $check=FALSE;
+thr        = ifdef ($thr, 0.0001);      # $thr=0.0001;
+fmtO       = ifdef ($fmt, "text");      # $fmt="text";
+
+V = read (fileV);
+
+
+# check the input matrix V, if some rows or columns contain only zeros remove them from V  
+V_nonzero_ind = ppred (V, 0, "!=");
+row_nonzeros = rowSums (V_nonzero_ind);
+col_nonzeros = t (colSums (V_nonzero_ind));
+orig_nonzero_rows_ind = ppred (row_nonzeros, 0, "!=");
+orig_nonzero_cols_ind = ppred (col_nonzeros, 0, "!=");
+num_zero_rows = nrow (V) - sum (orig_nonzero_rows_ind);
+num_zero_cols = ncol (V) - sum (orig_nonzero_cols_ind);
+if (num_zero_rows > 0) {
+	print ("Matrix V contains empty rows! These rows will be removed.");
+	V = removeEmpty (target = V, margin = "rows");
+}
+if (num_zero_cols > 0) {
+	print ("Matrix V contains empty columns! These columns will be removed.");
+	V = removeEmpty (target = V, margin = "cols");
+}
+if (num_zero_rows > 0 | num_zero_cols > 0) {
+	print ("Recomputing nonzero rows and columns!");
+	V_nonzero_ind = ppred (V, 0, "!=");
+	row_nonzeros = rowSums (V_nonzero_ind);
+	col_nonzeros = t (colSums (V_nonzero_ind));	
+}
+
+###### MAIN PART ######
+m = nrow (V);
+n = ncol (V);
+
+# initializing factor matrices
+L = rand (rows = m, cols = r, min = -0.5, max = 0.5);
+R = rand (rows = n, cols = r, min = -0.5, max = 0.5);
+
+# initializing transformed matrices
+Vt = t(V);
+  
+# check for regularization
+if (reg == "L2") {
+	print ("BEGIN ALS SCRIPT WITH NONZERO SQUARED LOSS + L2 WITH LAMBDA - " + lambda);
+} else if (reg == "wL2") {
+	print ("BEGIN ALS SCRIPT WITH NONZERO SQUARED LOSS + WEIGHTED L2 WITH LAMBDA - " + lambda);
+} else {
+	stop ("wrong regularization! " + reg);
+}
+
+if (check) {
+	loss_init = sum (V_nonzero_ind * (V - (L %*% t(R)))^2) + lambda * (sum ((L^2) * row_nonzeros) + sum ((R^2) * col_nonzeros));
+	print ("-----   Initial train loss: " + loss_init + " -----");
+}
+
+lambda_I = diag (matrix (lambda, rows = r, cols = 1));
+it = 0;
+converged = FALSE;
+while ((it < max_iter) & (!converged)) {
+	it = it + 1;
+	# keep R fixed and update L
+	parfor (i in 1:m) {
+    	R_nonzero_ind = t(ppred(V[i,],0,"!="));
+		R_nonzero = removeEmpty (target=R * R_nonzero_ind, margin="rows");			
+		A1 = (t(R_nonzero) %*% R_nonzero) + (as.scalar(row_nonzeros[i,1]) * lambda_I); # coefficient matrix
+		L[i,] = t(solve (A1, t(V[i,] %*% R)));		
+	}
+  
+	# keep L fixed and update R
+	parfor (j in 1:n) {
+		L_nonzero_ind = t(ppred(Vt[j,],0,"!="))
+		L_nonzero = removeEmpty (target=L * L_nonzero_ind, margin="rows");
+		A2 = (t(L_nonzero) %*% L_nonzero) + (as.scalar(col_nonzeros[j,1]) * lambda_I); # coefficient matrix
+		R[j,] = t(solve (A2, t(Vt[j,] %*% L)));    
+	}
+	
+	# check for convergence
+	if (check) {
+		loss_cur = sum (V_nonzero_ind * (V - (L %*% t(R)))^2) + lambda * (sum ((L^2) * row_nonzeros) + sum ((R^2) * col_nonzeros));
+		loss_dec = (loss_init - loss_cur) / loss_init;
+		print ("Train loss at iteration (R) " + it + ": " + loss_cur + " loss-dec " + loss_dec);
+		if (loss_dec >= 0 & loss_dec < thr | loss_init == 0) {
+			print ("----- ALS converged after " + it + " iterations!");
+			converged = TRUE;
+		}
+		loss_init = loss_cur;
+	}
+} # end of while loop
+
+if (check) {
+	print ("-----	Final train loss: " + loss_init + " -----");
+}
+
+if (!converged) {
+   print ("Max iteration achieved but not converged!");
+} 
+
+# inject 0s in L if original V had empty rows
+if (num_zero_rows > 0) {
+	L = removeEmpty (target = diag (orig_nonzero_rows_ind), margin = "cols") %*% L;
+}
+# inject 0s in R if original V had empty rows
+if (num_zero_cols > 0) {
+	R = removeEmpty (target = diag (orig_nonzero_cols_ind), margin = "cols") %*% R; 
+}
+Rt = t (R);
+write (L, fileL, format=fmtO);
+write (Rt, fileR, format=fmtO);
+ 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a71bb12a/scripts/algorithms/ALS.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/ALS.dml b/scripts/algorithms/ALS.dml
deleted file mode 100644
index 1d0fce4..0000000
--- a/scripts/algorithms/ALS.dml
+++ /dev/null
@@ -1,170 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#  
-# THIS SCRIPT COMPUTES AN APPROXIMATE FACTORIZATIONOF A LOW-RANK MATRIX V INTO TWO MATRICES L AND R 
-# USING ALTERNATING-LEAST-SQUARES (ALS) ALGORITHM 
-# MATRICES L AND R ARE COMPUTED BY MINIMIZING A LOSS FUNCTION (WITH REGULARIZATION)
-#
-# INPUT   PARAMETERS:
-# ---------------------------------------------------------------------------------------------
-# NAME    TYPE     DEFAULT  MEANING
-# ---------------------------------------------------------------------------------------------
-# V       String   ---      Location to read the input matrix V to be factorized
-# L       String   ---      Location to write the factor matrix L
-# R       String   ---      Location to write the factor matrix R
-# rank    Int      10       Rank of the factorization
-# reg     String   "L2"	    Regularization: 
-#						    "L2" = L2 regularization;
-#                           "wL2" = weighted L2 regularization
-# lambda  Double   0.0      Regularization parameter, no regularization if 0.0
-# maxi    Int      50       Maximum number of iterations
-# check   Boolean  FALSE    Check for convergence after every iteration, i.e., updating L and R once
-# thr     Double   0.0001   Assuming check is set to TRUE, the algorithm stops and convergence is declared 
-# 							if the decrease in loss in any two consecutive iterations falls below this threshold; 
-#							if check is FALSE thr is ignored
-# fmt     String   "text"   The output format of the factor matrices L and R, such as "text" or "csv"
-# ---------------------------------------------------------------------------------------------
-# OUTPUT: 
-# 1- An m x r matrix L, where r is the factorization rank 
-# 2- An r x n matrix R
-#
-# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
-# hadoop jar SystemML.jar -f ALS.dml -nvargs V=INPUT_DIR/V L=OUTPUT_DIR/L R=OUTPUT_DIR/R rank=10 reg="L2" lambda=0.0001 fmt=csv
-
-fileV      = $V;
-fileL	   = $L;
-fileR      = $R;
-
-# Default values of some parameters
-r          = ifdef ($rank, 10);	        # $rank=10;
-reg	   	   = ifdef ($reg, "L2")         # $reg="L2";
-lambda	   = ifdef ($lambda, 0.000001); # $lambda=0.000001;
-max_iter   = ifdef ($maxi, 50);         # $maxi=50;
-check      = ifdef ($check, FALSE);	    # $check=FALSE;
-thr        = ifdef ($thr, 0.0001);      # $thr=0.0001;
-fmtO       = ifdef ($fmt, "text");      # $fmt="text";
-
-V = read (fileV);
-
-
-# check the input matrix V, if some rows or columns contain only zeros remove them from V  
-V_nonzero_ind = ppred (V, 0, "!=");
-row_nonzeros = rowSums (V_nonzero_ind);
-col_nonzeros = t (colSums (V_nonzero_ind));
-orig_nonzero_rows_ind = ppred (row_nonzeros, 0, "!=");
-orig_nonzero_cols_ind = ppred (col_nonzeros, 0, "!=");
-num_zero_rows = nrow (V) - sum (orig_nonzero_rows_ind);
-num_zero_cols = ncol (V) - sum (orig_nonzero_cols_ind);
-if (num_zero_rows > 0) {
-	print ("Matrix V contains empty rows! These rows will be removed.");
-	V = removeEmpty (target = V, margin = "rows");
-}
-if (num_zero_cols > 0) {
-	print ("Matrix V contains empty columns! These columns will be removed.");
-	V = removeEmpty (target = V, margin = "cols");
-}
-if (num_zero_rows > 0 | num_zero_cols > 0) {
-	print ("Recomputing nonzero rows and columns!");
-	V_nonzero_ind = ppred (V, 0, "!=");
-	row_nonzeros = rowSums (V_nonzero_ind);
-	col_nonzeros = t (colSums (V_nonzero_ind));	
-}
-
-###### MAIN PART ######
-m = nrow (V);
-n = ncol (V);
-
-# initializing factor matrices
-L = rand (rows = m, cols = r, min = -0.5, max = 0.5);
-R = rand (rows = n, cols = r, min = -0.5, max = 0.5);
-
-# initializing transformed matrices
-Vt = t(V);
-  
-# check for regularization
-if (reg == "L2") {
-	print ("BEGIN ALS SCRIPT WITH NONZERO SQUARED LOSS + L2 WITH LAMBDA - " + lambda);
-} else if (reg == "wL2") {
-	print ("BEGIN ALS SCRIPT WITH NONZERO SQUARED LOSS + WEIGHTED L2 WITH LAMBDA - " + lambda);
-} else {
-	stop ("wrong regularization! " + reg);
-}
-
-if (check) {
-	loss_init = sum (V_nonzero_ind * (V - (L %*% t(R)))^2) + lambda * (sum ((L^2) * row_nonzeros) + sum ((R^2) * col_nonzeros));
-	print ("-----   Initial train loss: " + loss_init + " -----");
-}
-
-lambda_I = diag (matrix (lambda, rows = r, cols = 1));
-it = 0;
-converged = FALSE;
-while ((it < max_iter) & (!converged)) {
-	it = it + 1;
-	# keep R fixed and update L
-	parfor (i in 1:m) {
-    	R_nonzero_ind = t(ppred(V[i,],0,"!="));
-		R_nonzero = removeEmpty (target=R * R_nonzero_ind, margin="rows");			
-		A1 = (t(R_nonzero) %*% R_nonzero) + (as.scalar(row_nonzeros[i,1]) * lambda_I); # coefficient matrix
-		L[i,] = t(solve (A1, t(V[i,] %*% R)));		
-	}
-  
-	# keep L fixed and update R
-	parfor (j in 1:n) {
-		L_nonzero_ind = t(ppred(Vt[j,],0,"!="))
-		L_nonzero = removeEmpty (target=L * L_nonzero_ind, margin="rows");
-		A2 = (t(L_nonzero) %*% L_nonzero) + (as.scalar(col_nonzeros[j,1]) * lambda_I); # coefficient matrix
-		R[j,] = t(solve (A2, t(Vt[j,] %*% L)));    
-	}
-	
-	# check for convergence
-	if (check) {
-		loss_cur = sum (V_nonzero_ind * (V - (L %*% t(R)))^2) + lambda * (sum ((L^2) * row_nonzeros) + sum ((R^2) * col_nonzeros));
-		loss_dec = (loss_init - loss_cur) / loss_init;
-		print ("Train loss at iteration (R) " + it + ": " + loss_cur + " loss-dec " + loss_dec);
-		if (loss_dec >= 0 & loss_dec < thr | loss_init == 0) {
-			print ("----- ALS converged after " + it + " iterations!");
-			converged = TRUE;
-		}
-		loss_init = loss_cur;
-	}
-} # end of while loop
-
-if (check) {
-	print ("-----	Final train loss: " + loss_init + " -----");
-}
-
-if (!converged) {
-   print ("Max iteration achieved but not converged!");
-} 
-
-# inject 0s in L if original V had empty rows
-if (num_zero_rows > 0) {
-	L = removeEmpty (target = diag (orig_nonzero_rows_ind), margin = "cols") %*% L;
-}
-# inject 0s in R if original V had empty rows
-if (num_zero_cols > 0) {
-	R = removeEmpty (target = diag (orig_nonzero_cols_ind), margin = "cols") %*% R; 
-}
-Rt = t (R);
-write (L, fileL, format=fmtO);
-write (Rt, fileR, format=fmtO);
- 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a71bb12a/scripts/staging/ALS-CG.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/ALS-CG.dml b/scripts/staging/ALS-CG.dml
deleted file mode 100644
index cd2ba0b..0000000
--- a/scripts/staging/ALS-CG.dml
+++ /dev/null
@@ -1,176 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#  
-# THIS SCRIPT COMPUTES AN APPROXIMATE FACTORIZATION OF A LOW-RANK MATRIX X INTO TWO MATRICES U AND V 
-# USING ALTERNATING-LEAST-SQUARES (ALS) ALGORITHM WITH CONJUGATE GRADIENT 
-# MATRICES U AND V ARE COMPUTED BY MINIMIZING A LOSS FUNCTION (WITH REGULARIZATION)
-#
-# INPUT   PARAMETERS:
-# ---------------------------------------------------------------------------------------------
-# NAME    TYPE     DEFAULT  MEANING
-# ---------------------------------------------------------------------------------------------
-# X       String   ---      Location to read the input matrix X to be factorized
-# U       String   ---      Location to write the factor matrix U
-# V       String   ---      Location to write the factor matrix V
-# rank    Int      10       Rank of the factorization
-# reg     String   "L2"	    Regularization: 
-#                           "L2" = L2 regularization;
-#                           "wL2" = weighted L2 regularization
-# lambda  Double   0.000001 Regularization parameter, no regularization if 0.0
-# maxi    Int      50       Maximum number of iterations
-# check   Boolean  FALSE    Check for convergence after every iteration, i.e., updating U and V once
-# thr     Double   0.0001   Assuming check is set to TRUE, the algorithm stops and convergence is declared 
-#                           if the decrease in loss in any two consecutive iterations falls below this threshold; 
-#                           if check is FALSE thr is ignored
-# fmt     String   "text"   The output format of the factor matrices L and R, such as "text" or "csv"
-# ---------------------------------------------------------------------------------------------
-# OUTPUT: 
-# 1- An m x r matrix U, where r is the factorization rank 
-# 2- An r x n matrix V
-#
-# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
-# hadoop jar SystemML.jar -f ALS-CG.dml -nvargs X=INPUT_DIR/X U=OUTPUT_DIR/U V=OUTPUT_DIR/V rank=10 reg="L2" lambda=0.0001 fmt=csv
-
-fileX      = $X;
-fileU      = $U;
-fileV      = $V;
-
-# Default values of some parameters
-r          = ifdef ($rank, 10);         # $rank=10;
-reg	       = ifdef ($reg, "L2")         # $reg="L2";
-lambda	   = ifdef ($lambda, 0.000001); # $lambda=0.000001;
-max_iter   = ifdef ($maxi, 50);         # $maxi=50;
-check      = ifdef ($check, TRUE);	    # $check=FALSE;
-thr        = ifdef ($thr, 0.0001);      # $thr=0.0001;
-fmtO       = ifdef ($fmt, "text");      # $fmt="text";
- 
- 
-###### MAIN PART ######
-X = read (fileX);
-m = nrow (X);
-n = ncol (X);
-
-# initializing factor matrices
-U = rand (rows = m, cols = r, min = -0.5, max = 0.5); # mxr
-V = rand (rows = n, cols = r, min = -0.5, max = 0.5); # nxr
-
-W = ppred (X, 0, "!=");
-  
-# check for regularization
-if( reg == "L2" ) {
-  print ("BEGIN ALS-CG SCRIPT WITH NONZERO SQUARED LOSS + L2 WITH LAMBDA - " + lambda);
-  row_nonzeros = matrix(1, nrow(W), 1);
-  col_nonzeros = matrix(1, ncol(W), 1);
-} 
-else if( reg == "wL2" ) {
-  print ("BEGIN ALS-CG SCRIPT WITH NONZERO SQUARED LOSS + WEIGHTED L2 WITH LAMBDA - " + lambda);
-  row_nonzeros = rowSums(W);
-  col_nonzeros = t(colSums(W));
-} 
-else {
-  stop ("wrong regularization! " + reg);
-}
-
-# Loss Function with L2:
-# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
-#          + 0.5 * lambda * (sum (U ^ 2) + sum (V ^ 2))
-# Loss Function with weighted L2:
-# f (U, V) = 0.5 * sum (W * (U %*% V - X) ^ 2)
-#          + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros))
-
-is_U = TRUE;  # TRUE = Optimize U, FALSE = Optimize V
-maxinneriter = r ; # min (ncol (U), 15);
-
-if( check ) {
-  loss_init = 0.5 * sum (ppred(X,0, "!=") * (U %*% t(V) - X) ^ 2);
-  loss_init = loss_init + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros));
-  print ("-----   Initial train loss: " + loss_init + " -----");
-}
-
-it = 0;
-converged = FALSE;
-while( as.integer(it/2) < max_iter & ! converged ) 
-{
-  it = it + 1;
-  if( is_U ) {
-    G = (ppred(X,0,"!=") * (U %*% t(V) - X)) %*% V + lambda * U * row_nonzeros;
-  } 
-  else {
-    G = t(t(U) %*% (ppred(X,0,"!=") * (U %*% t(V) - X))) + lambda * V * col_nonzeros;
-  }
-
-  R = -G;
-  S = R;
-  norm_G2 = sum (G ^ 2);
-  norm_R2 = norm_G2;
-  
-  inneriter = 1;
-  tt = 0.000000001;
-  while( norm_R2 > tt * norm_G2 & inneriter <= maxinneriter )
-  {
-    if( is_U ) {
-      HS = (W * (S %*% t(V))) %*% V + lambda * S * row_nonzeros;
-      alpha = norm_R2 / sum (S * HS);
-      U = U + alpha * S;  # OK since U is not used in HS
-    } 
-    else {
-      HS = t(t(U) %*% (W * (U %*% t(S)))) + lambda * S * col_nonzeros;
-      alpha = norm_R2 / sum (S * HS);
-      V = V + alpha * S;  # OK since V is not used in HS
-    }
-
-    R = R - alpha * HS;
-    old_norm_R2 = norm_R2;
-    norm_R2 = sum (R ^ 2);
-    S = R + (norm_R2 / old_norm_R2) * S;
-    inneriter = inneriter + 1;
-  }  
-
-  is_U = ! is_U;
-	
-  # check for convergence
-  if( check & (it%%2 == 0) ) {
-    loss_cur = 0.5 * sum (ppred(X,0, "!=") * (U %*% t(V) - X) ^ 2);
-    loss_cur = loss_cur + 0.5 * lambda * (sum (U ^ 2 * row_nonzeros) + sum (V ^ 2 * col_nonzeros));
-	
-    loss_dec = (loss_init - loss_cur) / loss_init;
-    print ("Train loss at iteration (" + as.integer(it/2) + "): " + loss_cur + " loss-dec " + loss_dec);
-    if( loss_dec >= 0 & loss_dec < thr | loss_init == 0 ) {
-      print ("----- ALS-CG converged after " + as.integer(it/2) + " iterations!");
-      converged = TRUE;
-    }
-    loss_init = loss_cur;
-  }
-}
-
-if( check ) {
-  print ("----- Final train loss: " + loss_init + " -----");
-}
-
-if( !converged ) {
-  print ("Max iteration achieved but not converged!");
-}
-
-V = t(V);
-write (U, fileU, format=fmtO);
-write (V, fileV, format=fmtO);
- 
\ No newline at end of file


[3/4] incubator-systemml git commit: New probabilistic pca (ppca) script, still in staging; by Narine

Posted by mb...@apache.org.
New probabilistic pca (ppca) script, still in staging; by Narine

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/83a5b42d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/83a5b42d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/83a5b42d

Branch: refs/heads/master
Commit: 83a5b42d8d0baf61b6a4eecd5c1c18d16dada9e3
Parents: a71bb12
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Thu Jan 7 12:49:22 2016 -0800
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Thu Jan 7 12:49:22 2016 -0800

----------------------------------------------------------------------
 scripts/staging/PPCA.dml | 160 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 160 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/83a5b42d/scripts/staging/PPCA.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/PPCA.dml b/scripts/staging/PPCA.dml
new file mode 100644
index 0000000..667c709
--- /dev/null
+++ b/scripts/staging/PPCA.dml
@@ -0,0 +1,160 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+ 
+# This script performs Probabilistic Principal Component Analysis (PCA) on the given input data. 
+# It is based on paper: sPCA: Scalable Principal Component Analysis for Big Data on Distributed 
+# Platforms. Tarek Elgamal et.al.
+
+# INPUT PARAMETERS:
+# ---------------------------------------------------------------------------------------------
+# NAME   	TYPE   DEFAULT  MEANING
+# ---------------------------------------------------------------------------------------------
+# X  	 	String ---      location to read the matrix X input matrix
+# k      	Int    ---      indicates dimension of the new vector space constructed from eigen vectors
+# tolobj 	Int    0.00001  objective function tolerance value to stop ppca algorithm
+# tolrecerr	Int    0.02     reconstruction error tolerance value to stop the algorithm	
+# iter   	Int    10       maximum number of iterations
+# fmt    	String 'text'   output format of results PPCA such as "text" or "csv"
+# hadoop jar SystemML.jar -f PPCA.dml -nvargs X=/INPUT_DIR/X  C=/OUTPUT_DIR/C V=/OUTPUT_DIR/V k=2 tol=0.2 iter=100
+# ---------------------------------------------------------------------------------------------
+# OUTPUT PARAMETERS: 
+# ---------------------------------------------------------------------------------------------
+# NAME   TYPE   DEFAULT  MEANING
+# ---------------------------------------------------------------------------------------------
+# C     	Matrix  ---     principal components
+# V      	Matrix  ---     eigenvalues / variances of principal components
+#
+
+X = read($X);
+
+fileC = $C;
+fileV = $V;
+
+k = ifdef($k, ncol(X));
+iter = ifdef($iter, 10);
+tolobj = ifdef($tolobj, 0.00001);
+tolrecerr = ifdef($tolrecerr, 0.02);
+fmt0 = ifdef($fmt, "text"); 
+
+n = nrow(X);
+m = ncol(X);
+
+#initializing principal components matrix
+C =  rand(rows=m, cols=k, pdf="normal");
+ss = rand(rows=1, cols=1, pdf="normal");
+ss = as.scalar(ss);
+ssPrev = ss;
+
+# best selected principal components - with the lowest reconstruction error 
+PC = C;
+
+# initializing reconstruction error
+RE = tolrecerr+1;
+REBest = RE;
+
+Z = matrix(0,rows=1,cols=1);
+
+#Objective function value
+ObjRelChng = tolobj+1;
+
+# mean centered input matrix - dim -> [n,m]
+Xm = X - colMeans(X);
+
+#I -> k x k
+ITMP = matrix(1,rows=k,cols=1);
+I = diag(ITMP);
+
+i = 0;
+while (i < iter & ObjRelChng > tolobj & RE > tolrecerr){
+	#Estimation step - Covariance matrix 
+	#M -> k x k
+	M = t(C) %*% C + I*ss; 
+	
+	#Auxiliary matrix with n latent variables 
+	# Z -> n x k		
+	Z = Xm %*% (C %*% inv(M)); 
+
+	#ZtZ -> k x k
+	ZtZ = t(Z) %*% Z + inv(M)*ss;
+	
+	#XtZ -> m x k
+	XtZ = t(Xm) %*% Z;
+	
+	#Maximization step
+	#C ->  m x k
+	ZtZ_sum = sum(ZtZ); #+n*inv(M)); 
+	C = XtZ/ZtZ_sum;
+
+	#ss2 -> 1 x 1
+	ss2 = trace(ZtZ * (t(C) %*% C));
+
+	#ss3 -> 1 x 1 
+	ss3 = sum((Z %*% t(C)) %*% t(Xm));
+	
+	#Frobenius norm of reconstruction error -> Euclidean norm 
+	#Fn -> 1 x 1	
+	Fn = sum(Xm*Xm);
+
+	#ss -> 1 x 1
+	ss = (Fn + ss2 - 2*ss3)/(n*m);
+
+   #calculating objective function relative change
+   ObjRelChng = abs(1 - ss/ssPrev);
+   #print("Objective Relative Change: " + ObjRelChng + ", Objective: " + ss);
+
+	#Reconstruction error
+	R = ((Z %*% t(C)) -  Xm);	
+
+	#calculate the error
+	#TODO rethink calculation of reconstruction error .... 
+	#1-Norm of reconstruction error - a big dense matrix 
+	#RE -> n x m
+	RE = abs(sum(R)/sum(Xm));	
+	if (RE < REBest){
+		PC = C;
+		REBest = RE;
+	}	
+	#print("ss: " + ss +" = Fn( "+ Fn +" ) + ss2( " + ss2  +" ) - 2*ss3( " + ss3 + " ), Reconstruction Error: " + RE);
+
+	ssPrev = ss;	
+	i = i+1;
+}
+print("Objective Relative Change: " + ObjRelChng);
+print ("Number of iterations: " + i + ", Reconstruction Err: " + REBest);
+
+# reconstructs data
+# RD -> n x k
+RD = X %*% PC;
+
+# calculate eigenvalues - principal component variance
+RDMean = colMeans(RD);
+V = t(colMeans(RD*RD) - (RDMean*RDMean));
+
+# sorting eigenvalues and eigenvectors in decreasing order
+V_decr_idx = order(target=V,by=1,decreasing=TRUE,index.return=TRUE);
+VF_decr = table(seq(1,nrow(V)),V_decr_idx);
+V = VF_decr %*% V;
+PC = PC %*% VF_decr;
+
+# writing principal components 
+write(PC, fileC, format=fmt0);
+# writing eigen values/pc variance
+write(V, fileV, format=fmt0);


[4/4] incubator-systemml git commit: New simplification rewrite 'pushdown sum on additive binary', for ppca

Posted by mb...@apache.org.
New simplification rewrite 'pushdown sum on additive binary', for ppca

For example, we now rewrite sum(A+B) -> sum(A)+sum(B) and sum(A-B) ->
sum(A)-sum(B) if dims(A)==dims(B) and dt(A)==dt(B)==MATRIX. This
prevents an unnecessary intermediate, reduces the number of scans from 3
reads / 1 write to two reads, and simplifies binary/unary operations to
pure unary operations that are easier to parallelize. Down the road, we
can generalize this to matrix-vector and matrix-scalar operations too.

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/19af3f9b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/19af3f9b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/19af3f9b

Branch: refs/heads/master
Commit: 19af3f9be3736853ff0ccae4e2b074a4b5905c03
Parents: 83a5b42
Author: Matthias Boehm <mb...@us.ibm.com>
Authored: Fri Jan 8 11:07:18 2016 -0800
Committer: Matthias Boehm <mb...@us.ibm.com>
Committed: Fri Jan 8 11:07:18 2016 -0800

----------------------------------------------------------------------
 .../sysml/hops/rewrite/HopRewriteUtils.java     |  30 ++++
 .../RewriteAlgebraicSimplificationDynamic.java  |  51 ++++++
 .../aggregate/PushdownSumBinaryTest.java        | 163 +++++++++++++++++++
 .../scripts/functions/aggregate/PushdownSum1.R  |  34 ++++
 .../functions/aggregate/PushdownSum1.dml        |  25 +++
 .../scripts/functions/aggregate/PushdownSum2.R  |  34 ++++
 .../functions/aggregate/PushdownSum2.dml        |  25 +++
 7 files changed, 362 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/19af3f9b/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java b/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
index 95ddf0f..891c0b1 100644
--- a/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
+++ b/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java
@@ -25,6 +25,7 @@ import java.util.HashMap;
 import org.apache.sysml.api.DMLScript;
 import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
 import org.apache.sysml.hops.AggBinaryOp;
+import org.apache.sysml.hops.AggUnaryOp;
 import org.apache.sysml.hops.BinaryOp;
 import org.apache.sysml.hops.DataOp;
 import org.apache.sysml.hops.Hop;
@@ -32,6 +33,7 @@ import org.apache.sysml.hops.Hop.AggOp;
 import org.apache.sysml.hops.Hop.DataGenMethod;
 import org.apache.sysml.hops.DataGenOp;
 import org.apache.sysml.hops.Hop.DataOpTypes;
+import org.apache.sysml.hops.Hop.Direction;
 import org.apache.sysml.hops.Hop.FileFormatTypes;
 import org.apache.sysml.hops.Hop.OpOp2;
 import org.apache.sysml.hops.Hop.ParamBuiltinOp;
@@ -551,6 +553,34 @@ public class HopRewriteUtils
 	
 	/**
 	 * 
+	 * @param input
+	 * @return
+	 */
+	public static AggUnaryOp createSum( Hop input ) {
+		return createAggUnaryOp(input, AggOp.SUM, Direction.RowCol);
+	}
+	
+	/**
+	 * 
+	 * @param input
+	 * @param op
+	 * @param dir
+	 * @return
+	 */
+	public static AggUnaryOp createAggUnaryOp( Hop input, AggOp op, Direction dir )
+	{
+		DataType dt = (dir==Direction.RowCol) ? DataType.SCALAR : input.getDataType();
+		
+		AggUnaryOp auop = new AggUnaryOp(input.getName(), dt, input.getValueType(), op, dir, input);
+		auop.setRowsInBlock(input.getRowsInBlock());
+		auop.setColsInBlock(input.getColsInBlock());
+		auop.refreshSizeInformation();
+		
+		return auop;
+	}
+	
+	/**
+	 * 
 	 * @param left
 	 * @param right
 	 * @return

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/19af3f9b/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationDynamic.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationDynamic.java b/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationDynamic.java
index 7c4a67a..31c394b 100644
--- a/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationDynamic.java
+++ b/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationDynamic.java
@@ -166,6 +166,7 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 			hi = simplifyDiagMatrixMult(hop, hi, i);          //e.g., diag(X%*%Y)->rowSums(X*t(Y)); if col vector
 			hi = simplifySumDiagToTrace(hi);                  //e.g., sum(diag(X)) -> trace(X); if col vector
 			hi = pushdownBinaryOperationOnDiag(hop, hi, i);   //e.g., diag(X)*7 -> diag(X*7); if col vector
+			hi = pushdownSumOnAdditiveBinary(hop, hi, i);     //e.g., sum(A+B) -> sum(A)+sum(B); if dims(A)==dims(B)
 			hi = simplifyWeightedSquaredLoss(hop, hi, i);     //e.g., sum(W * (X - U %*% t(V)) ^ 2) -> wsl(X, U, t(V), W, true), 
 			hi = simplifyWeightedSigmoidMMChains(hop, hi, i); //e.g., W * sigmoid(Y%*%t(X)) -> wsigmoid(W, Y, t(X), type)
 			hi = simplifyWeightedDivMM(hop, hi, i);           //e.g., t(U) %*% (X/(U%*%t(V))) -> wdivmm(X, U, t(V), left)
@@ -1349,6 +1350,56 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 		return hi;
 	}
 	
+	/**
+	 * patterns: sum(A+B)->sum(A)+sum(B); sum(A-B)->sum(A)-sum(B)
+	 * 
+	 * @param parent
+	 * @param hi
+	 * @param pos
+	 * @return
+	 */
+	private Hop pushdownSumOnAdditiveBinary(Hop parent, Hop hi, int pos) 
+	{
+		//all patterns headed by full sum over binary operation
+		if(    hi instanceof AggUnaryOp //full sum root over binaryop
+			&& ((AggUnaryOp)hi).getDirection()==Direction.RowCol
+			&& ((AggUnaryOp)hi).getOp() == AggOp.SUM 
+			&& hi.getInput().get(0) instanceof BinaryOp   
+			&& hi.getInput().get(0).getParent().size()==1 ) //single parent
+		{
+			BinaryOp bop = (BinaryOp) hi.getInput().get(0);
+			Hop left = bop.getInput().get(0);
+			Hop right = bop.getInput().get(1);
+			
+			if( HopRewriteUtils.isEqualSize(left, right)  //dims(A) == dims(B)
+				&& left.getDataType() == DataType.MATRIX
+				&& right.getDataType() == DataType.MATRIX )			
+			{
+				OpOp2 applyOp = ( bop.getOp() == OpOp2.PLUS //pattern a: sum(A+B)->sum(A)+sum(B)
+						|| bop.getOp() == OpOp2.MINUS )     //pattern b: sum(A-B)->sum(A)-sum(B)
+						? bop.getOp() : null;
+				
+				if( applyOp != null ) {
+					//create new subdag sum(A) bop sum(B)
+					AggUnaryOp sum1 = HopRewriteUtils.createSum(left);
+					AggUnaryOp sum2 = HopRewriteUtils.createSum(right);					
+					BinaryOp newBin = HopRewriteUtils.createBinary(sum1, sum2, applyOp);
+
+					//rewire new subdag
+					HopRewriteUtils.removeChildReferenceByPos(parent, hi, pos);		
+					HopRewriteUtils.removeAllChildReferences(hi);
+					HopRewriteUtils.removeAllChildReferences(bop);
+					HopRewriteUtils.addChildReference(parent, newBin, pos);
+					
+					hi = newBin;
+					
+					LOG.debug("Applied pushdownSumOnAdditiveBinary.");
+				}				
+			}
+		}
+	
+		return hi;
+	}
 
 	/**
 	 * Searches for weighted squared loss expressions and replaces them with a quaternary operator. 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/19af3f9b/src/test/java/org/apache/sysml/test/integration/functions/aggregate/PushdownSumBinaryTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/aggregate/PushdownSumBinaryTest.java b/src/test/java/org/apache/sysml/test/integration/functions/aggregate/PushdownSumBinaryTest.java
new file mode 100644
index 0000000..1b87231
--- /dev/null
+++ b/src/test/java/org/apache/sysml/test/integration/functions/aggregate/PushdownSumBinaryTest.java
@@ -0,0 +1,163 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.integration.functions.aggregate;
+
+import java.util.HashMap;
+
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
+import org.apache.sysml.lops.LopProperties.ExecType;
+import org.apache.sysml.runtime.instructions.Instruction;
+import org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex;
+import org.apache.sysml.test.integration.AutomatedTestBase;
+import org.apache.sysml.test.integration.TestConfiguration;
+import org.apache.sysml.test.utils.TestUtils;
+import org.apache.sysml.utils.Statistics;
+
+/**
+ * 
+ */
+public class PushdownSumBinaryTest extends AutomatedTestBase 
+{
+	private final static String TEST_NAME1 = "PushdownSum1"; //+
+	private final static String TEST_NAME2 = "PushdownSum2"; //-
+	
+	private final static String TEST_DIR = "functions/aggregate/";
+	private static final String TEST_CLASS_DIR = TEST_DIR + PushdownSumBinaryTest.class.getSimpleName() + "/";
+	private final static double eps = 1e-10;
+	
+	private final static int rows = 1765;
+	private final static int cols = 19;
+	private final static double sparsity = 0.1;
+	
+	
+	@Override
+	public void setUp() 
+	{
+		addTestConfiguration(TEST_NAME1, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME1, new String[]{"C"})); 
+		addTestConfiguration(TEST_NAME2, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME2, new String[]{"C"})); 
+		TestUtils.clearAssertionInformation();
+
+		if (TEST_CACHE_ENABLED) {
+			setOutAndExpectedDeletionDisabled(true);
+		}
+	}
+
+	@BeforeClass
+	public static void init()
+	{
+		TestUtils.clearDirectory(TEST_DATA_DIR + TEST_CLASS_DIR);
+	}
+
+	@AfterClass
+	public static void cleanUp()
+	{
+		if (TEST_CACHE_ENABLED) {
+			TestUtils.clearDirectory(TEST_DATA_DIR + TEST_CLASS_DIR);
+		}
+	}
+
+	@Test
+	public void testPushDownSumPlusSP() {
+		runPushdownSumOnBinaryTest(TEST_NAME1, true, ExecType.SPARK);
+	}
+	
+	@Test
+	public void testPushDownSumMinusSP() {
+		runPushdownSumOnBinaryTest(TEST_NAME2, true, ExecType.SPARK);
+	}
+	
+	@Test
+	public void testPushDownSumPlusNoRewriteSP() {
+		runPushdownSumOnBinaryTest(TEST_NAME1, false, ExecType.SPARK);
+	}
+	
+	@Test
+	public void testPushDownSumMinusNoRewriteSP() {
+		runPushdownSumOnBinaryTest(TEST_NAME2, false, ExecType.SPARK);
+	}
+		
+	/**
+	 * 
+	 * @param testname
+	 * @param type
+	 * @param sparse
+	 * @param instType
+	 */
+	private void runPushdownSumOnBinaryTest( String testname, boolean equiDims, ExecType instType) 
+	{
+		//rtplatform for MR
+		RUNTIME_PLATFORM platformOld = rtplatform;
+		switch( instType ){
+			case MR: rtplatform = RUNTIME_PLATFORM.HADOOP; break;
+			case SPARK: rtplatform = RUNTIME_PLATFORM.SPARK; break;
+			default: rtplatform = RUNTIME_PLATFORM.HYBRID; break;
+		}
+	
+		boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
+		if( rtplatform == RUNTIME_PLATFORM.SPARK )
+			DMLScript.USE_LOCAL_SPARK_CONFIG = true;
+			
+		try
+		{
+			//determine script and function name
+			String TEST_NAME = testname;			
+			String TEST_CACHE_DIR = TEST_CACHE_ENABLED ? TEST_NAME + "_" + String.valueOf(equiDims) + "/" : "";
+			
+			TestConfiguration config = getTestConfiguration(TEST_NAME);
+			loadTestConfiguration(config, TEST_CACHE_DIR);
+			
+			// This is for running the junit test the new way, i.e., construct the arguments directly
+			String HOME = SCRIPT_DIR + TEST_DIR;
+			fullDMLScriptName = HOME + TEST_NAME + ".dml";
+			programArgs = new String[]{"-explain","-stats","-args", input("A"), input("B"), output("C") };
+			fullRScriptName = HOME + TEST_NAME + ".R";
+			rCmd = "Rscript" + " " + fullRScriptName + " " + inputDir() + " " + expectedDir();
+	
+			//generate actual dataset 
+			double[][] A = getRandomMatrix(rows, cols, -1, 1, sparsity, 7); 
+			writeInputMatrixWithMTD("A", A, true);
+			double[][] B = getRandomMatrix(rows, equiDims ? cols : 1, -1, 1, sparsity, 73); 
+			writeInputMatrixWithMTD("B", B, true);
+			
+			//run tests
+			runTest(true, false, null, -1); 
+			runRScript(true); 
+			
+			//compare output matrices
+			HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromHDFS("C");
+			HashMap<CellIndex, Double> rfile  = readRMatrixFromFS("C");
+			TestUtils.compareMatrices(dmlfile, rfile, eps, "Stat-DML", "Stat-R");
+			
+			String lopcode = TEST_NAME.equals(TEST_NAME1) ? "+" : "-";
+			String opcode = equiDims ? lopcode : Instruction.SP_INST_PREFIX+"map"+lopcode;
+			Assert.assertTrue("Non-applied rewrite", Statistics.getCPHeavyHitterOpCodes().contains(opcode));	
+		}
+		finally
+		{
+			rtplatform = platformOld;
+			DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/19af3f9b/src/test/scripts/functions/aggregate/PushdownSum1.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/aggregate/PushdownSum1.R b/src/test/scripts/functions/aggregate/PushdownSum1.R
new file mode 100644
index 0000000..4eb5c8b
--- /dev/null
+++ b/src/test/scripts/functions/aggregate/PushdownSum1.R
@@ -0,0 +1,34 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+if( ncol(B) == 1 ) {
+  B <- B %*% matrix(1,1,ncol(A))
+}
+
+C = as.matrix(sum(A+B));
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep=""));

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/19af3f9b/src/test/scripts/functions/aggregate/PushdownSum1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/aggregate/PushdownSum1.dml b/src/test/scripts/functions/aggregate/PushdownSum1.dml
new file mode 100644
index 0000000..e49db15
--- /dev/null
+++ b/src/test/scripts/functions/aggregate/PushdownSum1.dml
@@ -0,0 +1,25 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+A = read($1);
+B = read($2);
+C = as.matrix(sum(A+B))
+write(C, $3);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/19af3f9b/src/test/scripts/functions/aggregate/PushdownSum2.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/aggregate/PushdownSum2.R b/src/test/scripts/functions/aggregate/PushdownSum2.R
new file mode 100644
index 0000000..08986ff
--- /dev/null
+++ b/src/test/scripts/functions/aggregate/PushdownSum2.R
@@ -0,0 +1,34 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+
+A <- as.matrix(readMM(paste(args[1], "A.mtx", sep="")))
+B <- as.matrix(readMM(paste(args[1], "B.mtx", sep="")))
+if( ncol(B) == 1 ) {
+  B <- B %*% matrix(1,1,ncol(A))
+}
+
+C = as.matrix(sum(A-B));
+
+writeMM(as(C, "CsparseMatrix"), paste(args[2], "C", sep=""));

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/19af3f9b/src/test/scripts/functions/aggregate/PushdownSum2.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/aggregate/PushdownSum2.dml b/src/test/scripts/functions/aggregate/PushdownSum2.dml
new file mode 100644
index 0000000..eec34e7
--- /dev/null
+++ b/src/test/scripts/functions/aggregate/PushdownSum2.dml
@@ -0,0 +1,25 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+A = read($1);
+B = read($2);
+C = as.matrix(sum(A-B))
+write(C, $3);