You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/02/27 18:36:11 UTC

[9/9] incubator-systemml git commit: [SYSTEMML-1286] Code generator compiler integration, incl tests

[SYSTEMML-1286] Code generator compiler integration, incl tests

This patch fully integrates the new code generator into SystemML's
compilation chain, including dynamic recompilation. Note that this does
not yet apply to MLContext, JMLC, and other replicated instances of our
compilation chain; however, SYSTEMML-1325 will consolidate these anyway.

Furthermore, this also introduces various function and application
tests, including algorithms that were not contained in our test suite so
far (e.g., KMeans, Mlogreg, and PNMF).


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/bbefe96b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/bbefe96b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/bbefe96b

Branch: refs/heads/master
Commit: bbefe96b263f697eb3f4e0297379840930356c0d
Parents: 982ecb1
Author: Matthias Boehm <mb...@gmail.com>
Authored: Sun Feb 26 19:12:50 2017 -0800
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Sun Feb 26 20:42:51 2017 -0800

----------------------------------------------------------------------
 conf/SystemML-config.xml.template               |   12 +
 .../java/org/apache/sysml/api/DMLScript.java    |   15 +
 .../java/org/apache/sysml/conf/DMLConfig.java   |   13 +-
 .../org/apache/sysml/hops/OptimizerUtils.java   |   17 +-
 .../apache/sysml/hops/recompile/Recompiler.java |   14 +
 .../RewriteAlgebraicSimplificationDynamic.java  |   25 +-
 .../RewriteAlgebraicSimplificationStatic.java   |   15 +-
 .../org/apache/sysml/parser/DMLTranslator.java  |    6 +
 .../test/integration/AutomatedTestBase.java     |    7 +
 .../functions/codegen/AlgorithmGLM.java         |  194 ++++
 .../functions/codegen/AlgorithmKMeans.java      |  192 ++++
 .../functions/codegen/AlgorithmL2SVM.java       |  157 +++
 .../functions/codegen/AlgorithmLinregCG.java    |  149 +++
 .../functions/codegen/AlgorithmMLogreg.java     |  197 ++++
 .../functions/codegen/AlgorithmMSVM.java        |  157 +++
 .../functions/codegen/AlgorithmPNMF.java        |  142 +++
 .../functions/codegen/CellwiseTmplTest.java     |  183 +++
 .../functions/codegen/DAGCellwiseTmplTest.java  |  161 +++
 .../functions/codegen/OuterProdTmplTest.java    |  259 +++++
 .../functions/codegen/RowAggTmplTest.java       |  142 +++
 .../scripts/functions/codegen/Algorithm_GLM.R   | 1081 ++++++++++++++++++
 .../scripts/functions/codegen/Algorithm_GLM.dml | 1053 +++++++++++++++++
 .../functions/codegen/Algorithm_Kmeans.dml      |  243 ++++
 .../scripts/functions/codegen/Algorithm_L2SVM.R |   98 ++
 .../functions/codegen/Algorithm_L2SVM.dml       |  106 ++
 .../functions/codegen/Algorithm_LinregCG.R      |   57 +
 .../functions/codegen/Algorithm_LinregCG.dml    |   56 +
 .../functions/codegen/Algorithm_MLogreg.R       |  278 +++++
 .../functions/codegen/Algorithm_MLogreg.dml     |  274 +++++
 .../scripts/functions/codegen/Algorithm_MSVM.R  |  133 +++
 .../functions/codegen/Algorithm_MSVM.dml        |  150 +++
 .../scripts/functions/codegen/Algorithm_PNMF.R  |   43 +
 .../functions/codegen/Algorithm_PNMF.dml        |   40 +
 .../functions/codegen/DAGcellwisetmpl1.R        |   36 +
 .../functions/codegen/DAGcellwisetmpl1.dml      |   31 +
 .../functions/codegen/DAGcellwisetmpl2.R        |   36 +
 .../functions/codegen/DAGcellwisetmpl2.dml      |   31 +
 .../functions/codegen/DAGcellwisetmpl3.R        |   36 +
 .../functions/codegen/DAGcellwisetmpl3.dml      |   31 +
 .../codegen/SystemML-config-codegen.xml         |   61 +
 .../scripts/functions/codegen/cellwisetmpl1.R   |   43 +
 .../scripts/functions/codegen/cellwisetmpl1.dml |   27 +
 .../scripts/functions/codegen/cellwisetmpl2.R   |   31 +
 .../scripts/functions/codegen/cellwisetmpl2.dml |   28 +
 .../scripts/functions/codegen/cellwisetmpl3.R   |   31 +
 .../scripts/functions/codegen/cellwisetmpl3.dml |   24 +
 .../scripts/functions/codegen/cellwisetmpl4.R   |   32 +
 .../scripts/functions/codegen/cellwisetmpl4.dml |   26 +
 .../scripts/functions/codegen/cellwisetmpl5.R   |   34 +
 .../scripts/functions/codegen/cellwisetmpl5.dml |   29 +
 .../scripts/functions/codegen/cellwisetmpl6.R   |   33 +
 .../scripts/functions/codegen/cellwisetmpl6.dml |   54 +
 .../functions/codegen/codegenIntegration.R      |   45 +
 .../functions/codegen/codegenIntegration.dml    |   67 ++
 .../scripts/functions/codegen/rowAggPattern1.R  |   29 +
 .../functions/codegen/rowAggPattern1.dml        |   26 +
 .../scripts/functions/codegen/rowAggPattern2.R  |   31 +
 .../functions/codegen/rowAggPattern2.dml        |   30 +
 .../scripts/functions/codegen/rowAggPattern3.R  |   31 +
 .../functions/codegen/rowAggPattern3.dml        |   30 +
 .../scripts/functions/codegen/rowAggPattern4.R  |   27 +
 .../functions/codegen/rowAggPattern4.dml        |   25 +
 src/test/scripts/functions/codegen/wcemm.R      |   35 +
 src/test/scripts/functions/codegen/wcemm.dml    |   30 +
 src/test/scripts/functions/codegen/wdivmm.R     |   32 +
 src/test/scripts/functions/codegen/wdivmm.dml   |   29 +
 .../scripts/functions/codegen/wdivmmRight.R     |   32 +
 .../scripts/functions/codegen/wdivmmRight.dml   |   32 +
 .../functions/codegen/wdivmmRightNotranspose.R  |   32 +
 .../codegen/wdivmmRightNotranspose.dml          |   31 +
 .../functions/codegen/wdivmmTransposeOut.R      |   32 +
 .../functions/codegen/wdivmmTransposeOut.dml    |   30 +
 .../scripts/functions/codegen/wdivmmbasic.R     |   32 +
 .../scripts/functions/codegen/wdivmmbasic.dml   |   30 +
 src/test/scripts/functions/codegen/wsigmoid.R   |   33 +
 src/test/scripts/functions/codegen/wsigmoid.dml |   30 +
 .../functions/codegen/ZPackageSuite.java        |   45 +
 77 files changed, 7131 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/bbefe96b/conf/SystemML-config.xml.template
----------------------------------------------------------------------
diff --git a/conf/SystemML-config.xml.template b/conf/SystemML-config.xml.template
index 9fc2aef..da80039 100644
--- a/conf/SystemML-config.xml.template
+++ b/conf/SystemML-config.xml.template
@@ -53,4 +53,16 @@
    
    <!-- enables multi-threaded read/write of text formats in singlenode control program -->
    <cp.parallel.textio>true</cp.parallel.textio>
+   
+   <!-- enables compressed linear algebra, experimental feature -->
+   <compressed.linalg>false</compressed.linalg>
+   
+   <!-- enables operator fusion via code generation, experimental feature -->
+   <codegen.enabled>false</codegen.enabled>
+   
+   <!-- if codegen.enabled, enables source code caching of fused operators -->
+   <codegen.plancache>false</codegen.plancache>
+   
+   <!-- if codegen.enabled, compile literals as constants: 1..heuristic, 2..always -->
+   <codegen.literals>1</codegen.literals>
 </root>

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/bbefe96b/src/main/java/org/apache/sysml/api/DMLScript.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/DMLScript.java b/src/main/java/org/apache/sysml/api/DMLScript.java
index 83d0f5b..80c78c1 100644
--- a/src/main/java/org/apache/sysml/api/DMLScript.java
+++ b/src/main/java/org/apache/sysml/api/DMLScript.java
@@ -56,6 +56,7 @@ import org.apache.sysml.debug.DMLDebuggerProgramInfo;
 import org.apache.sysml.hops.HopsException;
 import org.apache.sysml.hops.OptimizerUtils;
 import org.apache.sysml.hops.OptimizerUtils.OptimizationLevel;
+import org.apache.sysml.hops.codegen.SpoofCompiler;
 import org.apache.sysml.hops.globalopt.GlobalOptimizerWrapper;
 import org.apache.sysml.lops.Lop;
 import org.apache.sysml.lops.LopsException;
@@ -606,6 +607,20 @@ public class DMLScript
 					 +"Memory Budget = " + ((double)OptimizerUtils.getLocalMemBudget()/1024/1024) + " MB" + "\n");
 		}
 
+		//Step 5.1: Generate code for the rewritten HOP DAGs 
+		if( dmlconf.getBooleanValue(DMLConfig.CODEGEN) ){
+			SpoofCompiler.USE_PLAN_CACHE = dmlconf.getBooleanValue(DMLConfig.CODEGEN_PLANCACHE);
+			SpoofCompiler.ALWAYS_COMPILE_LITERALS = (dmlconf.getIntValue(DMLConfig.CODEGEN_LITERALS)==2);
+			
+			dmlt.codgenHopsDAG(prog);
+			
+			if (LOG.isDebugEnabled()) {
+				LOG.debug("\n********************** HOPS DAG (After Codegen) *******************");
+				dmlt.printHops(prog);
+				
+			}
+		}
+		
 		//Step 6: construct lops (incl exec type and op selection)
 		dmlt.constructLops(prog);
 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/bbefe96b/src/main/java/org/apache/sysml/conf/DMLConfig.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/conf/DMLConfig.java b/src/main/java/org/apache/sysml/conf/DMLConfig.java
index 922ba82..3d0fb28 100644
--- a/src/main/java/org/apache/sysml/conf/DMLConfig.java
+++ b/src/main/java/org/apache/sysml/conf/DMLConfig.java
@@ -67,10 +67,14 @@ public class DMLConfig
 	public static final String YARN_APPMASTER       = "dml.yarn.appmaster"; 	
 	public static final String YARN_APPMASTERMEM    = "dml.yarn.appmaster.mem"; 
 	public static final String YARN_MAPREDUCEMEM    = "dml.yarn.mapreduce.mem"; 
-	public static final String YARN_APPQUEUE    	= "dml.yarn.app.queue"; 
+	public static final String YARN_APPQUEUE        = "dml.yarn.app.queue"; 
 	public static final String CP_PARALLEL_MATRIXMULT = "cp.parallel.matrixmult";
 	public static final String CP_PARALLEL_TEXTIO   = "cp.parallel.textio";
 	public static final String COMPRESSED_LINALG    = "compressed.linalg";
+	public static final String CODEGEN              = "codegen.enabled"; //boolean
+	public static final String CODEGEN_PLANCACHE    = "codegen.plancache"; //boolean
+	public static final String CODEGEN_LITERALS     = "codegen.literals"; //1..heuristic, 2..always
+
 	// Fraction of available memory to use. The available memory is computed when the JCudaContext is created
 	// to handle the tradeoff on calling cudaMemGetInfo too often.
 	public static final String GPU_MEMORY_UTILIZATION_FACTOR    = "gpu.memory.util.factor";
@@ -107,6 +111,10 @@ public class DMLConfig
 		_defaultVals.put(CP_PARALLEL_MATRIXMULT, "true" );
 		_defaultVals.put(CP_PARALLEL_TEXTIO,     "true" );
 		_defaultVals.put(COMPRESSED_LINALG,      "false" );
+		_defaultVals.put(CODEGEN,                "false" );
+		_defaultVals.put(CODEGEN_PLANCACHE,      "true" );
+		_defaultVals.put(CODEGEN_LITERALS,       "1" );
+		
 		_defaultVals.put(GPU_MEMORY_UTILIZATION_FACTOR,      "0.9" );
 		_defaultVals.put(REFRESH_AVAILABLE_MEMORY_EVERY_TIME,      "true" );
 	}
@@ -392,7 +400,8 @@ public class DMLConfig
 				LOCAL_TMP_DIR,SCRATCH_SPACE,OPTIMIZATION_LEVEL,
 				NUM_REDUCERS, DEFAULT_BLOCK_SIZE,
 				YARN_APPMASTER, YARN_APPMASTERMEM, YARN_MAPREDUCEMEM, 
-				CP_PARALLEL_MATRIXMULT, CP_PARALLEL_TEXTIO
+				CP_PARALLEL_MATRIXMULT, CP_PARALLEL_TEXTIO,
+				COMPRESSED_LINALG, CODEGEN, CODEGEN_LITERALS, CODEGEN_PLANCACHE,
 		}; 
 		
 		StringBuilder sb = new StringBuilder();

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/bbefe96b/src/main/java/org/apache/sysml/hops/OptimizerUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/OptimizerUtils.java b/src/main/java/org/apache/sysml/hops/OptimizerUtils.java
index 86b7968..6efd799 100644
--- a/src/main/java/org/apache/sysml/hops/OptimizerUtils.java
+++ b/src/main/java/org/apache/sysml/hops/OptimizerUtils.java
@@ -110,6 +110,7 @@ public class OptimizerUtils
 	public static boolean ALLOW_CONSTANT_FOLDING = true;
 	
 	public static boolean ALLOW_ALGEBRAIC_SIMPLIFICATION = true; 
+	public static boolean ALLOW_OPERATOR_FUSION = true; 
 	
 	/**
 	 * Enables if-else branch removal for constant predicates (original literals or 
@@ -272,7 +273,7 @@ public class OptimizerUtils
 
 		//handle optimization level
 		int optlevel = dmlconf.getIntValue(DMLConfig.OPTIMIZATION_LEVEL);
-		if( optlevel < 0 || optlevel > 5 )
+		if( optlevel < 0 || optlevel > 7 )
 			throw new DMLRuntimeException("Error: invalid optimization level '"+optlevel+"' (valid values: 0-5).");
 	
 		// This overrides any optimization level that is present in the configuration file.
@@ -336,6 +337,20 @@ public class OptimizerUtils
 				cconf.set(ConfigType.ALLOW_DYN_RECOMPILATION, false);
 				cconf.set(ConfigType.ALLOW_INDIVIDUAL_SB_SPECIFIC_OPS, false);
 				break;
+			
+			// opt level 6 and 7: SPOOF w/o fused operators, otherwise same as O2
+			// (hidden optimization levels not documented on purpose, as they will
+			// be removed once SPOOF is production ready)	
+			case 6:
+				cconf.set(ConfigType.OPT_LEVEL, OptimizationLevel.O2_LOCAL_MEMORY_DEFAULT.ordinal());
+				ALLOW_AUTO_VECTORIZATION = false;
+				break;
+			case 7:				
+				cconf.set(ConfigType.OPT_LEVEL, OptimizationLevel.O2_LOCAL_MEMORY_DEFAULT.ordinal());
+				ALLOW_OPERATOR_FUSION = false;
+				ALLOW_AUTO_VECTORIZATION = false;
+				ALLOW_SUM_PRODUCT_REWRITES = false;
+				break;	
 		}
 		
 		//handle parallel text io (incl awareness of thread contention in <jdk8)

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/bbefe96b/src/main/java/org/apache/sysml/hops/recompile/Recompiler.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/recompile/Recompiler.java b/src/main/java/org/apache/sysml/hops/recompile/Recompiler.java
index 8b121d7..da13d0a 100644
--- a/src/main/java/org/apache/sysml/hops/recompile/Recompiler.java
+++ b/src/main/java/org/apache/sysml/hops/recompile/Recompiler.java
@@ -34,6 +34,7 @@ import org.apache.hadoop.mapred.JobConf;
 import org.apache.wink.json4j.JSONObject;
 import org.apache.sysml.api.DMLScript;
 import org.apache.sysml.conf.ConfigurationManager;
+import org.apache.sysml.conf.DMLConfig;
 import org.apache.sysml.conf.CompilerConfig.ConfigType;
 import org.apache.sysml.hops.DataGenOp;
 import org.apache.sysml.hops.DataOp;
@@ -52,6 +53,7 @@ import org.apache.sysml.hops.MemoTable;
 import org.apache.sysml.hops.OptimizerUtils;
 import org.apache.sysml.hops.ReorgOp;
 import org.apache.sysml.hops.UnaryOp;
+import org.apache.sysml.hops.codegen.SpoofCompiler;
 import org.apache.sysml.hops.rewrite.HopRewriteUtils;
 import org.apache.sysml.hops.rewrite.ProgramRewriter;
 import org.apache.sysml.lops.CSVReBlock;
@@ -210,6 +212,12 @@ public class Recompiler
 				hopRoot.refreshMemEstimates(memo); 
 			memo.extract(hops, status);
 			
+			// codegen if enabled
+			if( ConfigurationManager.getDMLConfig().getBooleanValue(DMLConfig.CODEGEN) && SpoofCompiler.RECOMPILE ) {
+				Hop.resetVisitStatus(hops);
+				hops = SpoofCompiler.optimize(hops, SpoofCompiler.ALWAYS_COMPILE_LITERALS);
+			}
+			
 			// construct lops			
 			Dag<Lop> dag = new Dag<Lop>();
 			for( Hop hopRoot : hops ){
@@ -304,6 +312,12 @@ public class Recompiler
 			hops.resetVisitStatus();
 			hops.refreshMemEstimates(memo); 		
 			
+			// codegen if enabled
+			if( ConfigurationManager.getDMLConfig().getBooleanValue(DMLConfig.CODEGEN) && SpoofCompiler.RECOMPILE ) {
+				hops.resetVisitStatus();
+				hops = SpoofCompiler.optimize(hops, false);
+			}
+			
 			// construct lops			
 			Dag<Lop> dag = new Dag<Lop>();
 			Lop lops = hops.constructLops();

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/bbefe96b/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationDynamic.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationDynamic.java b/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationDynamic.java
index cc67cc1..20c1eeb 100644
--- a/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationDynamic.java
+++ b/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationDynamic.java
@@ -41,6 +41,7 @@ import org.apache.sysml.hops.HopsException;
 import org.apache.sysml.hops.IndexingOp;
 import org.apache.sysml.hops.LeftIndexingOp;
 import org.apache.sysml.hops.LiteralOp;
+import org.apache.sysml.hops.OptimizerUtils;
 import org.apache.sysml.hops.Hop.OpOp2;
 import org.apache.sysml.hops.ReorgOp;
 import org.apache.sysml.hops.UnaryOp;
@@ -149,11 +150,13 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 			hi = removeUnnecessaryRightIndexing(hop, hi, i);  //e.g., X[,1] -> X, if output == input size 
 			hi = removeEmptyLeftIndexing(hop, hi, i);         //e.g., X[,1]=Y -> matrix(0,nrow(X),ncol(X)), if nnz(X)==0 and nnz(Y)==0 
 			hi = removeUnnecessaryLeftIndexing(hop, hi, i);   //e.g., X[,1]=Y -> Y, if output == input dims 
-			hi = fuseLeftIndexingChainToAppend(hop, hi, i);   //e.g., X[,1]=A; X[,2]=B -> X=cbind(A,B), iff ncol(X)==2 and col1/2 lix
+			if(OptimizerUtils.ALLOW_OPERATOR_FUSION)
+				hi = fuseLeftIndexingChainToAppend(hop, hi, i);   //e.g., X[,1]=A; X[,2]=B -> X=cbind(A,B), iff ncol(X)==2 and col1/2 lix
 			hi = removeUnnecessaryCumulativeOp(hop, hi, i);   //e.g., cumsum(X) -> X, if nrow(X)==1;
 			hi = removeUnnecessaryReorgOperation(hop, hi, i); //e.g., matrix(X) -> X, if dims(in)==dims(out); r(X)->X, if 1x1 dims
 			hi = removeUnnecessaryOuterProduct(hop, hi, i);   //e.g., X*(Y%*%matrix(1,...) -> X*Y, if Y col vector
-			hi = fuseDatagenAndReorgOperation(hop, hi, i);    //e.g., t(rand(rows=10,cols=1)) -> rand(rows=1,cols=10), if one dim=1
+			if(OptimizerUtils.ALLOW_OPERATOR_FUSION)
+				hi = fuseDatagenAndReorgOperation(hop, hi, i);    //e.g., t(rand(rows=10,cols=1)) -> rand(rows=1,cols=10), if one dim=1
 			hi = simplifyColwiseAggregate(hop, hi, i);        //e.g., colsums(X) -> sum(X) or X, if col/row vector
 			hi = simplifyRowwiseAggregate(hop, hi, i);        //e.g., rowsums(X) -> sum(X) or X, if row/col vector
 			hi = simplifyColSumsMVMult(hop, hi, i);           //e.g., colSums(X*Y) -> t(Y) %*% X, if Y col vector
@@ -171,14 +174,16 @@ public class RewriteAlgebraicSimplificationDynamic extends HopRewriteRule
 			hi = simplifySumDiagToTrace(hi);                  //e.g., sum(diag(X)) -> trace(X); if col vector
 			hi = pushdownBinaryOperationOnDiag(hop, hi, i);   //e.g., diag(X)*7 -> diag(X*7); if col vector
 			hi = pushdownSumOnAdditiveBinary(hop, hi, i);     //e.g., sum(A+B) -> sum(A)+sum(B); if dims(A)==dims(B)
-			hi = simplifyWeightedSquaredLoss(hop, hi, i);     //e.g., sum(W * (X - U %*% t(V)) ^ 2) -> wsl(X, U, t(V), W, true), 
-			hi = simplifyWeightedSigmoidMMChains(hop, hi, i); //e.g., W * sigmoid(Y%*%t(X)) -> wsigmoid(W, Y, t(X), type)
-			hi = simplifyWeightedDivMM(hop, hi, i);           //e.g., t(U) %*% (X/(U%*%t(V))) -> wdivmm(X, U, t(V), left)
-			hi = simplifyWeightedCrossEntropy(hop, hi, i);    //e.g., sum(X*log(U%*%t(V))) -> wcemm(X, U, t(V))
-			hi = simplifyWeightedUnaryMM(hop, hi, i);         //e.g., X*exp(U%*%t(V)) -> wumm(X, U, t(V), exp)
-			hi = simplifyDotProductSum(hop, hi, i);           //e.g., sum(v^2) -> t(v)%*%v if ncol(v)==1 
-			hi = fuseSumSquared(hop, hi, i);                  //e.g., sum(X^2) -> sumSq(X), if ncol(X)>1
-			hi = fuseAxpyBinaryOperationChain(hop, hi, i);    //e.g., (X+s*Y) -> (X+*s Y), (X-s*Y) -> (X-*s Y) 	
+			if(OptimizerUtils.ALLOW_OPERATOR_FUSION) {
+				hi = simplifyWeightedSquaredLoss(hop, hi, i);     //e.g., sum(W * (X - U %*% t(V)) ^ 2) -> wsl(X, U, t(V), W, true), 
+				hi = simplifyWeightedSigmoidMMChains(hop, hi, i); //e.g., W * sigmoid(Y%*%t(X)) -> wsigmoid(W, Y, t(X), type)
+				hi = simplifyWeightedDivMM(hop, hi, i);           //e.g., t(U) %*% (X/(U%*%t(V))) -> wdivmm(X, U, t(V), left)
+				hi = simplifyWeightedCrossEntropy(hop, hi, i);    //e.g., sum(X*log(U%*%t(V))) -> wcemm(X, U, t(V))
+				hi = simplifyWeightedUnaryMM(hop, hi, i);         //e.g., X*exp(U%*%t(V)) -> wumm(X, U, t(V), exp)
+				hi = simplifyDotProductSum(hop, hi, i);           //e.g., sum(v^2) -> t(v)%*%v if ncol(v)==1 
+				hi = fuseSumSquared(hop, hi, i);                  //e.g., sum(X^2) -> sumSq(X), if ncol(X)>1
+				hi = fuseAxpyBinaryOperationChain(hop, hi, i);    //e.g., (X+s*Y) -> (X+*s Y), (X-s*Y) -> (X-*s Y) 	
+			}
 			hi = reorderMinusMatrixMult(hop, hi, i);          //e.g., (-t(X))%*%y->-(t(X)%*%y), TODO size
 			hi = simplifySumMatrixMult(hop, hi, i);           //e.g., sum(A%*%B) -> sum(t(colSums(A))*rowSums(B)), if not dot product / wsloss
 			hi = simplifyEmptyBinaryOperation(hop, hi, i);    //e.g., X*Y -> matrix(0,nrow(X), ncol(X)) / X+Y->X / X-Y -> X

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/bbefe96b/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationStatic.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationStatic.java b/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationStatic.java
index 2ae27c8..5e97829 100644
--- a/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationStatic.java
+++ b/src/main/java/org/apache/sysml/hops/rewrite/RewriteAlgebraicSimplificationStatic.java
@@ -42,6 +42,7 @@ import org.apache.sysml.hops.Hop.ParamBuiltinOp;
 import org.apache.sysml.hops.Hop.ReOrgOp;
 import org.apache.sysml.hops.HopsException;
 import org.apache.sysml.hops.LiteralOp;
+import org.apache.sysml.hops.OptimizerUtils;
 import org.apache.sysml.hops.Hop.OpOp2;
 import org.apache.sysml.hops.ParameterizedBuiltinOp;
 import org.apache.sysml.hops.ReorgOp;
@@ -142,7 +143,8 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
  			hi = simplifyBinaryToUnaryOperation(hop, hi, i);     //e.g., X*X -> X^2 (pow2), X+X -> X*2, (X>0)-(X<0) -> sign(X)
  			hi = canonicalizeMatrixMultScalarAdd(hi);            //e.g., eps+U%*%t(V) -> U%*%t(V)+eps, U%*%t(V)-eps -> U%*%t(V)+(-eps) 
  			hi = simplifyReverseOperation(hop, hi, i);           //e.g., table(seq(1,nrow(X),1),seq(nrow(X),1,-1)) %*% X -> rev(X)
-			hi = simplifyMultiBinaryToBinaryOperation(hi);       //e.g., 1-X*Y -> X 1-* Y
+ 			if(OptimizerUtils.ALLOW_OPERATOR_FUSION)
+ 				hi = simplifyMultiBinaryToBinaryOperation(hi);       //e.g., 1-X*Y -> X 1-* Y
  			hi = simplifyDistributiveBinaryOperation(hop, hi, i);//e.g., (X-Y*X) -> (1-Y)*X
  			hi = simplifyBushyBinaryOperation(hop, hi, i);       //e.g., (X*(Y*(Z%*%v))) -> (X*Y)*(Z%*%v)
  			hi = simplifyUnaryAggReorgOperation(hop, hi, i);     //e.g., sum(t(X)) -> sum(X)
@@ -152,7 +154,8 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
  			hi = pushdownSumBinaryMult(hop, hi, i);              //e.g., sum(lamda*X) -> lamda*sum(X)
  			hi = simplifyUnaryPPredOperation(hop, hi, i);        //e.g., abs(ppred()) -> ppred(), others: round, ceil, floor
  			hi = simplifyTransposedAppend(hop, hi, i);           //e.g., t(cbind(t(A),t(B))) -> rbind(A,B);
- 			hi = fuseBinarySubDAGToUnaryOperation(hop, hi, i);   //e.g., X*(1-X)-> sprop(X) || 1/(1+exp(-X)) -> sigmoid(X) || X*(X>0) -> selp(X)
+ 			if(OptimizerUtils.ALLOW_OPERATOR_FUSION)
+ 				hi = fuseBinarySubDAGToUnaryOperation(hop, hi, i);   //e.g., X*(1-X)-> sprop(X) || 1/(1+exp(-X)) -> sigmoid(X) || X*(X>0) -> selp(X)
 			hi = simplifyTraceMatrixMult(hop, hi, i);            //e.g., trace(X%*%Y)->sum(X*t(Y));  
 			hi = simplifySlicedMatrixMult(hop, hi, i);           //e.g., (X%*%Y)[1,1] -> X[1,] %*% Y[,1];
 			hi = simplifyConstantSort(hop, hi, i);               //e.g., order(matrix())->matrix/seq; 
@@ -161,9 +164,11 @@ public class RewriteAlgebraicSimplificationStatic extends HopRewriteRule
 			hi = simplifyTransposeAggBinBinaryChains(hop, hi, i);//e.g., t(t(A)%*%t(B)+C) -> B%*%A+t(C)
 			hi = removeUnnecessaryMinus(hop, hi, i);             //e.g., -(-X)->X; potentially introduced by simplify binary or dyn rewrites
 			hi = simplifyGroupedAggregate(hi);          	     //e.g., aggregate(target=X,groups=y,fn="count") -> aggregate(target=y,groups=y,fn="count")
-			hi = fuseMinusNzBinaryOperation(hop, hi, i);         //e.g., X-mean*ppred(X,0,!=) -> X -nz mean
-			hi = fuseLogNzUnaryOperation(hop, hi, i);            //e.g., ppred(X,0,"!=")*log(X) -> log_nz(X)
-			hi = fuseLogNzBinaryOperation(hop, hi, i);           //e.g., ppred(X,0,"!=")*log(X,0.5) -> log_nz(X,0.5)
+			if(OptimizerUtils.ALLOW_OPERATOR_FUSION) {
+				hi = fuseMinusNzBinaryOperation(hop, hi, i);         //e.g., X-mean*ppred(X,0,!=) -> X -nz mean
+				hi = fuseLogNzUnaryOperation(hop, hi, i);            //e.g., ppred(X,0,"!=")*log(X) -> log_nz(X)
+				hi = fuseLogNzBinaryOperation(hop, hi, i);           //e.g., ppred(X,0,"!=")*log(X,0.5) -> log_nz(X,0.5)
+			}
 			hi = simplifyOuterSeqExpand(hop, hi, i);             //e.g., outer(v, seq(1,m), "==") -> rexpand(v, max=m, dir=row, ignore=true, cast=false)
 			hi = simplifyTableSeqExpand(hop, hi, i);             //e.g., table(seq(1,nrow(v)), v, nrow(v), m) -> rexpand(v, max=m, dir=row, ignore=false, cast=true)
 			//hi = removeUnecessaryPPred(hop, hi, i);            //e.g., ppred(X,X,"==")->matrix(1,rows=nrow(X),cols=ncol(X))

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/bbefe96b/src/main/java/org/apache/sysml/parser/DMLTranslator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/DMLTranslator.java b/src/main/java/org/apache/sysml/parser/DMLTranslator.java
index 0063997..61dff7c 100644
--- a/src/main/java/org/apache/sysml/parser/DMLTranslator.java
+++ b/src/main/java/org/apache/sysml/parser/DMLTranslator.java
@@ -56,6 +56,7 @@ import org.apache.sysml.hops.ParameterizedBuiltinOp;
 import org.apache.sysml.hops.ReorgOp;
 import org.apache.sysml.hops.TernaryOp;
 import org.apache.sysml.hops.UnaryOp;
+import org.apache.sysml.hops.codegen.SpoofCompiler;
 import org.apache.sysml.hops.ipa.InterProceduralAnalysis;
 import org.apache.sysml.hops.recompile.Recompiler;
 import org.apache.sysml.hops.rewrite.HopRewriteUtils;
@@ -278,6 +279,11 @@ public class DMLTranslator
 		resetHopsDAGVisitStatus(dmlp);
 	}
 	
+	public void codgenHopsDAG(DMLProgram dmlp) 
+		throws LanguageException, HopsException, DMLRuntimeException 
+	{
+		SpoofCompiler.generateCode(dmlp);	
+	}
 	
 	public void constructLops(DMLProgram dmlp) throws ParseException, LanguageException, HopsException, LopsException {
 

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/bbefe96b/src/test/java/org/apache/sysml/test/integration/AutomatedTestBase.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/AutomatedTestBase.java b/src/test/java/org/apache/sysml/test/integration/AutomatedTestBase.java
index e5ed921..e6123ef 100644
--- a/src/test/java/org/apache/sysml/test/integration/AutomatedTestBase.java
+++ b/src/test/java/org/apache/sysml/test/integration/AutomatedTestBase.java
@@ -1774,4 +1774,11 @@ public abstract class AutomatedTestBase
 	{
 		return writeInputFrame(name, data, false, schema, oi);
 	}
+	
+	protected boolean heavyHittersContainsSubString(String str) {
+		for( String opcode : Statistics.getCPHeavyHitterOpCodes())
+			if(opcode.contains(str))
+				return true;
+		return false;		
+	}
 }

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/bbefe96b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmGLM.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmGLM.java b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmGLM.java
new file mode 100644
index 0000000..1dc8e1f
--- /dev/null
+++ b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmGLM.java
@@ -0,0 +1,194 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.integration.functions.codegen;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
+import org.apache.sysml.hops.OptimizerUtils;
+import org.apache.sysml.lops.LopProperties.ExecType;
+import org.apache.sysml.test.integration.AutomatedTestBase;
+import org.apache.sysml.test.integration.TestConfiguration;
+import org.apache.sysml.test.utils.TestUtils;
+
+public class AlgorithmGLM extends AutomatedTestBase 
+{	
+	private final static String TEST_NAME1 = "Algorithm_GLM";
+	private final static String TEST_DIR = "functions/codegen/";
+	private final static String TEST_CLASS_DIR = TEST_DIR + AlgorithmGLM.class.getSimpleName() + "/";
+	private final static String TEST_CONF = "SystemML-config-codegen.xml";
+	
+	//private final static double eps = 1e-5;
+	
+	private final static int rows = 2468;
+	private final static int cols = 1007;
+		
+	private final static double sparsity1 = 0.7; //dense
+	private final static double sparsity2 = 0.1; //sparse
+	
+	private final static int intercept = 0;
+	private final static double epsilon = 0.000000001;
+	private final static double maxiter = 5; //inner/outer
+	
+	public enum GLMType {
+		POISSON_LOG,
+		GAMMA_LOG,
+		BINOMIAL_PROBIT,
+	}
+	
+	@Override
+	public void setUp() {
+		TestUtils.clearAssertionInformation();
+		addTestConfiguration(TEST_NAME1, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME1, new String[] { "w" })); 
+	}
+
+	@Test
+	public void testGLMPoissonDenseRewritesCP() {
+		runGLMTest(GLMType.POISSON_LOG, true, false, ExecType.CP);
+	}
+	
+	@Test
+	public void testGLMPoissonSparseRewritesCP() {
+		runGLMTest(GLMType.POISSON_LOG, true, true, ExecType.CP);
+	}
+	
+	@Test
+	public void testGLMPoissonDenseCP() {
+		runGLMTest(GLMType.POISSON_LOG, false, false, ExecType.CP);
+	}
+	
+	@Test
+	public void testGLMPoissonSparseCP() {
+		runGLMTest(GLMType.POISSON_LOG, false, true, ExecType.CP);
+	}
+	
+	@Test
+	public void testGLMGammaDenseRewritesCP() {
+		runGLMTest(GLMType.GAMMA_LOG, true, false, ExecType.CP);
+	}
+	
+	@Test
+	public void testGLMGammaSparseRewritesCP() {
+		runGLMTest(GLMType.GAMMA_LOG, true, true, ExecType.CP);
+	}
+	
+	@Test
+	public void testGLMGammaDenseCP() {
+		runGLMTest(GLMType.GAMMA_LOG, false, false, ExecType.CP);
+	}
+	
+	@Test
+	public void testGLMGammaSparseCP() {
+		runGLMTest(GLMType.GAMMA_LOG, false, true, ExecType.CP);
+	}
+	
+	@Test
+	public void testGLMBinomialDenseRewritesCP() {
+		runGLMTest(GLMType.BINOMIAL_PROBIT, true, false, ExecType.CP);
+	}
+	
+	@Test
+	public void testGLMBinomialSparseRewritesCP() {
+		runGLMTest(GLMType.BINOMIAL_PROBIT, true, true, ExecType.CP);
+	}
+	
+	@Test
+	public void testGLMBinomialDenseCP() {
+		runGLMTest(GLMType.BINOMIAL_PROBIT, false, false, ExecType.CP);
+	}
+	
+	@Test
+	public void testGLMBinomialSparseCP() {
+		runGLMTest(GLMType.BINOMIAL_PROBIT, false, true, ExecType.CP);
+	}
+	
+	private void runGLMTest( GLMType type, boolean rewrites, boolean sparse, ExecType instType)
+	{
+		boolean oldFlag = OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION;
+		RUNTIME_PLATFORM platformOld = rtplatform;
+		switch( instType ){
+			case MR: rtplatform = RUNTIME_PLATFORM.HADOOP; break;
+			case SPARK: rtplatform = RUNTIME_PLATFORM.SPARK; break;
+			default: rtplatform = RUNTIME_PLATFORM.HYBRID_SPARK; break;
+		}
+	
+		boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
+		if( rtplatform == RUNTIME_PLATFORM.SPARK || rtplatform == RUNTIME_PLATFORM.HYBRID_SPARK )
+			DMLScript.USE_LOCAL_SPARK_CONFIG = true;
+
+		try
+		{
+			String TEST_NAME = TEST_NAME1;
+			TestConfiguration config = getTestConfiguration(TEST_NAME);
+			loadTestConfiguration(config);
+			
+			String[] addArgs = new String[4];
+			switch(type) {
+				case POISSON_LOG: //dfam, vpow, link, lpow
+					addArgs[0] = "1"; addArgs[1] = "1.0"; addArgs[2] = "1"; addArgs[3] = "0.0";
+					break;
+				case GAMMA_LOG:   //dfam, vpow, link, lpow
+					addArgs[0] = "1"; addArgs[1] = "2.0"; addArgs[2] = "1"; addArgs[3] = "0.0";
+					break;
+				case BINOMIAL_PROBIT: //dfam, vpow, link, yneg 
+					addArgs[0] = "2"; addArgs[1] = "0.0"; addArgs[2] = "3"; addArgs[3] = "2";
+					break;
+			}
+			
+			/* This is for running the junit test the new way, i.e., construct the arguments directly */
+			String HOME = SCRIPT_DIR + TEST_DIR;
+			fullDMLScriptName = HOME + TEST_NAME + ".dml";
+			programArgs = new String[]{ "-explain", "-stats",
+				"-config=" + HOME + TEST_CONF, "-args", input("X"), input("Y"),
+				String.valueOf(intercept), String.valueOf(epsilon), String.valueOf(maxiter), 
+				addArgs[0], addArgs[1], addArgs[2], addArgs[3], output("w")};
+
+			rCmd = getRCmd(inputDir(), String.valueOf(intercept),String.valueOf(epsilon),
+				String.valueOf(maxiter), addArgs[0], addArgs[1], addArgs[2], addArgs[3], expectedDir());
+
+			OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = rewrites;
+			
+			//generate actual datasets
+			double[][] X = getRandomMatrix(rows, cols, 0, 1, sparse?sparsity2:sparsity1, 714);
+			writeInputMatrixWithMTD("X", X, true);
+			double[][] y = TestUtils.round(getRandomMatrix(rows, 1, 0, 1, 1.0, 136));
+			writeInputMatrixWithMTD("Y", y, true);
+			
+			runTest(true, false, null, -1); 
+			//TODO fix R glm script
+			//runRScript(true); 
+			
+			//compare matrices 
+			//HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromHDFS("w");
+			//HashMap<CellIndex, Double> rfile  = readRMatrixFromFS("w");
+			//TestUtils.compareMatrices(dmlfile, rfile, eps, "Stat-DML", "Stat-R");
+			Assert.assertTrue(heavyHittersContainsSubString("spoof") || heavyHittersContainsSubString("sp_spoof"));
+		}
+		finally {
+			rtplatform = platformOld;
+			DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
+			OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = oldFlag;
+			OptimizerUtils.ALLOW_AUTO_VECTORIZATION = true;
+			OptimizerUtils.ALLOW_OPERATOR_FUSION = true;
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/bbefe96b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmKMeans.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmKMeans.java b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmKMeans.java
new file mode 100644
index 0000000..907d0ca
--- /dev/null
+++ b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmKMeans.java
@@ -0,0 +1,192 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.integration.functions.codegen;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
+import org.apache.sysml.hops.OptimizerUtils;
+import org.apache.sysml.lops.LopProperties.ExecType;
+import org.apache.sysml.test.integration.AutomatedTestBase;
+import org.apache.sysml.test.integration.TestConfiguration;
+import org.apache.sysml.test.utils.TestUtils;
+
+public class AlgorithmKMeans extends AutomatedTestBase //runs Algorithm_KMeans.dml with the codegen config file and asserts that a "spoof" entry shows up among the heavy hitters
+{	
+	private final static String TEST_NAME1 = "Algorithm_KMeans";
+	private final static String TEST_DIR = "functions/codegen/";
+	private final static String TEST_CLASS_DIR = TEST_DIR + AlgorithmKMeans.class.getSimpleName() + "/";
+	private final static String TEST_CONF = "SystemML-config-codegen.xml";
+	
+	//private final static double eps = 1e-5; //unused while the R comparison in runKMeansTest stays commented out
+	
+	private final static int rows = 3972; //first dimension argument for the generated input X
+	private final static int cols = 972; //second dimension argument for the generated input X
+		
+	private final static double sparsity1 = 0.7; //dense
+	private final static double sparsity2 = 0.1; //sparse
+	
+	private final static double epsilon = 0.000000001; //forwarded to the script via String.valueOf
+	private final static double maxiter = 10; //declared double, so String.valueOf passes "10.0" to the script
+	
+	@Override
+	public void setUp() {
+		TestUtils.clearAssertionInformation();
+		addTestConfiguration(TEST_NAME1, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME1, new String[] { "C" })); 
+	}
+	//naming: Dense|Sparse = sparsity 0.7|0.1, Bin|Mul = 2|20 centroids, Single|Multi = 1|10 runs, Rewrites = algebraic simplification on; every case passes ExecType.CP
+	@Test
+	public void testKMeansDenseBinSingleRewritesCP() {
+		runKMeansTest(TEST_NAME1, true, false, 2, 1, ExecType.CP);
+	}
+	
+	@Test
+	public void testKMeansSparseBinSingleRewritesCP() {
+		runKMeansTest(TEST_NAME1, true, true, 2, 1, ExecType.CP);
+	}
+	
+	@Test
+	public void testKMeansDenseBinSingleCP() {
+		runKMeansTest(TEST_NAME1, false, false, 2, 1, ExecType.CP);
+	}
+	
+	@Test
+	public void testKMeansSparseBinSingleCP() {
+		runKMeansTest(TEST_NAME1, false, true, 2, 1, ExecType.CP);
+	}
+	
+	@Test
+	public void testKMeansDenseBinMultiRewritesCP() {
+		runKMeansTest(TEST_NAME1, true, false, 2, 10, ExecType.CP);
+	}
+	
+	@Test
+	public void testKMeansSparseBinMultiRewritesCP() {
+		runKMeansTest(TEST_NAME1, true, true, 2, 10, ExecType.CP);
+	}
+	
+	@Test
+	public void testKMeansDenseBinMultiCP() {
+		runKMeansTest(TEST_NAME1, false, false, 2, 10, ExecType.CP);
+	}
+	
+	@Test
+	public void testKMeansSparseBinMultiCP() {
+		runKMeansTest(TEST_NAME1, false, true, 2, 10, ExecType.CP);
+	}
+	
+	@Test
+	public void testKMeansDenseMulSingleRewritesCP() {
+		runKMeansTest(TEST_NAME1, true, false, 20, 1, ExecType.CP);
+	}
+	
+	@Test
+	public void testKMeansSparseMulSingleRewritesCP() {
+		runKMeansTest(TEST_NAME1, true, true, 20, 1, ExecType.CP);
+	}
+	
+	@Test
+	public void testKMeansDenseMulSingleCP() {
+		runKMeansTest(TEST_NAME1, false, false, 20, 1, ExecType.CP);
+	}
+	
+	@Test
+	public void testKMeansSparseMulSingleCP() {
+		runKMeansTest(TEST_NAME1, false, true, 20, 1, ExecType.CP);
+	}
+	
+	@Test
+	public void testKMeansDenseMulMultiRewritesCP() {
+		runKMeansTest(TEST_NAME1, true, false, 20, 10, ExecType.CP);
+	}
+	
+	@Test
+	public void testKMeansSparseMulMultiRewritesCP() {
+		runKMeansTest(TEST_NAME1, true, true, 20, 10, ExecType.CP);
+	}
+	
+	@Test
+	public void testKMeansDenseMulMultiCP() {
+		runKMeansTest(TEST_NAME1, false, false, 20, 10, ExecType.CP);
+	}
+	
+	@Test
+	public void testKMeansSparseMulMultiCP() {
+		runKMeansTest(TEST_NAME1, false, true, 20, 10, ExecType.CP);
+	}
+	/** Shared driver: sets the platform and rewrite flag, writes a random X, runs the DML script, asserts a "spoof" heavy hitter, and restores the static flags in finally. */
+	private void runKMeansTest( String testname, boolean rewrites, boolean sparse, int centroids, int runs, ExecType instType)
+	{
+		boolean oldFlag = OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION;
+		RUNTIME_PLATFORM platformOld = rtplatform;
+		switch( instType ){
+			case MR: rtplatform = RUNTIME_PLATFORM.HADOOP; break;
+			case SPARK: rtplatform = RUNTIME_PLATFORM.SPARK; break;
+			default: rtplatform = RUNTIME_PLATFORM.HYBRID_SPARK; break; //ExecType.CP (and anything else) ends up here
+		}
+	
+		boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
+		if( rtplatform == RUNTIME_PLATFORM.SPARK || rtplatform == RUNTIME_PLATFORM.HYBRID_SPARK )
+			DMLScript.USE_LOCAL_SPARK_CONFIG = true;
+
+		try
+		{
+			String TEST_NAME = testname;
+			TestConfiguration config = getTestConfiguration(TEST_NAME);
+			loadTestConfiguration(config);
+			
+			/* This is for running the junit test the new way, i.e., construct the arguments directly */
+			String HOME = SCRIPT_DIR + TEST_DIR;
+			fullDMLScriptName = HOME + TEST_NAME + ".dml";
+			programArgs = new String[]{ "-explain", "hops", "-stats",
+				"-config=" + HOME + TEST_CONF, "-args", input("X"), String.valueOf(centroids),
+				String.valueOf(runs), String.valueOf(epsilon), String.valueOf(maxiter), output("C")};
+
+			//rCmd = getRCmd(inputDir(), String.valueOf(intercept),String.valueOf(epsilon),
+			//	String.valueOf(maxiter), expectedDir());
+
+			OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = rewrites;
+			
+			//generate actual datasets
+			double[][] X = getRandomMatrix(rows, cols, 0, 1, sparse?sparsity2:sparsity1, 714);
+			writeInputMatrixWithMTD("X", X, true);
+			
+			runTest(true, false, null, -1); 
+			
+			//no comparison with R due to randomized algorithm
+			//runRScript(true); 
+			//compare matrices 
+			//HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromHDFS("C");
+			//HashMap<CellIndex, Double> rfile  = readRMatrixFromFS("C");
+			//TestUtils.compareMatrices(dmlfile, rfile, eps, "Stat-DML", "Stat-R");
+			//NOTE(review): "sp_spoof" contains "spoof", so the second check looks redundant if this is a plain substring match - confirm
+			Assert.assertTrue(heavyHittersContainsSubString("spoof") || heavyHittersContainsSubString("sp_spoof"));
+		}
+		finally {
+			rtplatform = platformOld;
+			DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
+			OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = oldFlag;
+			OptimizerUtils.ALLOW_AUTO_VECTORIZATION = true; //NOTE(review): set to true unconditionally although this method never changes it
+			OptimizerUtils.ALLOW_OPERATOR_FUSION = true; //NOTE(review): same, not a restore of a saved value
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/bbefe96b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmL2SVM.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmL2SVM.java b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmL2SVM.java
new file mode 100644
index 0000000..f93808b
--- /dev/null
+++ b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmL2SVM.java
@@ -0,0 +1,157 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.integration.functions.codegen;
+
+import java.util.HashMap;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
+import org.apache.sysml.hops.OptimizerUtils;
+import org.apache.sysml.lops.LopProperties.ExecType;
+import org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex;
+import org.apache.sysml.test.integration.AutomatedTestBase;
+import org.apache.sysml.test.integration.TestConfiguration;
+import org.apache.sysml.test.utils.TestUtils;
+
+public class AlgorithmL2SVM extends AutomatedTestBase //runs Algorithm_L2SVM.dml with the codegen config file, compares output "w" against the R script and asserts a "spoof" heavy hitter
+{	
+	private final static String TEST_NAME1 = "Algorithm_L2SVM";
+	private final static String TEST_DIR = "functions/codegen/";
+	private final static String TEST_CLASS_DIR = TEST_DIR + AlgorithmL2SVM.class.getSimpleName() + "/";
+	private final static String TEST_CONF = "SystemML-config-codegen.xml";
+	
+	private final static double eps = 1e-5; //passed to TestUtils.compareMatrices for the DML vs R comparison
+	
+	private final static int rows = 1468; //first dimension argument for the generated X; also the length of Y
+	private final static int cols = 1007; //second dimension argument for the generated X
+		
+	private final static double sparsity1 = 0.7; //dense
+	private final static double sparsity2 = 0.1; //sparse
+	
+	private final static int intercept = 0; //forwarded to both the DML script and the R command
+	private final static double epsilon = 0.000000001; //forwarded to both the DML script and the R command
+	private final static double maxiter = 10; //declared double, so String.valueOf passes "10.0"
+	
+	@Override
+	public void setUp() {
+		TestUtils.clearAssertionInformation();
+		addTestConfiguration(TEST_NAME1, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME1, new String[] { "w" })); 
+	}
+	//naming: Dense|Sparse = sparsity 0.7|0.1, Rewrites = algebraic simplification on, CP|SP = ExecType.CP|ExecType.SPARK
+	@Test
+	public void testL2SVMDenseRewritesCP() {
+		runL2SVMTest(TEST_NAME1, true, false, ExecType.CP);
+	}
+	
+	@Test
+	public void testL2SVMSparseRewritesCP() {
+		runL2SVMTest(TEST_NAME1, true, true, ExecType.CP);
+	}
+	
+	@Test
+	public void testL2SVMDenseCP() {
+		runL2SVMTest(TEST_NAME1, false, false, ExecType.CP);
+	}
+	
+	@Test
+	public void testL2SVMSparseCP() {
+		runL2SVMTest(TEST_NAME1, false, true, ExecType.CP);
+	}
+
+	@Test
+	public void testL2SVMDenseRewritesSP() {
+		runL2SVMTest(TEST_NAME1, true, false, ExecType.SPARK);
+	}
+	
+	@Test
+	public void testL2SVMSparseRewritesSP() {
+		runL2SVMTest(TEST_NAME1, true, true, ExecType.SPARK);
+	}
+	
+	@Test
+	public void testL2SVMDenseSP() {
+		runL2SVMTest(TEST_NAME1, false, false, ExecType.SPARK);
+	}
+	
+	@Test
+	public void testL2SVMSparseSP() {
+		runL2SVMTest(TEST_NAME1, false, true, ExecType.SPARK);
+	}
+	/** Shared driver: sets the platform and rewrite flag, writes random X and Y, runs the DML and R scripts, compares "w", asserts a "spoof" heavy hitter, and restores the static flags in finally. */
+	private void runL2SVMTest( String testname, boolean rewrites, boolean sparse, ExecType instType)
+	{
+		boolean oldFlag = OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION;
+		RUNTIME_PLATFORM platformOld = rtplatform;
+		switch( instType ){
+			case MR: rtplatform = RUNTIME_PLATFORM.HADOOP; break;
+			case SPARK: rtplatform = RUNTIME_PLATFORM.SPARK; break;
+			default: rtplatform = RUNTIME_PLATFORM.HYBRID_SPARK; break; //ExecType.CP (and anything else) ends up here
+		}
+	
+		boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
+		if( rtplatform == RUNTIME_PLATFORM.SPARK || rtplatform == RUNTIME_PLATFORM.HYBRID_SPARK )
+			DMLScript.USE_LOCAL_SPARK_CONFIG = true;
+
+		try
+		{
+			String TEST_NAME = testname;
+			TestConfiguration config = getTestConfiguration(TEST_NAME);
+			loadTestConfiguration(config);
+			
+			/* This is for running the junit test the new way, i.e., construct the arguments directly */
+			String HOME = SCRIPT_DIR + TEST_DIR;
+			fullDMLScriptName = HOME + TEST_NAME + ".dml";
+			programArgs = new String[]{ "-explain", "-stats",
+				"-config=" + HOME + TEST_CONF, "-args", input("X"), input("Y"),
+				String.valueOf(intercept), String.valueOf(epsilon),
+				String.valueOf(maxiter), output("w")};
+
+			rCmd = getRCmd(inputDir(), String.valueOf(intercept),String.valueOf(epsilon),
+				String.valueOf(maxiter), expectedDir());
+
+			OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = rewrites;
+			
+			//generate actual datasets
+			double[][] X = getRandomMatrix(rows, cols, 0, 1, sparse?sparsity2:sparsity1, 714);
+			writeInputMatrixWithMTD("X", X, true);
+			double[][] y = TestUtils.round(getRandomMatrix(rows, 1, 0, 1, 1.0, 136)); //presumably 0/1 labels (values in [0,1] rounded) - confirm
+			writeInputMatrixWithMTD("Y", y, true);
+			
+			runTest(true, false, null, -1); 
+			runRScript(true); 
+			
+			//compare matrices 
+			HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromHDFS("w");
+			HashMap<CellIndex, Double> rfile  = readRMatrixFromFS("w");
+			TestUtils.compareMatrices(dmlfile, rfile, eps, "Stat-DML", "Stat-R");
+			Assert.assertTrue(heavyHittersContainsSubString("spoof") || heavyHittersContainsSubString("sp_spoof")); //NOTE(review): "sp_spoof" contains "spoof"; second check looks redundant for a plain substring match
+		}
+		finally {
+			rtplatform = platformOld;
+			DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
+			OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = oldFlag;
+			OptimizerUtils.ALLOW_AUTO_VECTORIZATION = true; //NOTE(review): set to true unconditionally although this method never changes it
+			OptimizerUtils.ALLOW_OPERATOR_FUSION = true; //NOTE(review): same, not a restore of a saved value
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/bbefe96b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmLinregCG.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmLinregCG.java b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmLinregCG.java
new file mode 100644
index 0000000..25c5f03
--- /dev/null
+++ b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmLinregCG.java
@@ -0,0 +1,149 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.integration.functions.codegen;
+
+import java.util.HashMap;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
+import org.apache.sysml.hops.OptimizerUtils;
+import org.apache.sysml.lops.LopProperties.ExecType;
+import org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex;
+import org.apache.sysml.test.integration.AutomatedTestBase;
+import org.apache.sysml.test.integration.TestConfiguration;
+import org.apache.sysml.test.utils.TestUtils;
+
+public class AlgorithmLinregCG extends AutomatedTestBase //runs Algorithm_LinregCG.dml with the codegen config file, compares output "w" against the R script and asserts a "spoof" heavy hitter
+{	
+	private final static String TEST_NAME1 = "Algorithm_LinregCG";
+	private final static String TEST_DIR = "functions/codegen/";
+	private final static String TEST_CLASS_DIR = TEST_DIR + AlgorithmLinregCG.class.getSimpleName() + "/";
+	private final static String TEST_CONF = "SystemML-config-codegen.xml";
+	
+	private final static double eps = 1e-5; //passed to TestUtils.compareMatrices for the DML vs R comparison
+	
+	private final static int rows = 1468; //first dimension argument for the generated X; also the length of y
+	private final static int cols = 1007; //second dimension argument for the generated X
+		
+	private final static double sparsity1 = 0.7; //dense
+	private final static double sparsity2 = 0.1; //sparse
+	
+	private final static int intercept = 0; //forwarded to both the DML script and the R command
+	private final static double epsilon = 0.000000001; //forwarded to both the DML script and the R command
+	private final static double maxiter = 10; //declared double, so String.valueOf passes "10.0"
+	
+	@Override
+	public void setUp() {
+		TestUtils.clearAssertionInformation();
+		addTestConfiguration(TEST_NAME1, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME1, new String[] { "w" })); 
+	}
+	//naming: Dense|Sparse = sparsity 0.7|0.1, Rewrites = algebraic simplification on; only ExecType.CP cases are enabled
+	@Test
+	public void testLinregCGDenseRewritesCP() {
+		runLinregCGTest(TEST_NAME1, true, false, ExecType.CP);
+	}
+	
+	@Test
+	public void testLinregCGSparseRewritesCP() {
+		runLinregCGTest(TEST_NAME1, true, true, ExecType.CP);
+	}
+	
+	@Test
+	public void testLinregCGDenseCP() {
+		runLinregCGTest(TEST_NAME1, false, false, ExecType.CP);
+	}
+	
+	@Test
+	public void testLinregCGSparseCP() {
+		runLinregCGTest(TEST_NAME1, false, true, ExecType.CP);
+	}
+
+	/* NOTE(review): disabled Spark variants; stale - they call runGDFOTest with three arguments, which no method declared in this class matches
+	@Test
+	public void testLinregCGDenseSP() {
+		runGDFOTest(TEST_NAME1, false, ExecType.SPARK);
+	}
+	
+	@Test
+	public void testLinregCGSparseSP() {
+		runGDFOTest(TEST_NAME1, true, ExecType.SPARK);
+	}
+	*/
+	/** Shared driver: sets the platform and rewrite flag, writes random X and y, runs the DML and R scripts, compares "w", asserts a "spoof" heavy hitter, and restores the static flags in finally. */
+	private void runLinregCGTest( String testname, boolean rewrites, boolean sparse, ExecType instType)
+	{
+		boolean oldFlag = OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION;
+		RUNTIME_PLATFORM platformOld = rtplatform;
+		switch( instType ){
+			case MR: rtplatform = RUNTIME_PLATFORM.HADOOP; break;
+			case SPARK: rtplatform = RUNTIME_PLATFORM.SPARK; break;
+			default: rtplatform = RUNTIME_PLATFORM.HYBRID_SPARK; break; //ExecType.CP (and anything else) ends up here
+		}
+	
+		boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
+		if( rtplatform == RUNTIME_PLATFORM.SPARK || rtplatform == RUNTIME_PLATFORM.HYBRID_SPARK )
+			DMLScript.USE_LOCAL_SPARK_CONFIG = true;
+
+		try
+		{
+			String TEST_NAME = testname;
+			TestConfiguration config = getTestConfiguration(TEST_NAME);
+			loadTestConfiguration(config);
+			
+			/* This is for running the junit test the new way, i.e., construct the arguments directly */
+			String HOME = SCRIPT_DIR + TEST_DIR;
+			fullDMLScriptName = HOME + TEST_NAME + ".dml";
+			programArgs = new String[]{ "-explain", "-stats",
+				"-config=" + HOME + TEST_CONF, "-args", input("X"), input("y"),
+				String.valueOf(intercept), String.valueOf(epsilon),
+				String.valueOf(maxiter), output("w")};
+
+			rCmd = getRCmd(inputDir(), String.valueOf(intercept),String.valueOf(epsilon),
+				String.valueOf(maxiter), expectedDir());
+	
+			OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = rewrites;
+			
+			//generate actual datasets
+			double[][] X = getRandomMatrix(rows, cols, 0, 1, sparse?sparsity2:sparsity1, 7);
+			writeInputMatrixWithMTD("X", X, true);
+			double[][] y = getRandomMatrix(rows, 1, 0, 10, 1.0, 3); //response is written unrounded, under the lower-case name "y"
+			writeInputMatrixWithMTD("y", y, true);
+			
+			runTest(true, false, null, -1); 
+			runRScript(true); 
+			
+			//compare matrices 
+			HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromHDFS("w");
+			HashMap<CellIndex, Double> rfile  = readRMatrixFromFS("w");
+			TestUtils.compareMatrices(dmlfile, rfile, eps, "Stat-DML", "Stat-R");
+			Assert.assertTrue(heavyHittersContainsSubString("spoof") || heavyHittersContainsSubString("sp_spoof")); //NOTE(review): "sp_spoof" contains "spoof"; second check looks redundant for a plain substring match
+		}
+		finally {
+			rtplatform = platformOld;
+			DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
+			OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = oldFlag;
+			OptimizerUtils.ALLOW_AUTO_VECTORIZATION = true; //NOTE(review): set to true unconditionally although this method never changes it
+			OptimizerUtils.ALLOW_OPERATOR_FUSION = true; //NOTE(review): same, not a restore of a saved value
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/bbefe96b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmMLogreg.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmMLogreg.java b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmMLogreg.java
new file mode 100644
index 0000000..394902e
--- /dev/null
+++ b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmMLogreg.java
@@ -0,0 +1,197 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.integration.functions.codegen;
+
+import java.util.HashMap;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
+import org.apache.sysml.hops.OptimizerUtils;
+import org.apache.sysml.lops.LopProperties.ExecType;
+import org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex;
+import org.apache.sysml.test.integration.AutomatedTestBase;
+import org.apache.sysml.test.integration.TestConfiguration;
+import org.apache.sysml.test.utils.TestUtils;
+
+public class AlgorithmMLogreg extends AutomatedTestBase //runs Algorithm_MLogreg.dml with the codegen config file, compares output "w" against the R script and asserts a "spoof" heavy hitter
+{	
+	private final static String TEST_NAME1 = "Algorithm_MLogreg";
+	private final static String TEST_DIR = "functions/codegen/";
+	private final static String TEST_CLASS_DIR = TEST_DIR + AlgorithmMLogreg.class.getSimpleName() + "/";
+	private final static String TEST_CONF = "SystemML-config-codegen.xml";
+	
+	private final static double eps = 1e-5; //passed to TestUtils.compareMatrices for the DML vs R comparison
+	
+	private final static int rows = 3468; //first dimension argument for the generated X; also the length of Y
+	private final static int cols = 327; //second dimension argument for the generated X
+		
+	private final static double sparsity1 = 0.7; //dense
+	private final static double sparsity2 = 0.1; //sparse
+	
+	private final static int intercept = 0; //forwarded to both the DML script and the R command
+	private final static double epsilon = 0.000000001; //forwarded to both the DML script and the R command
+	private final static double maxiter = 10; //declared double, so String.valueOf passes "10.0"
+	
+	@Override
+	public void setUp() {
+		TestUtils.clearAssertionInformation();
+		addTestConfiguration(TEST_NAME1, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME1, new String[] { "w" })); 
+	}
+	//naming: Bin|Mul = 2|5 classes, Dense|Sparse = sparsity 0.7|0.1, Rewrites = algebraic simplification on, CP|SP = ExecType.CP|ExecType.SPARK
+	@Test
+	public void testMlogregBinDenseRewritesCP() {
+		runMlogregTest(TEST_NAME1, 2, true, false, ExecType.CP);
+	}
+	
+	@Test
+	public void testMlogregBinSparseRewritesCP() {
+		runMlogregTest(TEST_NAME1, 2, true, true, ExecType.CP);
+	}
+	
+	@Test
+	public void testMlogregBinDenseCP() {
+		runMlogregTest(TEST_NAME1, 2, false, false, ExecType.CP);
+	}
+	
+	@Test
+	public void testMlogregBinSparseCP() {
+		runMlogregTest(TEST_NAME1, 2, false, true, ExecType.CP);
+	}
+	
+	@Test
+	public void testMlogregMulDenseRewritesCP() {
+		runMlogregTest(TEST_NAME1, 5, true, false, ExecType.CP);
+	}
+	
+	@Test
+	public void testMlogregMulSparseRewritesCP() {
+		runMlogregTest(TEST_NAME1, 5, true, true, ExecType.CP);
+	}
+	
+	@Test
+	public void testMlogregMulDenseCP() {
+		runMlogregTest(TEST_NAME1, 5, false, false, ExecType.CP);
+	}
+	
+	@Test
+	public void testMlogregMulSparseCP() {
+		runMlogregTest(TEST_NAME1, 5, false, true, ExecType.CP);
+	}
+
+	@Test
+	public void testMlogregBinDenseRewritesSP() {
+		runMlogregTest(TEST_NAME1, 2, true, false, ExecType.SPARK);
+	}
+	
+	@Test
+	public void testMlogregBinSparseRewritesSP() {
+		runMlogregTest(TEST_NAME1, 2, true, true, ExecType.SPARK);
+	}
+	
+	@Test
+	public void testMlogregBinDenseSP() {
+		runMlogregTest(TEST_NAME1, 2, false, false, ExecType.SPARK);
+	}
+	
+	@Test
+	public void testMlogregBinSparseSP() {
+		runMlogregTest(TEST_NAME1, 2, false, true, ExecType.SPARK);
+	}
+	
+	@Test
+	public void testMlogregMulDenseRewritesSP() {
+		runMlogregTest(TEST_NAME1, 5, true, false, ExecType.SPARK);
+	}
+	
+	@Test
+	public void testMlogregMulSparseRewritesSP() {
+		runMlogregTest(TEST_NAME1, 5, true, true, ExecType.SPARK);
+	}
+	
+	@Test
+	public void testMlogregMulDenseSP() {
+		runMlogregTest(TEST_NAME1, 5, false, false, ExecType.SPARK);
+	}
+	
+	@Test
+	public void testMlogregMulSparseSP() {
+		runMlogregTest(TEST_NAME1, 5, false, true, ExecType.SPARK);
+	}
+	/** Shared driver: sets the platform and rewrite flag, writes random X and Y, runs the DML and R scripts, compares "w", asserts a "spoof" heavy hitter, and restores the static flags in finally; classes only shapes the generated labels. */
+	private void runMlogregTest( String testname, int classes, boolean rewrites, boolean sparse, ExecType instType)
+	{
+		boolean oldFlag = OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION;
+		RUNTIME_PLATFORM platformOld = rtplatform;
+		switch( instType ){
+			case MR: rtplatform = RUNTIME_PLATFORM.HADOOP; break;
+			case SPARK: rtplatform = RUNTIME_PLATFORM.SPARK; break;
+			default: rtplatform = RUNTIME_PLATFORM.HYBRID_SPARK; break; //ExecType.CP (and anything else) ends up here
+		}
+	
+		boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
+		if( rtplatform == RUNTIME_PLATFORM.SPARK || rtplatform == RUNTIME_PLATFORM.HYBRID_SPARK )
+			DMLScript.USE_LOCAL_SPARK_CONFIG = true;
+
+		try
+		{
+			String TEST_NAME = testname;
+			TestConfiguration config = getTestConfiguration(TEST_NAME);
+			loadTestConfiguration(config);
+			
+			/* This is for running the junit test the new way, i.e., construct the arguments directly */
+			String HOME = SCRIPT_DIR + TEST_DIR;
+			fullDMLScriptName = HOME + TEST_NAME + ".dml";
+			programArgs = new String[]{ "-explain", "-stats",
+				"-config=" + HOME + TEST_CONF, "-args", input("X"), input("Y"),
+				String.valueOf(intercept), String.valueOf(epsilon),
+				String.valueOf(maxiter), output("w")};
+
+			rCmd = getRCmd(inputDir(), String.valueOf(intercept),String.valueOf(epsilon),
+				String.valueOf(maxiter), expectedDir());
+
+			OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = rewrites;
+			
+			//generate actual datasets
+			double[][] X = getRandomMatrix(rows, cols, 0, 1, sparse?sparsity2:sparsity1, 2384);
+			writeInputMatrixWithMTD("X", X, true);
+			double[][] y = TestUtils.round(getRandomMatrix(rows, 1, 0.51, classes+0.49, 1.0, 9283)); //presumably labels 1..classes after rounding - confirm
+			writeInputMatrixWithMTD("Y", y, true);
+			
+			runTest(true, false, null, -1); 
+			runRScript(true); 
+			
+			//compare matrices 
+			HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromHDFS("w");
+			HashMap<CellIndex, Double> rfile  = readRMatrixFromFS("w");
+			TestUtils.compareMatrices(dmlfile, rfile, eps, "Stat-DML", "Stat-R");
+			Assert.assertTrue(heavyHittersContainsSubString("spoof") || heavyHittersContainsSubString("sp_spoof")); //NOTE(review): "sp_spoof" contains "spoof"; second check looks redundant for a plain substring match
+		}
+		finally {
+			rtplatform = platformOld;
+			DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
+			OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = oldFlag;
+			OptimizerUtils.ALLOW_AUTO_VECTORIZATION = true; //NOTE(review): set to true unconditionally although this method never changes it
+			OptimizerUtils.ALLOW_OPERATOR_FUSION = true; //NOTE(review): same, not a restore of a saved value
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/bbefe96b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmMSVM.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmMSVM.java b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmMSVM.java
new file mode 100644
index 0000000..047ceb0
--- /dev/null
+++ b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmMSVM.java
@@ -0,0 +1,157 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.integration.functions.codegen;
+
+import java.util.HashMap;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
+import org.apache.sysml.hops.OptimizerUtils;
+import org.apache.sysml.lops.LopProperties.ExecType;
+import org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex;
+import org.apache.sysml.test.integration.AutomatedTestBase;
+import org.apache.sysml.test.integration.TestConfiguration;
+import org.apache.sysml.test.utils.TestUtils;
+
+public class AlgorithmMSVM extends AutomatedTestBase //runs Algorithm_MSVM.dml with the codegen config file, compares output "w" against the R script and asserts a "spoof" heavy hitter
+{	
+	private final static String TEST_NAME1 = "Algorithm_MSVM";
+	private final static String TEST_DIR = "functions/codegen/";
+	private final static String TEST_CLASS_DIR = TEST_DIR + AlgorithmMSVM.class.getSimpleName() + "/";
+	private final static String TEST_CONF = "SystemML-config-codegen.xml";
+	
+	private final static double eps = 1e-5; //passed to TestUtils.compareMatrices for the DML vs R comparison
+	
+	private final static int rows = 1468; //first dimension argument for the generated X; also the length of Y
+	private final static int cols = 1007; //second dimension argument for the generated X
+		
+	private final static double sparsity1 = 0.7; //dense
+	private final static double sparsity2 = 0.1; //sparse
+	
+	private final static int intercept = 0; //forwarded to both the DML script and the R command
+	private final static double epsilon = 0.000000001; //forwarded to both the DML script and the R command
+	private final static double maxiter = 10; //declared double, so String.valueOf passes "10.0"
+	
+	@Override
+	public void setUp() {
+		TestUtils.clearAssertionInformation();
+		addTestConfiguration(TEST_NAME1, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME1, new String[] { "w" })); 
+	}
+	//naming: Dense|Sparse = sparsity 0.7|0.1, Bin|Mul = numClasses 2|4, Rewrites = algebraic simplification on; every case passes ExecType.CP
+	@Test
+	public void testMSVMDenseBinRewritesCP() {
+		runMSVMTest(TEST_NAME1, true, false, 2, ExecType.CP);
+	}
+	
+	@Test
+	public void testMSVMSparseBinRewritesCP() {
+		runMSVMTest(TEST_NAME1, true, true, 2, ExecType.CP);
+	}
+	
+	@Test
+	public void testMSVMDenseBinCP() {
+		runMSVMTest(TEST_NAME1, false, false, 2, ExecType.CP);
+	}
+	
+	@Test
+	public void testMSVMSparseBinCP() {
+		runMSVMTest(TEST_NAME1, false, true, 2, ExecType.CP);
+	}
+	
+	@Test
+	public void testMSVMDenseMulRewritesCP() {
+		runMSVMTest(TEST_NAME1, true, false, 4, ExecType.CP);
+	}
+	
+	@Test
+	public void testMSVMSparseMulRewritesCP() {
+		runMSVMTest(TEST_NAME1, true, true, 4, ExecType.CP);
+	}
+	
+	@Test
+	public void testMSVMDenseMulCP() {
+		runMSVMTest(TEST_NAME1, false, false, 4, ExecType.CP);
+	}
+	
+	@Test
+	public void testMSVMSparseMulCP() {
+		runMSVMTest(TEST_NAME1, false, true, 4, ExecType.CP);
+	}
+	/** Shared driver: sets the platform and rewrite flag, writes random X and Y, runs the DML and R scripts, compares "w", asserts a "spoof" heavy hitter, and restores the static flags in finally; numClasses only shapes the generated labels. */
+	private void runMSVMTest( String testname, boolean rewrites, boolean sparse, int numClasses, ExecType instType)
+	{
+		boolean oldFlag = OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION;
+		RUNTIME_PLATFORM platformOld = rtplatform;
+		switch( instType ){
+			case MR: rtplatform = RUNTIME_PLATFORM.HADOOP; break;
+			case SPARK: rtplatform = RUNTIME_PLATFORM.SPARK; break;
+			default: rtplatform = RUNTIME_PLATFORM.HYBRID_SPARK; break; //ExecType.CP (and anything else) ends up here
+		}
+	
+		boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
+		if( rtplatform == RUNTIME_PLATFORM.SPARK || rtplatform == RUNTIME_PLATFORM.HYBRID_SPARK )
+			DMLScript.USE_LOCAL_SPARK_CONFIG = true;
+
+		try
+		{
+			String TEST_NAME = testname;
+			TestConfiguration config = getTestConfiguration(TEST_NAME);
+			loadTestConfiguration(config);
+			
+			/* This is for running the junit test the new way, i.e., construct the arguments directly */
+			String HOME = SCRIPT_DIR + TEST_DIR;
+			fullDMLScriptName = HOME + TEST_NAME + ".dml";
+			programArgs = new String[]{ "-explain", "-stats",
+				"-config=" + HOME + TEST_CONF, "-args", input("X"), input("Y"),
+				String.valueOf(intercept), String.valueOf(epsilon),
+				String.valueOf(maxiter), output("w")};
+
+			rCmd = getRCmd(inputDir(), String.valueOf(intercept),String.valueOf(epsilon),
+				String.valueOf(maxiter), expectedDir());
+
+			OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = rewrites;
+			
+			//generate actual datasets
+			double[][] X = getRandomMatrix(rows, cols, 0, 1, sparse?sparsity2:sparsity1, 714);
+			writeInputMatrixWithMTD("X", X, true);
+			double[][] y = TestUtils.round(getRandomMatrix(rows, 1, 1, numClasses, 1.0, 136)); //presumably labels 1..numClasses; rounding values from [1,numClasses] may under-represent the two end classes - TODO confirm intended
+			writeInputMatrixWithMTD("Y", y, true);
+			
+			runTest(true, false, null, -1); 
+			runRScript(true); 
+			
+			//compare matrices 
+			HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromHDFS("w");
+			HashMap<CellIndex, Double> rfile  = readRMatrixFromFS("w");
+			TestUtils.compareMatrices(dmlfile, rfile, eps, "Stat-DML", "Stat-R");
+			Assert.assertTrue(heavyHittersContainsSubString("spoof") || heavyHittersContainsSubString("sp_spoof")); //NOTE(review): "sp_spoof" contains "spoof"; second check looks redundant for a plain substring match
+		}
+		finally {
+			rtplatform = platformOld;
+			DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
+			OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = oldFlag;
+			OptimizerUtils.ALLOW_AUTO_VECTORIZATION = true; //NOTE(review): set to true unconditionally although this method never changes it
+			OptimizerUtils.ALLOW_OPERATOR_FUSION = true; //NOTE(review): same, not a restore of a saved value
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/bbefe96b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmPNMF.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmPNMF.java b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmPNMF.java
new file mode 100644
index 0000000..5d7e654
--- /dev/null
+++ b/src/test/java/org/apache/sysml/test/integration/functions/codegen/AlgorithmPNMF.java
@@ -0,0 +1,142 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.integration.functions.codegen;
+
+import java.util.HashMap;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
+import org.apache.sysml.hops.OptimizerUtils;
+import org.apache.sysml.lops.LopProperties.ExecType;
+import org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex;
+import org.apache.sysml.test.integration.AutomatedTestBase;
+import org.apache.sysml.test.integration.TestConfiguration;
+import org.apache.sysml.test.utils.TestUtils;
+
+/** Codegen integration test that runs the PNMF script in DML and R and compares the factors W and H. */
+public class AlgorithmPNMF extends AutomatedTestBase
+{
+	private static final String TEST_NAME1 = "Algorithm_PNMF";
+	private static final String TEST_DIR = "functions/codegen/";
+	private static final String TEST_CLASS_DIR = TEST_DIR + AlgorithmPNMF.class.getSimpleName() + "/";
+	private static final String TEST_CONF = "SystemML-config-codegen.xml";
+
+	private static final double eps = 1e-5; //tolerance for the DML vs. R comparison
+
+	private static final int rows = 1468;
+	private static final int cols = 1207;
+	private static final int rank = 20;
+
+	private static final double sparsity1 = 0.7; //dense
+	private static final double sparsity2 = 0.1; //sparse
+
+	private static final double epsilon = 0.000000001;
+	private static final double maxiter = 10;
+
+	@Override
+	public void setUp() {
+		TestUtils.clearAssertionInformation();
+		addTestConfiguration(TEST_NAME1, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME1, new String[] { "w" }));
+	}
+
+	@Test
+	public void testPNMFDenseCP() {
+		runPNMFTest(TEST_NAME1, false, false, ExecType.CP);
+	}
+
+	@Test
+	public void testPNMFSparseCP() {
+		runPNMFTest(TEST_NAME1, false, true, ExecType.CP);
+	}
+
+	@Test
+	public void testPNMFDenseSP() {
+		runPNMFTest(TEST_NAME1, false, false, ExecType.SPARK);
+	}
+
+	@Test
+	public void testPNMFSparseSP() {
+		runPNMFTest(TEST_NAME1, false, true, ExecType.SPARK);
+	}
+
+	/**
+	 * Runs the PNMF script and its R counterpart and compares their outputs.
+	 * @param testname name of the test configuration and of the DML script
+	 * @param rewrites value of ALLOW_ALGEBRAIC_SIMPLIFICATION during the run
+	 * @param sparse true to generate X with sparsity2, false for sparsity1
+	 * @param instType execution type that selects the runtime platform
+	 */
+	private void runPNMFTest( String testname, boolean rewrites, boolean sparse, ExecType instType )
+	{
+		//remember the global settings that are modified below
+		final boolean rewritesOld = OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION;
+		final boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
+		final RUNTIME_PLATFORM platformOld = rtplatform;
+		switch( instType ) {
+			case MR: rtplatform = RUNTIME_PLATFORM.HADOOP; break;
+			case SPARK: rtplatform = RUNTIME_PLATFORM.SPARK; break;
+			default: rtplatform = RUNTIME_PLATFORM.HYBRID_SPARK; break;
+		}
+		//the switch only yields HADOOP, SPARK or HYBRID_SPARK; the latter two use the local spark config
+		if( rtplatform != RUNTIME_PLATFORM.HADOOP )
+			DMLScript.USE_LOCAL_SPARK_CONFIG = true;
+
+		try {
+			loadTestConfiguration(getTestConfiguration(testname));
+			//construct the script arguments directly
+			final String home = SCRIPT_DIR + TEST_DIR;
+			final String rankArg = String.valueOf(rank);
+			final String epsilonArg = String.valueOf(epsilon);
+			final String maxiterArg = String.valueOf(maxiter);
+			fullDMLScriptName = home + testname + ".dml";
+			programArgs = new String[]{ "-explain", "-stats", "-config=" + home + TEST_CONF, "-args",
+				input("X"), input("W"), input("H"), rankArg, epsilonArg, maxiterArg, output("W"), output("H") };
+			rCmd = getRCmd(inputDir(), rankArg, epsilonArg, maxiterArg, expectedDir());
+			OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = rewrites;
+
+			//generate actual datasets
+			final double sparsityX = sparse ? sparsity2 : sparsity1;
+			writeInputMatrixWithMTD("X", getRandomMatrix(rows, cols, 0, 1, sparsityX, 234), true);
+			writeInputMatrixWithMTD("W", getRandomMatrix(rows, rank, 0, 0.025, 1.0, 3), true);
+			writeInputMatrixWithMTD("H", getRandomMatrix(rank, cols, 0, 0.025, 1.0, 7), true);
+
+			runTest(true, false, null, -1);
+			runRScript(true);
+			//compare matrices
+			HashMap<CellIndex, Double> actualW = readDMLMatrixFromHDFS("W");
+			HashMap<CellIndex, Double> actualH = readDMLMatrixFromHDFS("H");
+			HashMap<CellIndex, Double> expectedW = readRMatrixFromFS("W");
+			HashMap<CellIndex, Double> expectedH = readRMatrixFromFS("H");
+			TestUtils.compareMatrices(actualW, expectedW, eps, "Stat-DML", "Stat-R");
+			TestUtils.compareMatrices(actualH, expectedH, eps, "Stat-DML", "Stat-R");
+			Assert.assertTrue(heavyHittersContainsSubString("spoof") || heavyHittersContainsSubString("sp_spoof"));
+		}
+		finally {
+			rtplatform = platformOld;
+			DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
+			OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = rewritesOld;
+			OptimizerUtils.ALLOW_AUTO_VECTORIZATION = true;
+			OptimizerUtils.ALLOW_OPERATOR_FUSION = true;
+		}
+	}
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/bbefe96b/src/test/java/org/apache/sysml/test/integration/functions/codegen/CellwiseTmplTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegen/CellwiseTmplTest.java b/src/test/java/org/apache/sysml/test/integration/functions/codegen/CellwiseTmplTest.java
new file mode 100644
index 0000000..6313412
--- /dev/null
+++ b/src/test/java/org/apache/sysml/test/integration/functions/codegen/CellwiseTmplTest.java
@@ -0,0 +1,183 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.integration.functions.codegen;
+
+import java.util.HashMap;
+
+import org.junit.Assert;
+import org.junit.Test;
+import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
+import org.apache.sysml.hops.OptimizerUtils;
+import org.apache.sysml.lops.LopProperties.ExecType;
+import org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex;
+import org.apache.sysml.test.integration.AutomatedTestBase;
+import org.apache.sysml.test.integration.TestConfiguration;
+import org.apache.sysml.test.utils.TestUtils;
+
+/** Codegen integration tests for the cellwise template scripts cellwisetmpl1 to cellwisetmpl6. */
+public class CellwiseTmplTest extends AutomatedTestBase 
+{	
+	private static final String TEST_NAME1 = "cellwisetmpl1";
+	private static final String TEST_NAME2 = "cellwisetmpl2";
+	private static final String TEST_NAME3 = "cellwisetmpl3";
+	private static final String TEST_NAME4 = "cellwisetmpl4";
+	private static final String TEST_NAME5 = "cellwisetmpl5";
+	private static final String TEST_NAME6 = "cellwisetmpl6"; //sum
+
+	private static final String TEST_DIR = "functions/codegen/";
+	private static final String TEST_CLASS_DIR = TEST_DIR + CellwiseTmplTest.class.getSimpleName() + "/";
+	private static final String TEST_CONF = "SystemML-config-codegen.xml";
+	
+	private static final double eps = Math.pow(10, -10);
+	
+	@Override
+	public void setUp() {
+		TestUtils.clearAssertionInformation();
+		addTestConfiguration( TEST_NAME1, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME1, new String[] { "1" }) );
+		addTestConfiguration( TEST_NAME2, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME2, new String[] { "2" }) );
+		addTestConfiguration( TEST_NAME3, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME3, new String[] { "3" }) );
+		addTestConfiguration( TEST_NAME4, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME4, new String[] { "4" }) );
+		addTestConfiguration( TEST_NAME5, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME5, new String[] { "5" }) );
+		addTestConfiguration( TEST_NAME6, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME6, new String[] { "6" }) );
+	}
+		
+	@Test
+	public void testCodegenCellwiseRewrite1() {
+		testCodegenIntegration( TEST_NAME1, true, ExecType.CP );
+	}
+		
+	@Test
+	public void testCodegenCellwiseRewrite2() {
+		testCodegenIntegration( TEST_NAME2, true, ExecType.CP  );
+	}
+	
+	@Test
+	public void testCodegenCellwiseRewrite3() {
+		testCodegenIntegration( TEST_NAME3, true, ExecType.CP  );
+	}
+	
+	@Test
+	public void testCodegenCellwiseRewrite4() {
+		testCodegenIntegration( TEST_NAME4, true, ExecType.CP  );
+	}
+	
+	@Test
+	public void testCodegenCellwiseRewrite5() {
+		testCodegenIntegration( TEST_NAME5, true, ExecType.CP  );
+	}
+	
+	@Test
+	public void testCodegenCellwiseRewrite6() {
+		testCodegenIntegration( TEST_NAME6, true, ExecType.CP  );
+	}
+
+	@Test
+	public void testCodegenCellwise1() {
+		testCodegenIntegration( TEST_NAME1, false, ExecType.CP );
+	}
+		
+	@Test
+	public void testCodegenCellwise2() {
+		testCodegenIntegration( TEST_NAME2, false, ExecType.CP  );
+	}
+	
+	@Test
+	public void testCodegenCellwise3() {
+		testCodegenIntegration( TEST_NAME3, false, ExecType.CP  );
+	}
+	
+	@Test
+	public void testCodegenCellwise4() {
+		testCodegenIntegration( TEST_NAME4, false, ExecType.CP  );
+	}
+	
+	@Test
+	public void testCodegenCellwise5() {
+		testCodegenIntegration( TEST_NAME5, false, ExecType.CP  );
+	}
+	
+	@Test
+	public void testCodegenCellwise6() {
+		testCodegenIntegration( TEST_NAME6, false, ExecType.CP  );
+	}
+
+	@Test
+	public void testCodegenCellwiseRewrite1_sp() {
+		testCodegenIntegration( TEST_NAME1, true, ExecType.SPARK );
+	}
+	
+	/**
+	 * Runs one script in DML and R and compares the output S (scalar for TEST_NAME6, matrix otherwise).
+	 * @param testname name of the test configuration and of the DML/R scripts
+	 * @param rewrites value of ALLOW_ALGEBRAIC_SIMPLIFICATION during the run
+	 * @param instType execution type that selects the runtime platform
+	 */
+	private void testCodegenIntegration( String testname, boolean rewrites, ExecType instType )
+	{	
+		//remember all global settings modified below so that they cannot leak into other tests
+		boolean oldRewrites = OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION;
+		boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
+		RUNTIME_PLATFORM platformOld = rtplatform;
+		switch( instType ){
+			case MR: rtplatform = RUNTIME_PLATFORM.HADOOP; break;
+			case SPARK: 
+				rtplatform = RUNTIME_PLATFORM.SPARK;
+				DMLScript.USE_LOCAL_SPARK_CONFIG = true; 
+				break;
+			default: rtplatform = RUNTIME_PLATFORM.HYBRID; break;
+		}
+		
+		try {
+			loadTestConfiguration(getTestConfiguration(testname));
+			String HOME = SCRIPT_DIR + TEST_DIR;
+			fullDMLScriptName = HOME + testname + ".dml";
+			programArgs = new String[]{"-explain", "runtime", "-stats", 
+					"-config=" + HOME + TEST_CONF, "-args", output("S") };
+			fullRScriptName = HOME + testname + ".R";
+			rCmd = getRCmd(inputDir(), expectedDir());			
+			OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = rewrites;
+
+			runTest(true, false, null, -1); 
+			runRScript(true); 
+			if(testname.equals(TEST_NAME6)) { //tak+
+				//compare scalars 
+				HashMap<CellIndex, Double> dmlfile = readDMLScalarFromHDFS("S");
+				HashMap<CellIndex, Double> rfile  = readRScalarFromFS("S");
+				TestUtils.compareScalars((Double) dmlfile.values().toArray()[0], (Double) rfile.values().toArray()[0],0);
+			}
+			else {
+				//compare matrices 
+				HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromHDFS("S");
+				HashMap<CellIndex, Double> rfile  = readRMatrixFromFS("S");	
+				TestUtils.compareMatrices(dmlfile, rfile, eps, "Stat-DML", "Stat-R");
+				if( !(rewrites && testname.equals(TEST_NAME2)) ) //sigmoid
+					Assert.assertTrue(heavyHittersContainsSubString("spoof") || heavyHittersContainsSubString("sp_spoof"));
+			}
+		}
+		finally {
+			rtplatform = platformOld;
+			DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
+			OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = oldRewrites;
+			OptimizerUtils.ALLOW_AUTO_VECTORIZATION = true;
+			OptimizerUtils.ALLOW_OPERATOR_FUSION = true;
+		}
+	}	
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/bbefe96b/src/test/java/org/apache/sysml/test/integration/functions/codegen/DAGCellwiseTmplTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegen/DAGCellwiseTmplTest.java b/src/test/java/org/apache/sysml/test/integration/functions/codegen/DAGCellwiseTmplTest.java
new file mode 100644
index 0000000..65be916
--- /dev/null
+++ b/src/test/java/org/apache/sysml/test/integration/functions/codegen/DAGCellwiseTmplTest.java
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.test.integration.functions.codegen;
+
+import java.util.HashMap;
+
+import org.junit.Assert;
+import org.junit.Test;
+import org.apache.sysml.api.DMLScript;
+import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
+import org.apache.sysml.hops.OptimizerUtils;
+import org.apache.sysml.lops.LopProperties.ExecType;
+import org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex;
+import org.apache.sysml.test.integration.AutomatedTestBase;
+import org.apache.sysml.test.integration.TestConfiguration;
+import org.apache.sysml.test.utils.TestUtils;
+
+/** Codegen integration tests for the DAG cellwise template scripts DAGcellwisetmpl1 to DAGcellwisetmpl3. */
+public class DAGCellwiseTmplTest extends AutomatedTestBase 
+{	
+	private static final String TEST_NAME1 = "DAGcellwisetmpl1";
+	private static final String TEST_NAME2 = "DAGcellwisetmpl2";
+	private static final String TEST_NAME3 = "DAGcellwisetmpl3";
+	private static final String TEST_DIR = "functions/codegen/";
+	private static final String TEST_CLASS_DIR = TEST_DIR + DAGCellwiseTmplTest.class.getSimpleName() + "/";
+	private static final String TEST_CONF = "SystemML-config-codegen.xml";
+	
+	private static final double eps = Math.pow(10, -10);
+	
+	@Override
+	public void setUp() {
+		TestUtils.clearAssertionInformation();
+		addTestConfiguration( TEST_NAME1, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME1, new String[] { "S" }) );
+		addTestConfiguration( TEST_NAME2, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME2, new String[] { "S" }) );
+		addTestConfiguration( TEST_NAME3, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME3, new String[] { "S" }) );
+	}
+		
+	@Test
+	public void testDAGMatrixCellwiseRewrite1() {
+		testCodegenIntegration( TEST_NAME1, true, false, ExecType.CP );
+	}
+		
+	@Test
+	public void testDAGMatrixCellwiseRewrite2() {
+		testCodegenIntegration( TEST_NAME2, true, false, ExecType.CP  );
+	}
+	
+	@Test
+	public void testDAGMatrixCellwiseRewrite3() {
+		testCodegenIntegration( TEST_NAME3, true, false, ExecType.CP  );
+	}
+
+	@Test
+	public void testDAGMatrixCellwise1() {
+		testCodegenIntegration( TEST_NAME1, false, false, ExecType.CP );
+	}
+		
+	@Test
+	public void testDAGMatrixCellwise2() {
+		testCodegenIntegration( TEST_NAME2, false, false, ExecType.CP  );
+	}
+	
+	@Test
+	public void testDAGMatrixCellwise3() {
+		testCodegenIntegration( TEST_NAME3, false, false, ExecType.CP  );
+	}
+
+	@Test
+	public void testDAGVectorCellwiseRewrite1() {
+		testCodegenIntegration( TEST_NAME1, true, true, ExecType.CP );
+	}
+		
+	@Test
+	public void testDAGVectorCellwiseRewrite2() {
+		testCodegenIntegration( TEST_NAME2, true, true, ExecType.CP  );
+	}
+	
+	@Test
+	public void testDAGVectorCellwiseRewrite3() {
+		testCodegenIntegration( TEST_NAME3, true, true, ExecType.CP  );
+	}
+
+	@Test
+	public void testDAGVectorCellwise1() {
+		testCodegenIntegration( TEST_NAME1, false, true, ExecType.CP );
+	}
+		
+	@Test
+	public void testDAGVectorCellwise2() {
+		testCodegenIntegration( TEST_NAME2, false, true, ExecType.CP  );
+	}
+	
+	@Test
+	public void testDAGVectorCellwise3() {
+		testCodegenIntegration( TEST_NAME3, false, true, ExecType.CP  );
+	}
+	
+	/**
+	 * Runs one DAG cellwise template script in DML and R and compares the matrix S.
+	 * @param testname name of the test configuration and of the DML/R scripts
+	 * @param rewrites value of ALLOW_ALGEBRAIC_SIMPLIFICATION during the run
+	 * @param vector true passes 1 as column argument to the scripts, false passes 50
+	 * @param instType execution type that selects the runtime platform
+	 */
+	private void testCodegenIntegration( String testname, boolean rewrites, boolean vector, ExecType instType )
+	{	
+		//remember all global settings modified below so that they cannot leak into other tests
+		boolean oldRewrites = OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION;
+		boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG;
+		RUNTIME_PLATFORM platformOld = rtplatform;
+		switch( instType ){
+			case MR: rtplatform = RUNTIME_PLATFORM.HADOOP; break;
+			case SPARK: 
+				rtplatform = RUNTIME_PLATFORM.SPARK;
+				DMLScript.USE_LOCAL_SPARK_CONFIG = true; 
+				break;
+			default: rtplatform = RUNTIME_PLATFORM.HYBRID; break;
+		}
+		try {
+			loadTestConfiguration(getTestConfiguration(testname));
+			int cols = vector ? 1 : 50;
+			String HOME = SCRIPT_DIR + TEST_DIR;
+			fullDMLScriptName = HOME + testname + ".dml";
+			programArgs = new String[]{"-explain", "runtime", "-stats", 
+					"-config=" + HOME + TEST_CONF, "-args", String.valueOf(cols), output("S") };
+			fullRScriptName = HOME + testname + ".R";
+			rCmd = getRCmd(String.valueOf(cols), expectedDir());			
+			OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = rewrites;
+
+			runTest(true, false, null, -1); 
+			runRScript(true); 
+			//compare matrices 
+			TestUtils.compareMatrices(readDMLMatrixFromHDFS("S"), readRMatrixFromFS("S"), eps, "Stat-DML", "Stat-R");
+			Assert.assertTrue(heavyHittersContainsSubString("spoof") || heavyHittersContainsSubString("sp_spoof"));
+		}
+		finally {
+			rtplatform = platformOld;
+			DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld;
+			OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = oldRewrites;
+			OptimizerUtils.ALLOW_AUTO_VECTORIZATION = true;
+			OptimizerUtils.ALLOW_OPERATOR_FUSION = true;
+		}
+	}	
+}