You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/07/05 23:46:29 UTC

systemml git commit: [SYSTEMML-1745] Codegen support for rowwise cumsum/cummin/cummax

Repository: systemml
Updated Branches:
  refs/heads/master 1e1d3727f -> f418c4460


[SYSTEMML-1745] Codegen support for rowwise cumsum/cummin/cummax

This patch adds support for the unary vector operations cumsum, cummin,
and cummax to the codegen compiler and runtime. Since these operations
only aggregate over columns, a rowwise cumulative aggregate is written as
t(cumsum(t(X))), and we identify exactly such transpose chains. For
example, on Kmeans this eliminates a number of unnecessary intermediates
and rowwise pipeline breakers.

Furthermore, this patch also fixes cplan cleanups in order to avoid
removing plans with single operations if the surrounding template has an
additional aggregation operation.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/f418c446
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/f418c446
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/f418c446

Branch: refs/heads/master
Commit: f418c44600dfc4fa62ed5aa1df2891322cc98f0f
Parents: 1e1d372
Author: Matthias Boehm <mb...@gmail.com>
Authored: Wed Jul 5 16:35:22 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Wed Jul 5 16:47:00 2017 -0700

----------------------------------------------------------------------
 .../sysml/hops/codegen/SpoofCompiler.java       |  10 +-
 .../sysml/hops/codegen/cplan/CNodeUnary.java    |  18 ++-
 .../hops/codegen/template/TemplateRow.java      |  24 ++-
 .../runtime/codegen/LibSpoofPrimitives.java     | 162 +++++++++++++++++++
 .../functions/codegen/RowAggTmplTest.java       |  37 ++++-
 .../scripts/functions/codegen/rowAggPattern28.R |  35 ++++
 .../functions/codegen/rowAggPattern28.dml       |  32 ++++
 .../scripts/functions/codegen/rowAggPattern29.R |  31 ++++
 .../functions/codegen/rowAggPattern29.dml       |  28 ++++
 9 files changed, 368 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/f418c446/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java b/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java
index 5342c09..87ed1a0 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/SpoofCompiler.java
@@ -84,6 +84,7 @@ import org.apache.sysml.parser.Expression.DataType;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.codegen.CodegenUtils;
 import org.apache.sysml.runtime.codegen.SpoofCellwise.CellType;
+import org.apache.sysml.runtime.codegen.SpoofRowwise.RowType;
 import org.apache.sysml.runtime.controlprogram.ForProgramBlock;
 import org.apache.sysml.runtime.controlprogram.FunctionProgramBlock;
 import org.apache.sysml.runtime.controlprogram.IfProgramBlock;
@@ -716,9 +717,12 @@ public class SpoofCompiler
 			}
 			
 			//remove cplan w/ single op and w/o agg
-			if( (tpl instanceof CNodeCell && ((((CNodeCell)tpl).getCellType()==CellType.NO_AGG
-				&& TemplateUtils.hasSingleOperation(tpl)) || TemplateUtils.hasNoOperation(tpl)))
-				|| tpl instanceof CNodeRow && TemplateUtils.hasSingleOperation(tpl)) 
+			if( (tpl instanceof CNodeCell && ((CNodeCell)tpl).getCellType()==CellType.NO_AGG
+					&& TemplateUtils.hasSingleOperation(tpl) )
+				|| (tpl instanceof CNodeRow && (((CNodeRow)tpl).getRowType()==RowType.NO_AGG
+					|| ((CNodeRow)tpl).getRowType()==RowType.NO_AGG_B1)
+					&& TemplateUtils.hasSingleOperation(tpl))
+				|| TemplateUtils.hasNoOperation(tpl) ) 
 				cplans2.remove(e.getKey());
 				
 			//remove cplan if empty

http://git-wip-us.apache.org/repos/asf/systemml/blob/f418c446/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java
index 85800b8..1a36604 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/cplan/CNodeUnary.java
@@ -31,6 +31,7 @@ public class CNodeUnary extends CNode
 		ROW_SUMS, ROW_MINS, ROW_MAXS, //codegen specific
 		VECT_EXP, VECT_POW2, VECT_MULT2, VECT_SQRT, VECT_LOG,
 		VECT_ABS, VECT_ROUND, VECT_CEIL, VECT_FLOOR, VECT_SIGN, 
+		VECT_CUMSUM, VECT_CUMMIN, VECT_CUMMAX,
 		EXP, POW2, MULT2, SQRT, LOG, LOG_NZ,
 		ABS, ROUND, CEIL, FLOOR, SIGN, 
 		SIN, COS, TAN, ASIN, ACOS, ATAN,
@@ -62,7 +63,10 @@ public class CNodeUnary extends CNode
 				case VECT_ROUND:
 				case VECT_CEIL:
 				case VECT_FLOOR:
-				case VECT_SIGN: {
+				case VECT_SIGN:
+				case VECT_CUMSUM:
+				case VECT_CUMMIN:
+				case VECT_CUMMAX:{
 					String vectName = getVectorPrimitiveName();
 					return sparse ? "    double[] %TMP% = LibSpoofPrimitives.vect"+vectName+"Write(%IN1v%, %IN1i%, %POS1%, alen, len);\n" : 
 									"    double[] %TMP% = LibSpoofPrimitives.vect"+vectName+"Write(%IN1%, %POS1%, %LEN%);\n";
@@ -128,7 +132,9 @@ public class CNodeUnary extends CNode
 				|| this == VECT_MULT2 || this == VECT_SQRT
 				|| this == VECT_LOG || this == VECT_ABS
 				|| this == VECT_ROUND || this == VECT_CEIL
-				|| this == VECT_FLOOR || this == VECT_SIGN;
+				|| this == VECT_FLOOR || this == VECT_SIGN
+				|| this == VECT_CUMSUM || this == VECT_CUMMIN
+				|| this == VECT_CUMMAX;
 		}
 		public UnaryType getVectorAddPrimitive() {
 			return UnaryType.valueOf("VECT_"+getVectorPrimitiveName().toUpperCase()+"_ADD");
@@ -212,6 +218,9 @@ public class CNodeUnary extends CNode
 			case VECT_ROUND:
 			case VECT_CEIL:
 			case VECT_FLOOR:
+			case VECT_CUMSUM:
+			case VECT_CUMMIN:
+			case VECT_CUMMAX:
 			case VECT_SIGN: return "u(v"+_type.name().toLowerCase()+")";
 			case LOOKUP_R:  return "u(ixr)";
 			case LOOKUP_C:  return "u(ixc)";
@@ -235,7 +244,10 @@ public class CNodeUnary extends CNode
 			case VECT_ROUND:
 			case VECT_CEIL:
 			case VECT_FLOOR:
-			case VECT_SIGN:	
+			case VECT_SIGN:
+			case VECT_CUMSUM:
+			case VECT_CUMMIN:
+			case VECT_CUMMAX:
 				_rows = _inputs.get(0)._rows;
 				_cols = _inputs.get(0)._cols;
 				_dataType= DataType.MATRIX;

http://git-wip-us.apache.org/repos/asf/systemml/blob/f418c446/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
index c0c8c4e..0bc0380 100644
--- a/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
+++ b/src/main/java/org/apache/sysml/hops/codegen/template/TemplateRow.java
@@ -57,7 +57,8 @@ public class TemplateRow extends TemplateBase
 {
 	private static final Hop.AggOp[] SUPPORTED_ROW_AGG = new AggOp[]{AggOp.SUM, AggOp.MIN, AggOp.MAX};
 	private static final Hop.OpOp1[] SUPPORTED_VECT_UNARY = new OpOp1[]{
-			OpOp1.EXP, OpOp1.SQRT, OpOp1.LOG, OpOp1.ABS, OpOp1.ROUND, OpOp1.CEIL, OpOp1.FLOOR, OpOp1.SIGN};
+			OpOp1.EXP, OpOp1.SQRT, OpOp1.LOG, OpOp1.ABS, OpOp1.ROUND, OpOp1.CEIL, OpOp1.FLOOR, OpOp1.SIGN,
+			OpOp1.CUMSUM, OpOp1.CUMMIN, OpOp1.CUMMAX};
 	private static final Hop.OpOp2[] SUPPORTED_VECT_BINARY = new OpOp2[]{
 			OpOp2.MULT, OpOp2.DIV, OpOp2.MINUS, OpOp2.PLUS, OpOp2.POW, OpOp2.MIN, OpOp2.MAX,
 			OpOp2.EQUAL, OpOp2.NOTEQUAL, OpOp2.LESS, OpOp2.LESSEQUAL, OpOp2.GREATER, OpOp2.GREATEREQUAL};
@@ -107,7 +108,8 @@ public class TemplateRow extends TemplateBase
 				&& HopRewriteUtils.isTransposeOperation(hop.getInput().get(0)))
 			|| (hop instanceof AggBinaryOp && hop.dimsKnown() && isFuseSkinnyMatrixMult(hop) //MM
 				&& HopRewriteUtils.isTransposeOperation(hop.getInput().get(0))
-				&& hop.getInput().get(0).getDim1()>1 && hop.getInput().get(0).getDim2()>1));
+				&& hop.getInput().get(0).getDim1()>1 && hop.getInput().get(0).getDim2()>1)
+			|| isPartOfValidCumAggChain(hop) ); //cum* with transpose
 	}
 
 	@Override
@@ -144,6 +146,24 @@ public class TemplateRow extends TemplateBase
 		return LibMatrixMult.isSkinnyRightHandSide(in1.getDim2(), in1.getDim1(), hop.getDim1(), hop.getDim2())
 			|| LibMatrixMult.isSkinnyRightHandSide(in2.getDim1(), in2.getDim2(), hop.getDim2(), hop.getDim1());
 	}
+	
+	private static boolean isPartOfValidCumAggChain(Hop hop) {
+		//true if hop is the outer t(), the cum* op, or the inner t() of a t(cum*(t(X))) chain, w/ single consumers
+		if( HopRewriteUtils.isTransposeOperation(hop) ) {
+			return (HopRewriteUtils.isUnary(hop.getInput().get(0), OpOp1.CUMSUM, OpOp1.CUMMIN, OpOp1.CUMMAX) //hop is outer t()
+				&& hop.getParent().size()==1 && HopRewriteUtils.isTransposeOperation(hop.getInput().get(0).getInput().get(0))
+				&& hop.getInput().get(0).getInput().get(0).getParent().size()==1)
+				|| (hop.getParent().size()==1 && hop.getParent().get(0).getParent().size()==1 //hop is inner t(); size guards precede get(0)
+				&& HopRewriteUtils.isUnary(hop.getParent().get(0), OpOp1.CUMSUM, OpOp1.CUMMIN, OpOp1.CUMMAX)
+				&& HopRewriteUtils.isTransposeOperation(hop.getParent().get(0).getParent().get(0)));
+		}
+		else {
+			return (HopRewriteUtils.isUnary(hop, OpOp1.CUMSUM, OpOp1.CUMMIN, OpOp1.CUMMAX) //hop is the cum* op itself
+				&& hop.getParent().size()==1 && HopRewriteUtils.isTransposeOperation(hop.getParent().get(0))
+				&& HopRewriteUtils.isTransposeOperation(hop.getInput().get(0))
+				&& hop.getInput().get(0).getParent().size()==1);
+		}
+	}
 
 	@Override
 	public Pair<Hop[], CNodeTpl> constructCplan(Hop hop, CPlanMemoTable memo, boolean compileLiterals) {

http://git-wip-us.apache.org/repos/asf/systemml/blob/f418c446/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java b/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java
index 1108c08..1c59ee0 100644
--- a/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java
+++ b/src/main/java/org/apache/sysml/runtime/codegen/LibSpoofPrimitives.java
@@ -644,6 +644,168 @@ public class LibSpoofPrimitives
 		return c;
 	}
 
+	//custom cumsum
+	
+	public static void vectCumsumAdd(double[] a, double[] c, int ai, int ci, int len) {
+		double val = 0; //running prefix sum of a[ai..ai+j]
+		for( int j = 0; j < len; j++ ) {
+			val += a[ai + j]; //ai is an offset into a, not a stride
+			c[ci+j] += val; //accumulate into the output starting at ci
+		}
+	}
+
+	public static void vectCumsumAdd(double[] a, double[] c, int[] aix, int ai, int ci, int alen, int len) {
+		double val = 0; //running prefix sum over the logical row of length len
+		int lastIx = -1; //last column position already emitted
+		for( int j = ai; j < ai+alen; j++ ) {
+			//columns without a stored entry contribute 0 and repeat the current sum
+			for( int j2=lastIx+1; j2<aix[j]; j2++ )
+				c[ci+j2] += val;
+			//fold in the stored value and accumulate it at its column, offset by ci like the dense variant
+			val += a[j];
+			c[ci+aix[j]] += val;
+			lastIx = aix[j];
+		}
+		//columns after the last stored entry repeat the final sum
+		for( int j2=lastIx+1; j2<len; j2++ )
+			c[ci+j2] += val;
+	}
+	
+	public static double[] vectCumsumWrite(double[] a, int ai, int len) {
+		//prefix sums of a[ai..ai+len) written into a vector obtained from allocVector
+		double[] out = allocVector(len, false);
+		double sum = 0;
+		for( int src = ai, dst = 0; dst < len; src++, dst++ ) {
+			out[dst] = (sum += a[src]);
+		}
+		return out;
+	}
+
+	public static double[] vectCumsumWrite(double[] a, int[] aix, int ai, int alen, int len) {
+		double[] out = allocVector(len, false);
+		double sum = 0;
+		int next = 0; //first output position not yet written
+		for( int k = 0; k < alen; k++ ) {
+			final int ix = aix[ai+k];
+			//positions without a stored entry repeat the current prefix sum
+			Arrays.fill(out, next, ix, sum);
+			sum += a[ai+k];
+			out[ix] = sum;
+			next = ix + 1;
+		}
+		//tail after the last stored entry repeats the final sum
+		Arrays.fill(out, next, len, sum);
+		return out;
+	}
+
+	//custom cummin
+	
+	public static void vectCumminAdd(double[] a, double[] c, int ai, int ci, int len) {
+		double val = Double.POSITIVE_INFINITY; //identity of min, so the first element is kept as-is
+		for( int j = 0; j < len; j++ ) {
+			val = Math.min(val, a[ai + j]); //ai is an offset into a, not a stride
+			c[ci+j] += val; //accumulate the running minimum into the output starting at ci
+		}
+	}
+
+	public static void vectCumminAdd(double[] a, double[] c, int[] aix, int ai, int ci, int alen, int len) {
+		double val = Double.POSITIVE_INFINITY; //identity of min
+		int lastIx = -1; //last column position already emitted
+		for( int j = ai; j < ai+alen; j++ ) {
+			//columns without a stored entry are implicit zeros, which cap the running min at 0
+			for( int j2=lastIx+1; j2<aix[j]; j2++ )
+				c[ci+j2] += (val = Math.min(val, 0));
+			//fold in the stored value and accumulate it at its column, offset by ci like the dense variant
+			val = Math.min(val, a[j]);
+			c[ci+aix[j]] += val;
+			lastIx = aix[j];
+		}
+		//trailing implicit zeros after the last stored entry
+		for( int j2=lastIx+1; j2<len; j2++ )
+			c[ci+j2] += (val = Math.min(val, 0));
+	}
+	
+	public static double[] vectCumminWrite(double[] a, int ai, int len) {
+		double[] c = allocVector(len, false);
+		double val = Double.POSITIVE_INFINITY; //identity of min, so c[0] equals a[ai]
+		for( int j = 0; j < len; j++ ) {
+			val = Math.min(val, a[ai+j]); //running minimum of a[ai..ai+j]
+			c[j] = val;
+		}
+		return c;
+	}
+
+	public static double[] vectCumminWrite(double[] a, int[] aix, int ai, int alen, int len) {
+		double[] c = allocVector(len, false);
+		double val = Double.POSITIVE_INFINITY; //identity of min
+		int lastIx = -1; //last column position already written
+		for( int j = ai; j < ai+alen; j++ ) {
+			//a gap holds implicit zeros, which cap the running min at 0
+			if( lastIx+1 < aix[j] ) val = Math.min(val, 0);
+			Arrays.fill(c, lastIx+1, aix[j], val);
+			val = Math.min(val, a[j]); //fold in the stored value
+			c[aix[j]] = val;
+			lastIx = aix[j];
+		}
+		//trailing implicit zeros; the fill value is unused when the range is empty
+		Arrays.fill(c, lastIx+1, len, Math.min(val, 0));
+		return c;
+	}
+	
+	//custom cummax
+
+	public static void vectCummaxAdd(double[] a, double[] c, int ai, int ci, int len) {
+		double val = Double.NEGATIVE_INFINITY; //identity of max, so the first element is kept as-is
+		for( int j = 0; j < len; j++ ) {
+			val = Math.max(val, a[ai + j]); //ai is an offset into a, not a stride
+			c[ci+j] += val; //accumulate the running maximum into the output starting at ci
+		}
+	}
+
+	public static void vectCummaxAdd(double[] a, double[] c, int[] aix, int ai, int ci, int alen, int len) {
+		double val = Double.NEGATIVE_INFINITY; //identity of max
+		int lastIx = -1; //last column position already emitted
+		for( int j = ai; j < ai+alen; j++ ) {
+			//columns without a stored entry are implicit zeros, which lift the running max to at least 0
+			for( int j2=lastIx+1; j2<aix[j]; j2++ )
+				c[ci+j2] += (val = Math.max(val, 0));
+			//fold in the stored value and accumulate it at its column, offset by ci like the dense variant
+			val = Math.max(val, a[j]);
+			c[ci+aix[j]] += val;
+			lastIx = aix[j];
+		}
+		//trailing implicit zeros after the last stored entry
+		for( int j2=lastIx+1; j2<len; j2++ )
+			c[ci+j2] += (val = Math.max(val, 0));
+	}
+	
+	public static double[] vectCummaxWrite(double[] a, int ai, int len) {
+		double[] c = allocVector(len, false);
+		double val = Double.NEGATIVE_INFINITY; //identity of max, so c[0] equals a[ai]
+		for( int j = 0; j < len; j++ ) {
+			val = Math.max(val, a[ai+j]); //running maximum of a[ai..ai+j]
+			c[j] = val;
+		}
+		return c;
+	}
+
+	public static double[] vectCummaxWrite(double[] a, int[] aix, int ai, int alen, int len) {
+		double[] c = allocVector(len, false);
+		double val = Double.NEGATIVE_INFINITY; //identity of max
+		int lastIx = -1; //last column position already written
+		for( int j = ai; j < ai+alen; j++ ) {
+			//a gap holds implicit zeros, which lift the running max to at least 0
+			if( lastIx+1 < aix[j] ) val = Math.max(val, 0);
+			Arrays.fill(c, lastIx+1, aix[j], val);
+			val = Math.max(val, a[j]); //fold in the stored value
+			c[aix[j]] = val;
+			lastIx = aix[j];
+		}
+		//trailing implicit zeros; the fill value is unused when the range is empty
+		Arrays.fill(c, lastIx+1, len, Math.max(val, 0));
+		return c;
+	}
+	
 	//custom log
 	
 	public static void vectLogAdd(double[] a, double[] c, int ai, int ci, int len) {

http://git-wip-us.apache.org/repos/asf/systemml/blob/f418c446/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java b/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java
index e32056a..8f5f03f 100644
--- a/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/functions/codegen/RowAggTmplTest.java
@@ -63,6 +63,8 @@ public class RowAggTmplTest extends AutomatedTestBase
 	private static final String TEST_NAME25 = TEST_NAME+"25"; //-2*(X%*%t(C))+t(rowSums(C^2)), w/ mm
 	private static final String TEST_NAME26 = TEST_NAME+"26"; //t(P)%*%X, w/ mm
 	private static final String TEST_NAME27 = TEST_NAME+"27"; //t(X)%*%(X%*%v), w/ mm 
+	private static final String TEST_NAME28 = TEST_NAME+"28"; //Kmeans, final eval
+	private static final String TEST_NAME29 = TEST_NAME+"29"; //sum(rowMins(X))
 	
 	private static final String TEST_DIR = "functions/codegen/";
 	private static final String TEST_CLASS_DIR = TEST_DIR + RowAggTmplTest.class.getSimpleName() + "/";
@@ -74,7 +76,7 @@ public class RowAggTmplTest extends AutomatedTestBase
 	@Override
 	public void setUp() {
 		TestUtils.clearAssertionInformation();
-		for(int i=1; i<=27; i++)
+		for(int i=1; i<=29; i++)
 			addTestConfiguration( TEST_NAME+i, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME+i, new String[] { String.valueOf(i) }) );
 	}
 	
@@ -483,6 +485,36 @@ public class RowAggTmplTest extends AutomatedTestBase
 		testCodegenIntegration( TEST_NAME27, false, ExecType.SPARK );
 	}
 	
+	@Test	
+	public void testCodegenRowAggRewrite28CP() {
+		testCodegenIntegration( TEST_NAME28, true, ExecType.CP ); //pattern 28, rewrites=true, ExecType.CP
+	}
+	
+	@Test
+	public void testCodegenRowAgg28CP() {
+		testCodegenIntegration( TEST_NAME28, false, ExecType.CP ); //pattern 28, rewrites=false, ExecType.CP
+	}
+	
+	@Test
+	public void testCodegenRowAgg28SP() {
+		testCodegenIntegration( TEST_NAME28, false, ExecType.SPARK ); //pattern 28, rewrites=false, ExecType.SPARK
+	}
+	
+	@Test	
+	public void testCodegenRowAggRewrite29CP() {
+		testCodegenIntegration( TEST_NAME29, true, ExecType.CP ); //pattern 29, rewrites=true, ExecType.CP
+	}
+	
+	@Test
+	public void testCodegenRowAgg29CP() {
+		testCodegenIntegration( TEST_NAME29, false, ExecType.CP ); //pattern 29, rewrites=false, ExecType.CP
+	}
+	
+	@Test
+	public void testCodegenRowAgg29SP() {
+		testCodegenIntegration( TEST_NAME29, false, ExecType.SPARK ); //pattern 29, rewrites=false, ExecType.SPARK
+	}
+	
 	private void testCodegenIntegration( String testname, boolean rewrites, ExecType instType )
 	{	
 		boolean oldFlag = OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION;
@@ -526,6 +558,9 @@ public class RowAggTmplTest extends AutomatedTestBase
 				Assert.assertTrue(!heavyHittersContainsSubString("uark+"));
 			if( testname.equals(TEST_NAME17) )
 				Assert.assertTrue(!heavyHittersContainsSubString("rangeReIndex"));
+			if( testname.equals(TEST_NAME28) )
+				Assert.assertTrue(!heavyHittersContainsSubString("spoofRA", 2)
+					&& !heavyHittersContainsSubString("sp_spoofRA", 2));
 		}
 		finally {
 			rtplatform = platformOld;

http://git-wip-us.apache.org/repos/asf/systemml/blob/f418c446/src/test/scripts/functions/codegen/rowAggPattern28.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/rowAggPattern28.R b/src/test/scripts/functions/codegen/rowAggPattern28.R
new file mode 100644
index 0000000..9ba1cf6
--- /dev/null
+++ b/src/test/scripts/functions/codegen/rowAggPattern28.R
@@ -0,0 +1,35 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args<-commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+library("matrixStats")
+
+X = matrix(seq(1,6000)/6000, 600, 10, byrow=TRUE); # 600x10 input, filled row-wise
+C = matrix(seq(1,40)/40, 4, 10, byrow=TRUE); # 4x10 matrix, filled row-wise (presumably the Kmeans centroids - confirm)
+
+D =  -2 * (X %*% t(C)) + matrix(1,nrow(X),1)%*%t(rowSums (C ^ 2)); # -2*X*t(C) plus rowSums(C^2) replicated to every row of X
+P = (D <= (rowMins (D) %*% matrix(1, 1, ncol(D)))); # TRUE where an entry of D attains its row minimum
+aggr_P = t(apply(t(P), 2, cumsum)); # cumsum down the columns of t(P), i.e. along each row of P
+R = rowSums (aggr_P == 0) + 1 # 1 + number of leading zeros per row = column index of the first row minimum
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "S", sep="")); # write R as a sparse matrix to the path args[2] followed by "S"

http://git-wip-us.apache.org/repos/asf/systemml/blob/f418c446/src/test/scripts/functions/codegen/rowAggPattern28.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/rowAggPattern28.dml b/src/test/scripts/functions/codegen/rowAggPattern28.dml
new file mode 100644
index 0000000..bc40685
--- /dev/null
+++ b/src/test/scripts/functions/codegen/rowAggPattern28.dml
@@ -0,0 +1,32 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+X = matrix(seq(1,6000)/6000, 600, 10); # 600x10 input
+C = matrix(seq(1,40)/40, 4, 10); # 4x10 matrix (presumably the Kmeans centroids - confirm)
+if(1==1){} # NOTE(review): presumably an intentional statement-block cut before the tested pattern - confirm
+
+D =  -2 * (X %*% t(C)) + t(rowSums (C ^ 2)); # -2*X*t(C) plus the row vector of rowSums(C^2)
+P = (D <= rowMins (D)); # 1 where an entry of D attains its row minimum
+aggr_P = t(cumsum (t(P))); # the t(cumsum(t(.))) chain, i.e. cumulative sum along each row of P
+R = rowSums (aggr_P == 0) + 1 # 1 + number of leading zeros per row = column index of the first row minimum
+
+write(R, $1) # write R to the path given as first script argument

http://git-wip-us.apache.org/repos/asf/systemml/blob/f418c446/src/test/scripts/functions/codegen/rowAggPattern29.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/rowAggPattern29.R b/src/test/scripts/functions/codegen/rowAggPattern29.R
new file mode 100644
index 0000000..5316f32
--- /dev/null
+++ b/src/test/scripts/functions/codegen/rowAggPattern29.R
@@ -0,0 +1,31 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args<-commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+library("matrixStats")
+
+X = matrix(seq(1,6000)/6000, 600, 10, byrow=TRUE); # 600x10 input, filled row-wise
+
+R = as.matrix(sum(rowMins(X))); # sum of the per-row minima as a 1x1 matrix
+
+writeMM(as(R, "CsparseMatrix"), paste(args[2], "S", sep="")); # write R as a sparse matrix to the path args[2] followed by "S"

http://git-wip-us.apache.org/repos/asf/systemml/blob/f418c446/src/test/scripts/functions/codegen/rowAggPattern29.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/codegen/rowAggPattern29.dml b/src/test/scripts/functions/codegen/rowAggPattern29.dml
new file mode 100644
index 0000000..236d449
--- /dev/null
+++ b/src/test/scripts/functions/codegen/rowAggPattern29.dml
@@ -0,0 +1,28 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+X = matrix(seq(1,6000)/6000, 600, 10); # 600x10 input
+if(1==1){} # NOTE(review): presumably an intentional statement-block cut before the tested pattern - confirm
+
+R = as.matrix(sum(rowMins(X))); # sum of the per-row minima as a 1x1 matrix
+
+write(R, $1) # write R to the path given as first script argument